blob: 6d6df5b8128ec443c4e9179d39fa907c1f1283c8 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Daniel Veillard3c5ed912002-01-08 10:36:16 +000035#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL-terminated and is only ever read, so both the strings
 * and the array slots are const-qualified.
 */
static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000092xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
93 const xmlChar **str);
94
Daniel Veillard257d9102001-05-08 10:41:44 +000095static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000096xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
97 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000098 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000100
Daniel Veillard8107a222002-01-13 14:10:10 +0000101static void
102xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
103 xmlNodePtr lastNode);
104
Owen Taylor3473f882001-02-23 17:55:21 +0000105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
112 const xmlChar ** str);
113
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to two functions:
 *
 *   name##Push(ctxt, value)
 *       Stores value at ctxt->name##Tab[ctxt->name##Nr], makes it the
 *       current ctxt->name and returns the index it was stored at.
 *       When the table is full it is first doubled with xmlRealloc().
 *       If that fails an error is reported, 0 is returned and the
 *       existing table, its size and its content are left untouched.
 *
 *   name##Pop(ctxt)
 *       Returns 0 when the stack is empty. Otherwise removes the top
 *       entry, clears its slot, makes the entry below it (or NULL when
 *       none is left) the current ctxt->name and returns the removed
 *       entry.
 */
#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        /* grow into a temporary so a failure keeps the old table */    \
        type *grown = (type *) xmlRealloc(ctxt->name##Tab,              \
                 ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));     \
        if (grown == NULL) {                                            \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = grown;                                        \
        ctxt->name##Max *= 2;                                           \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000158 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000159 *
160 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 *
162 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163 */
164/**
165 * namePop:
166 * @ctxt: an XML parser context
167 *
168 * Pops the top element name from the name stack
169 *
170 * Returns the name just removed
171 */
172/**
173 * namePush:
174 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000175 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000176 *
177 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 *
179 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000180 */
181/**
182 * nodePop:
183 * @ctxt: an XML parser context
184 *
185 * Pops the top element node from the node stack
186 *
187 * Returns the node just removed
188 */
189/**
190 * nodePush:
191 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000192 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000193 *
194 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 *
196 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000197 */
Owen Taylor3473f882001-02-23 17:55:21 +0000198/*
199 * Those macros actually generate the functions
200 */
201PUSH_AND_POP(extern, xmlParserInputPtr, input) /* generates inputPush() and inputPop() */
202PUSH_AND_POP(extern, xmlNodePtr, node) /* generates nodePush() and nodePop() */
203PUSH_AND_POP(extern, xmlChar*, name) /* generates namePush() and namePop() */
204
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000205static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000206 if (ctxt->spaceNr >= ctxt->spaceMax) {
207 ctxt->spaceMax *= 2;
208 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
209 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
210 if (ctxt->spaceTab == NULL) {
211 xmlGenericError(xmlGenericErrorContext,
212 "realloc failed !\n");
213 return(0);
214 }
215 }
216 ctxt->spaceTab[ctxt->spaceNr] = val;
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
218 return(ctxt->spaceNr++);
219}
220
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000221static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000222 int ret;
223 if (ctxt->spaceNr <= 0) return(0);
224 ctxt->spaceNr--;
225 if (ctxt->spaceNr > 0)
226 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
227 else
228 ctxt->space = NULL;
229 ret = ctxt->spaceTab[ctxt->spaceNr];
230 ctxt->spaceTab[ctxt->spaceNr] = -1;
231 return(ret);
232}
233
234/*
235 * Macros for accessing the content. Those should be used only by the parser,
236 * and not exported.
237 *
238 * Dirty macros, i.e. one often need to make assumption on the context to
239 * use them
240 *
241 * CUR_PTR return the current pointer to the xmlChar to be parsed.
242 * To be used with extreme caution since operations consuming
243 * characters may move the input buffer to a different location !
244 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
245 * This should be used internally by the parser
246 * only to compare to ASCII values otherwise it would break when
247 * running with UTF-8 encoding.
248 * RAW same as CUR but in the input buffer, bypass any token
249 * extraction that may have been done
250 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
251 * to compare on ASCII based substring.
252 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
253 * strings within the parser.
254 *
255 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
256 *
257 * NEXT Skip to the next character, this does the proper decoding
258 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000259 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000260 * CUR_CHAR(l) returns the current unicode character (int), set l
261 * to the number of xmlChars used for the encoding [0-5].
262 * CUR_SCHAR same but operate on a string instead of the context
263 * COPY_BUF copy the current unicode char to the target buffer, increment
264 * the index
265 * GROW, SHRINK handling of input buffers
266 */
267
/*
 * All of these macros expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

/* current byte of the input, or -1 while a token is pending */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* pending token if there is one, otherwise the current byte of the input */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val bytes, then expand a parameter-entity reference that
 * starts there and pop the input if it is exhausted.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * SHRINK, GROW, NEXT1 and COPY_BUF deliberately keep their original
 * statement shape (bare if / brace block, no do-while wrapper): this
 * file uses them both with and without a trailing semicolon.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* advance by exactly one byte, refilling the input when it hits a 0 byte */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Advance by l bytes, keeping line/col up to date and dropping any
 * pending token. The argument is parenthesized so that an expression
 * can be passed safely.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i: a plain byte store when
 * l == 1, xmlCopyCharMultiByte() otherwise. i is advanced accordingly.
 * Every argument is parenthesized; i is evaluated more than once and
 * must therefore be a plain lvalue without side effects.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000320
321/**
322 * xmlSkipBlankChars:
323 * @ctxt: the XML parser context
324 *
325 * skip all blanks character found at that point in the input streams.
326 * It pops up finished entities in the process if allowable at that point.
327 *
328 * Returns the number of space chars skipped
329 */
330
331int
332xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000333 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000334
Daniel Veillard02141ea2001-04-30 11:46:40 +0000335 if (ctxt->token != 0) {
336 if (!IS_BLANK(ctxt->token))
337 return(0);
338 ctxt->token = 0;
339 res++;
340 }
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
342 * It's Okay to use CUR/NEXT here since all the blanks are on
343 * the ASCII range.
344 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000345 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
346 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000347 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000349 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000350 cur = ctxt->input->cur;
351 while (IS_BLANK(*cur)) {
352 if (*cur == '\n') {
353 ctxt->input->line++; ctxt->input->col = 1;
354 }
355 cur++;
356 res++;
357 if (*cur == 0) {
358 ctxt->input->cur = cur;
359 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
360 cur = ctxt->input->cur;
361 }
362 }
363 ctxt->input->cur = cur;
364 } else {
365 int cur;
366 do {
367 cur = CUR;
368 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
369 NEXT;
370 cur = CUR;
371 res++;
372 }
373 while ((cur == 0) && (ctxt->inputNr > 1) &&
374 (ctxt->instate != XML_PARSER_COMMENT)) {
375 xmlPopInput(ctxt);
376 cur = CUR;
377 }
378 /*
379 * Need to handle support of entities branching here
380 */
381 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
382 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
383 }
Owen Taylor3473f882001-02-23 17:55:21 +0000384 return(res);
385}
386
387/************************************************************************
388 * *
389 * Commodity functions to handle entities *
390 * *
391 ************************************************************************/
392
393/**
394 * xmlPopInput:
395 * @ctxt: an XML parser context
396 *
397 * xmlPopInput: the current input pointed by ctxt->input came to an end
398 * pop it and return the next char.
399 *
400 * Returns the current xmlChar in the parser context
401 */
402xmlChar
403xmlPopInput(xmlParserCtxtPtr ctxt) {
404 if (ctxt->inputNr == 1) return(0); /* End of main Input */
405 if (xmlParserDebugEntities)
406 xmlGenericError(xmlGenericErrorContext,
407 "Popping input %d\n", ctxt->inputNr);
408 xmlFreeInputStream(inputPop(ctxt));
409 if ((*ctxt->input->cur == 0) &&
410 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
411 return(xmlPopInput(ctxt));
412 return(CUR);
413}
414
415/**
416 * xmlPushInput:
417 * @ctxt: an XML parser context
418 * @input: an XML parser input fragment (entity, XML fragment ...).
419 *
420 * xmlPushInput: switch to a new input stream which is stacked on top
421 * of the previous one(s).
422 */
423void
424xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
425 if (input == NULL) return;
426
427 if (xmlParserDebugEntities) {
428 if ((ctxt->input != NULL) && (ctxt->input->filename))
429 xmlGenericError(xmlGenericErrorContext,
430 "%s(%d): ", ctxt->input->filename,
431 ctxt->input->line);
432 xmlGenericError(xmlGenericErrorContext,
433 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
434 }
435 inputPush(ctxt, input);
436 GROW;
437}
438
439/**
440 * xmlParseCharRef:
441 * @ctxt: an XML parser context
442 *
443 * parse Reference declarations
444 *
445 * [66] CharRef ::= '&#' [0-9]+ ';' |
446 * '&#x' [0-9a-fA-F]+ ';'
447 *
448 * [ WFC: Legal Character ]
449 * Characters referred to using character references must match the
450 * production for Char.
451 *
452 * Returns the value parsed (as an int), 0 in case of error
453 */
454int
455xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000456 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000457 int count = 0;
458
459 if (ctxt->token != 0) {
460 val = ctxt->token;
461 ctxt->token = 0;
462 return(val);
463 }
464 /*
465 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
466 */
467 if ((RAW == '&') && (NXT(1) == '#') &&
468 (NXT(2) == 'x')) {
469 SKIP(3);
470 GROW;
471 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000472 if (count++ > 20) {
473 count = 0;
474 GROW;
475 }
476 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000477 val = val * 16 + (CUR - '0');
478 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
479 val = val * 16 + (CUR - 'a') + 10;
480 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
481 val = val * 16 + (CUR - 'A') + 10;
482 else {
483 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid hexadecimal value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 val = 0;
490 break;
491 }
492 NEXT;
493 count++;
494 }
495 if (RAW == ';') {
496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
497 ctxt->nbChars ++;
498 ctxt->input->cur++;
499 }
500 } else if ((RAW == '&') && (NXT(1) == '#')) {
501 SKIP(2);
502 GROW;
503 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000504 if (count++ > 20) {
505 count = 0;
506 GROW;
507 }
508 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000509 val = val * 10 + (CUR - '0');
510 else {
511 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
513 ctxt->sax->error(ctxt->userData,
514 "xmlParseCharRef: invalid decimal value\n");
515 ctxt->wellFormed = 0;
516 ctxt->disableSAX = 1;
517 val = 0;
518 break;
519 }
520 NEXT;
521 count++;
522 }
523 if (RAW == ';') {
524 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
525 ctxt->nbChars ++;
526 ctxt->input->cur++;
527 }
528 } else {
529 ctxt->errNo = XML_ERR_INVALID_CHARREF;
530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
531 ctxt->sax->error(ctxt->userData,
532 "xmlParseCharRef: invalid value\n");
533 ctxt->wellFormed = 0;
534 ctxt->disableSAX = 1;
535 }
536
537 /*
538 * [ WFC: Legal Character ]
539 * Characters referred to using character references must match the
540 * production for Char.
541 */
542 if (IS_CHAR(val)) {
543 return(val);
544 } else {
545 ctxt->errNo = XML_ERR_INVALID_CHAR;
546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000547 ctxt->sax->error(ctxt->userData,
548 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000549 val);
550 ctxt->wellFormed = 0;
551 ctxt->disableSAX = 1;
552 }
553 return(0);
554}
555
556/**
557 * xmlParseStringCharRef:
558 * @ctxt: an XML parser context
559 * @str: a pointer to an index in the string
560 *
561 * parse Reference declarations, variant parsing from a string rather
562 * than an an input flow.
563 *
564 * [66] CharRef ::= '&#' [0-9]+ ';' |
565 * '&#x' [0-9a-fA-F]+ ';'
566 *
567 * [ WFC: Legal Character ]
568 * Characters referred to using character references must match the
569 * production for Char.
570 *
571 * Returns the value parsed (as an int), 0 in case of error, str will be
572 * updated to the current value of the index
573 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000574static int
Owen Taylor3473f882001-02-23 17:55:21 +0000575xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
576 const xmlChar *ptr;
577 xmlChar cur;
578 int val = 0;
579
580 if ((str == NULL) || (*str == NULL)) return(0);
581 ptr = *str;
582 cur = *ptr;
583 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
584 ptr += 3;
585 cur = *ptr;
586 while (cur != ';') { /* Non input consuming loop */
587 if ((cur >= '0') && (cur <= '9'))
588 val = val * 16 + (cur - '0');
589 else if ((cur >= 'a') && (cur <= 'f'))
590 val = val * 16 + (cur - 'a') + 10;
591 else if ((cur >= 'A') && (cur <= 'F'))
592 val = val * 16 + (cur - 'A') + 10;
593 else {
594 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
596 ctxt->sax->error(ctxt->userData,
597 "xmlParseStringCharRef: invalid hexadecimal value\n");
598 ctxt->wellFormed = 0;
599 ctxt->disableSAX = 1;
600 val = 0;
601 break;
602 }
603 ptr++;
604 cur = *ptr;
605 }
606 if (cur == ';')
607 ptr++;
608 } else if ((cur == '&') && (ptr[1] == '#')){
609 ptr += 2;
610 cur = *ptr;
611 while (cur != ';') { /* Non input consuming loops */
612 if ((cur >= '0') && (cur <= '9'))
613 val = val * 10 + (cur - '0');
614 else {
615 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseStringCharRef: invalid decimal value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 val = 0;
622 break;
623 }
624 ptr++;
625 cur = *ptr;
626 }
627 if (cur == ';')
628 ptr++;
629 } else {
630 ctxt->errNo = XML_ERR_INVALID_CHARREF;
631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
632 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000633 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 return(0);
637 }
638 *str = ptr;
639
640 /*
641 * [ WFC: Legal Character ]
642 * Characters referred to using character references must match the
643 * production for Char.
644 */
645 if (IS_CHAR(val)) {
646 return(val);
647 } else {
648 ctxt->errNo = XML_ERR_INVALID_CHAR;
649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
650 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000651 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000652 ctxt->wellFormed = 0;
653 ctxt->disableSAX = 1;
654 }
655 return(0);
656}
657
658/**
659 * xmlParserHandlePEReference:
660 * @ctxt: the parser context
661 *
662 * [69] PEReference ::= '%' Name ';'
663 *
664 * [ WFC: No Recursion ]
665 * A parsed entity must not contain a recursive
666 * reference to itself, either directly or indirectly.
667 *
668 * [ WFC: Entity Declared ]
669 * In a document without any DTD, a document with only an internal DTD
670 * subset which contains no parameter entity references, or a document
671 * with "standalone='yes'", ... ... The declaration of a parameter
672 * entity must precede any reference to it...
673 *
674 * [ VC: Entity Declared ]
675 * In a document with an external subset or external parameter entities
676 * with "standalone='no'", ... ... The declaration of a parameter entity
677 * must precede any reference to it...
678 *
679 * [ WFC: In DTD ]
680 * Parameter-entity references may only appear in the DTD.
681 * NOTE: misleading but this is handled.
682 *
683 * A PEReference may have been detected in the current input stream
684 * the handling is done accordingly to
685 * http://www.w3.org/TR/REC-xml#entproc
686 * i.e.
687 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000688 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000689 */
690void
691xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
692 xmlChar *name;
693 xmlEntityPtr entity = NULL;
694 xmlParserInputPtr input;
695
696 if (ctxt->token != 0) {
697 return;
698 }
699 if (RAW != '%') return;
700 switch(ctxt->instate) {
701 case XML_PARSER_CDATA_SECTION:
702 return;
703 case XML_PARSER_COMMENT:
704 return;
705 case XML_PARSER_START_TAG:
706 return;
707 case XML_PARSER_END_TAG:
708 return;
709 case XML_PARSER_EOF:
710 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
712 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
713 ctxt->wellFormed = 0;
714 ctxt->disableSAX = 1;
715 return;
716 case XML_PARSER_PROLOG:
717 case XML_PARSER_START:
718 case XML_PARSER_MISC:
719 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
721 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
722 ctxt->wellFormed = 0;
723 ctxt->disableSAX = 1;
724 return;
725 case XML_PARSER_ENTITY_DECL:
726 case XML_PARSER_CONTENT:
727 case XML_PARSER_ATTRIBUTE_VALUE:
728 case XML_PARSER_PI:
729 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000730 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000731 /* we just ignore it there */
732 return;
733 case XML_PARSER_EPILOG:
734 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
736 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
737 ctxt->wellFormed = 0;
738 ctxt->disableSAX = 1;
739 return;
740 case XML_PARSER_ENTITY_VALUE:
741 /*
742 * NOTE: in the case of entity values, we don't do the
743 * substitution here since we need the literal
744 * entity value to be able to save the internal
745 * subset of the document.
746 * This will be handled by xmlStringDecodeEntities
747 */
748 return;
749 case XML_PARSER_DTD:
750 /*
751 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
752 * In the internal DTD subset, parameter-entity references
753 * can occur only where markup declarations can occur, not
754 * within markup declarations.
755 * In that case this is handled in xmlParseMarkupDecl
756 */
757 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
758 return;
759 break;
760 case XML_PARSER_IGNORE:
761 return;
762 }
763
764 NEXT;
765 name = xmlParseName(ctxt);
766 if (xmlParserDebugEntities)
767 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000768 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000769 if (name == NULL) {
770 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000772 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000773 ctxt->wellFormed = 0;
774 ctxt->disableSAX = 1;
775 } else {
776 if (RAW == ';') {
777 NEXT;
778 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
779 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
780 if (entity == NULL) {
781
782 /*
783 * [ WFC: Entity Declared ]
784 * In a document without any DTD, a document with only an
785 * internal DTD subset which contains no parameter entity
786 * references, or a document with "standalone='yes'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((ctxt->standalone == 1) ||
791 ((ctxt->hasExternalSubset == 0) &&
792 (ctxt->hasPErefs == 0))) {
793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
794 ctxt->sax->error(ctxt->userData,
795 "PEReference: %%%s; not found\n", name);
796 ctxt->wellFormed = 0;
797 ctxt->disableSAX = 1;
798 } else {
799 /*
800 * [ VC: Entity Declared ]
801 * In a document with an external subset or external
802 * parameter entities with "standalone='no'", ...
803 * ... The declaration of a parameter entity must precede
804 * any reference to it...
805 */
806 if ((!ctxt->disableSAX) &&
807 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
808 ctxt->vctxt.error(ctxt->vctxt.userData,
809 "PEReference: %%%s; not found\n", name);
810 } else if ((!ctxt->disableSAX) &&
811 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
812 ctxt->sax->warning(ctxt->userData,
813 "PEReference: %%%s; not found\n", name);
814 ctxt->valid = 0;
815 }
816 } else {
817 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
818 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000819 xmlChar start[4];
820 xmlCharEncoding enc;
821
Owen Taylor3473f882001-02-23 17:55:21 +0000822 /*
823 * handle the extra spaces added before and after
824 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000825 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000826 */
827 input = xmlNewEntityInputStream(ctxt, entity);
828 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000829
830 /*
831 * Get the 4 first bytes and decode the charset
832 * if enc != XML_CHAR_ENCODING_NONE
833 * plug some encoding conversion routines.
834 */
835 GROW
836 start[0] = RAW;
837 start[1] = NXT(1);
838 start[2] = NXT(2);
839 start[3] = NXT(3);
840 enc = xmlDetectCharEncoding(start, 4);
841 if (enc != XML_CHAR_ENCODING_NONE) {
842 xmlSwitchEncoding(ctxt, enc);
843 }
844
Owen Taylor3473f882001-02-23 17:55:21 +0000845 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
846 (RAW == '<') && (NXT(1) == '?') &&
847 (NXT(2) == 'x') && (NXT(3) == 'm') &&
848 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
849 xmlParseTextDecl(ctxt);
850 }
851 if (ctxt->token == 0)
852 ctxt->token = ' ';
853 } else {
854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
855 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000856 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000857 name);
858 ctxt->wellFormed = 0;
859 ctxt->disableSAX = 1;
860 }
861 }
862 } else {
863 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
865 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000866 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000867 ctxt->wellFormed = 0;
868 ctxt->disableSAX = 1;
869 }
870 xmlFree(name);
871 }
872}
873
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation of `buffer`, whose size is tracked in the
 * companion variable `buffer_size` (name of the buffer with a _size
 * suffix). It must be used inside a function returning a pointer: when
 * xmlRealloc() fails, the old buffer is released instead of being
 * leaked and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown = (xmlChar *)                               \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));        \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
}
886
887/**
888 * xmlStringDecodeEntities:
889 * @ctxt: the parser context
890 * @str: the input string
891 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
892 * @end: an end marker xmlChar, 0 if none
893 * @end2: an end marker xmlChar, 0 if none
894 * @end3: an end marker xmlChar, 0 if none
895 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000896 * Takes an entity string content and processes it to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000897 *
898 * [67] Reference ::= EntityRef | CharRef
899 *
900 * [69] PEReference ::= '%' Name ';'
901 *
902 * Returns A newly allocated string with the substitution done. The caller
903 * must deallocate it !
904 */
905xmlChar *
906xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
907 xmlChar end, xmlChar end2, xmlChar end3) {
908 xmlChar *buffer = NULL;
909 int buffer_size = 0;
910
911 xmlChar *current = NULL;
912 xmlEntityPtr ent;
913 int c,l;
914 int nbchars = 0;
915
916 if (str == NULL)
917 return(NULL);
918
919 if (ctxt->depth > 40) {
920 ctxt->errNo = XML_ERR_ENTITY_LOOP;
921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
922 ctxt->sax->error(ctxt->userData,
923 "Detected entity reference loop\n");
924 ctxt->wellFormed = 0;
925 ctxt->disableSAX = 1;
926 return(NULL);
927 }
928
929 /*
930 * allocate a translation buffer.
931 */
932 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
933 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
934 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000935 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000936 return(NULL);
937 }
938
939 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000940 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000941 * we are operating on already parsed values.
942 */
943 c = CUR_SCHAR(str, l);
944 while ((c != 0) && (c != end) && /* non input consuming loop */
945 (c != end2) && (c != end3)) {
946
947 if (c == 0) break;
948 if ((c == '&') && (str[1] == '#')) {
949 int val = xmlParseStringCharRef(ctxt, &str);
950 if (val != 0) {
951 COPY_BUF(0,buffer,nbchars,val);
952 }
953 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
954 if (xmlParserDebugEntities)
955 xmlGenericError(xmlGenericErrorContext,
956 "String decoding Entity Reference: %.30s\n",
957 str);
958 ent = xmlParseStringEntityRef(ctxt, &str);
959 if ((ent != NULL) &&
960 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
961 if (ent->content != NULL) {
962 COPY_BUF(0,buffer,nbchars,ent->content[0]);
963 } else {
964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965 ctxt->sax->error(ctxt->userData,
966 "internal error entity has no content\n");
967 }
968 } else if ((ent != NULL) && (ent->content != NULL)) {
969 xmlChar *rep;
970
971 ctxt->depth++;
972 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
973 0, 0, 0);
974 ctxt->depth--;
975 if (rep != NULL) {
976 current = rep;
977 while (*current != 0) { /* non input consuming loop */
978 buffer[nbchars++] = *current++;
979 if (nbchars >
980 buffer_size - XML_PARSER_BUFFER_SIZE) {
981 growBuffer(buffer);
982 }
983 }
984 xmlFree(rep);
985 }
986 } else if (ent != NULL) {
987 int i = xmlStrlen(ent->name);
988 const xmlChar *cur = ent->name;
989
990 buffer[nbchars++] = '&';
991 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
992 growBuffer(buffer);
993 }
994 for (;i > 0;i--)
995 buffer[nbchars++] = *cur++;
996 buffer[nbchars++] = ';';
997 }
998 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
999 if (xmlParserDebugEntities)
1000 xmlGenericError(xmlGenericErrorContext,
1001 "String decoding PE Reference: %.30s\n", str);
1002 ent = xmlParseStringPEReference(ctxt, &str);
1003 if (ent != NULL) {
1004 xmlChar *rep;
1005
1006 ctxt->depth++;
1007 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1008 0, 0, 0);
1009 ctxt->depth--;
1010 if (rep != NULL) {
1011 current = rep;
1012 while (*current != 0) { /* non input consuming loop */
1013 buffer[nbchars++] = *current++;
1014 if (nbchars >
1015 buffer_size - XML_PARSER_BUFFER_SIZE) {
1016 growBuffer(buffer);
1017 }
1018 }
1019 xmlFree(rep);
1020 }
1021 }
1022 } else {
1023 COPY_BUF(l,buffer,nbchars,c);
1024 str += l;
1025 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1026 growBuffer(buffer);
1027 }
1028 }
1029 c = CUR_SCHAR(str, l);
1030 }
1031 buffer[nbchars++] = 0;
1032 return(buffer);
1033}
1034
1035
1036/************************************************************************
1037 * *
1038 * Commodity functions to handle xmlChars *
1039 * *
1040 ************************************************************************/
1041
1042/**
1043 * xmlStrndup:
1044 * @cur: the input xmlChar *
1045 * @len: the len of @cur
1046 *
1047 * a strndup for array of xmlChar's
1048 *
1049 * Returns a new xmlChar * or NULL
1050 */
1051xmlChar *
1052xmlStrndup(const xmlChar *cur, int len) {
1053 xmlChar *ret;
1054
1055 if ((cur == NULL) || (len < 0)) return(NULL);
1056 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1057 if (ret == NULL) {
1058 xmlGenericError(xmlGenericErrorContext,
1059 "malloc of %ld byte failed\n",
1060 (len + 1) * (long)sizeof(xmlChar));
1061 return(NULL);
1062 }
1063 memcpy(ret, cur, len * sizeof(xmlChar));
1064 ret[len] = 0;
1065 return(ret);
1066}
1067
1068/**
1069 * xmlStrdup:
1070 * @cur: the input xmlChar *
1071 *
1072 * a strdup for array of xmlChar's. Since they are supposed to be
1073 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1074 * a termination mark of '0'.
1075 *
1076 * Returns a new xmlChar * or NULL
1077 */
1078xmlChar *
1079xmlStrdup(const xmlChar *cur) {
1080 const xmlChar *p = cur;
1081
1082 if (cur == NULL) return(NULL);
1083 while (*p != 0) p++; /* non input consuming */
1084 return(xmlStrndup(cur, p - cur));
1085}
1086
1087/**
1088 * xmlCharStrndup:
1089 * @cur: the input char *
1090 * @len: the len of @cur
1091 *
1092 * a strndup for char's to xmlChar's
1093 *
1094 * Returns a new xmlChar * or NULL
1095 */
1096
1097xmlChar *
1098xmlCharStrndup(const char *cur, int len) {
1099 int i;
1100 xmlChar *ret;
1101
1102 if ((cur == NULL) || (len < 0)) return(NULL);
1103 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1104 if (ret == NULL) {
1105 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1106 (len + 1) * (long)sizeof(xmlChar));
1107 return(NULL);
1108 }
1109 for (i = 0;i < len;i++)
1110 ret[i] = (xmlChar) cur[i];
1111 ret[len] = 0;
1112 return(ret);
1113}
1114
1115/**
1116 * xmlCharStrdup:
1117 * @cur: the input char *
1118 * @len: the len of @cur
1119 *
1120 * a strdup for char's to xmlChar's
1121 *
1122 * Returns a new xmlChar * or NULL
1123 */
1124
1125xmlChar *
1126xmlCharStrdup(const char *cur) {
1127 const char *p = cur;
1128
1129 if (cur == NULL) return(NULL);
1130 while (*p != '\0') p++; /* non input consuming */
1131 return(xmlCharStrndup(cur, p - cur));
1132}
1133
1134/**
1135 * xmlStrcmp:
1136 * @str1: the first xmlChar *
1137 * @str2: the second xmlChar *
1138 *
1139 * a strcmp for xmlChar's
1140 *
1141 * Returns the integer result of the comparison
1142 */
1143
1144int
1145xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1146 register int tmp;
1147
1148 if (str1 == str2) return(0);
1149 if (str1 == NULL) return(-1);
1150 if (str2 == NULL) return(1);
1151 do {
1152 tmp = *str1++ - *str2;
1153 if (tmp != 0) return(tmp);
1154 } while (*str2++ != 0);
1155 return 0;
1156}
1157
1158/**
1159 * xmlStrEqual:
1160 * @str1: the first xmlChar *
1161 * @str2: the second xmlChar *
1162 *
1163 * Check if both string are equal of have same content
1164 * Should be a bit more readable and faster than xmlStrEqual()
1165 *
1166 * Returns 1 if they are equal, 0 if they are different
1167 */
1168
1169int
1170xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1171 if (str1 == str2) return(1);
1172 if (str1 == NULL) return(0);
1173 if (str2 == NULL) return(0);
1174 do {
1175 if (*str1++ != *str2) return(0);
1176 } while (*str2++);
1177 return(1);
1178}
1179
1180/**
1181 * xmlStrncmp:
1182 * @str1: the first xmlChar *
1183 * @str2: the second xmlChar *
1184 * @len: the max comparison length
1185 *
1186 * a strncmp for xmlChar's
1187 *
1188 * Returns the integer result of the comparison
1189 */
1190
1191int
1192xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1193 register int tmp;
1194
1195 if (len <= 0) return(0);
1196 if (str1 == str2) return(0);
1197 if (str1 == NULL) return(-1);
1198 if (str2 == NULL) return(1);
1199 do {
1200 tmp = *str1++ - *str2;
1201 if (tmp != 0 || --len == 0) return(tmp);
1202 } while (*str2++ != 0);
1203 return 0;
1204}
1205
Daniel Veillardb44025c2001-10-11 22:55:55 +00001206static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1208 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1209 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1210 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1211 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1212 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1213 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1214 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1215 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1216 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1217 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1218 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1219 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1220 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1221 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1222 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1223 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1224 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1225 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1226 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1227 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1228 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1229 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1230 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1231 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1232 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1233 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1234 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1235 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1236 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1237 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1238 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1239};
1240
1241/**
1242 * xmlStrcasecmp:
1243 * @str1: the first xmlChar *
1244 * @str2: the second xmlChar *
1245 *
1246 * a strcasecmp for xmlChar's
1247 *
1248 * Returns the integer result of the comparison
1249 */
1250
1251int
1252xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1253 register int tmp;
1254
1255 if (str1 == str2) return(0);
1256 if (str1 == NULL) return(-1);
1257 if (str2 == NULL) return(1);
1258 do {
1259 tmp = casemap[*str1++] - casemap[*str2];
1260 if (tmp != 0) return(tmp);
1261 } while (*str2++ != 0);
1262 return 0;
1263}
1264
1265/**
1266 * xmlStrncasecmp:
1267 * @str1: the first xmlChar *
1268 * @str2: the second xmlChar *
1269 * @len: the max comparison length
1270 *
1271 * a strncasecmp for xmlChar's
1272 *
1273 * Returns the integer result of the comparison
1274 */
1275
1276int
1277xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1278 register int tmp;
1279
1280 if (len <= 0) return(0);
1281 if (str1 == str2) return(0);
1282 if (str1 == NULL) return(-1);
1283 if (str2 == NULL) return(1);
1284 do {
1285 tmp = casemap[*str1++] - casemap[*str2];
1286 if (tmp != 0 || --len == 0) return(tmp);
1287 } while (*str2++ != 0);
1288 return 0;
1289}
1290
1291/**
1292 * xmlStrchr:
1293 * @str: the xmlChar * array
1294 * @val: the xmlChar to search
1295 *
1296 * a strchr for xmlChar's
1297 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001298 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001299 */
1300
1301const xmlChar *
1302xmlStrchr(const xmlChar *str, xmlChar val) {
1303 if (str == NULL) return(NULL);
1304 while (*str != 0) { /* non input consuming */
1305 if (*str == val) return((xmlChar *) str);
1306 str++;
1307 }
1308 return(NULL);
1309}
1310
1311/**
1312 * xmlStrstr:
1313 * @str: the xmlChar * array (haystack)
1314 * @val: the xmlChar to search (needle)
1315 *
1316 * a strstr for xmlChar's
1317 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001318 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001319 */
1320
1321const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001322xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001323 int n;
1324
1325 if (str == NULL) return(NULL);
1326 if (val == NULL) return(NULL);
1327 n = xmlStrlen(val);
1328
1329 if (n == 0) return(str);
1330 while (*str != 0) { /* non input consuming */
1331 if (*str == *val) {
1332 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1333 }
1334 str++;
1335 }
1336 return(NULL);
1337}
1338
1339/**
1340 * xmlStrcasestr:
1341 * @str: the xmlChar * array (haystack)
1342 * @val: the xmlChar to search (needle)
1343 *
1344 * a case-ignoring strstr for xmlChar's
1345 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001346 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001347 */
1348
1349const xmlChar *
1350xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1351 int n;
1352
1353 if (str == NULL) return(NULL);
1354 if (val == NULL) return(NULL);
1355 n = xmlStrlen(val);
1356
1357 if (n == 0) return(str);
1358 while (*str != 0) { /* non input consuming */
1359 if (casemap[*str] == casemap[*val])
1360 if (!xmlStrncasecmp(str, val, n)) return(str);
1361 str++;
1362 }
1363 return(NULL);
1364}
1365
1366/**
1367 * xmlStrsub:
1368 * @str: the xmlChar * array (haystack)
1369 * @start: the index of the first char (zero based)
1370 * @len: the length of the substring
1371 *
1372 * Extract a substring of a given string
1373 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001374 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001375 */
1376
1377xmlChar *
1378xmlStrsub(const xmlChar *str, int start, int len) {
1379 int i;
1380
1381 if (str == NULL) return(NULL);
1382 if (start < 0) return(NULL);
1383 if (len < 0) return(NULL);
1384
1385 for (i = 0;i < start;i++) {
1386 if (*str == 0) return(NULL);
1387 str++;
1388 }
1389 if (*str == 0) return(NULL);
1390 return(xmlStrndup(str, len));
1391}
1392
1393/**
1394 * xmlStrlen:
1395 * @str: the xmlChar * array
1396 *
1397 * length of a xmlChar's string
1398 *
1399 * Returns the number of xmlChar contained in the ARRAY.
1400 */
1401
1402int
1403xmlStrlen(const xmlChar *str) {
1404 int len = 0;
1405
1406 if (str == NULL) return(0);
1407 while (*str != 0) { /* non input consuming */
1408 str++;
1409 len++;
1410 }
1411 return(len);
1412}
1413
1414/**
1415 * xmlStrncat:
1416 * @cur: the original xmlChar * array
1417 * @add: the xmlChar * array added
1418 * @len: the length of @add
1419 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001420 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001421 * first bytes of @add.
1422 *
1423 * Returns a new xmlChar *, the original @cur is reallocated if needed
1424 * and should not be freed
1425 */
1426
1427xmlChar *
1428xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1429 int size;
1430 xmlChar *ret;
1431
1432 if ((add == NULL) || (len == 0))
1433 return(cur);
1434 if (cur == NULL)
1435 return(xmlStrndup(add, len));
1436
1437 size = xmlStrlen(cur);
1438 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1439 if (ret == NULL) {
1440 xmlGenericError(xmlGenericErrorContext,
1441 "xmlStrncat: realloc of %ld byte failed\n",
1442 (size + len + 1) * (long)sizeof(xmlChar));
1443 return(cur);
1444 }
1445 memcpy(&ret[size], add, len * sizeof(xmlChar));
1446 ret[size + len] = 0;
1447 return(ret);
1448}
1449
1450/**
1451 * xmlStrcat:
1452 * @cur: the original xmlChar * array
1453 * @add: the xmlChar * array added
1454 *
1455 * a strcat for array of xmlChar's. Since they are supposed to be
1456 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1457 * a termination mark of '0'.
1458 *
1459 * Returns a new xmlChar * containing the concatenated string.
1460 */
1461xmlChar *
1462xmlStrcat(xmlChar *cur, const xmlChar *add) {
1463 const xmlChar *p = add;
1464
1465 if (add == NULL) return(cur);
1466 if (cur == NULL)
1467 return(xmlStrdup(add));
1468
1469 while (*p != 0) p++; /* non input consuming */
1470 return(xmlStrncat(cur, add, p - add));
1471}
1472
1473/************************************************************************
1474 * *
1475 * Commodity functions, cleanup needed ? *
1476 * *
1477 ************************************************************************/
1478
1479/**
1480 * areBlanks:
1481 * @ctxt: an XML parser context
1482 * @str: a xmlChar *
1483 * @len: the size of @str
1484 *
1485 * Is this a sequence of blank chars that one can ignore ?
1486 *
1487 * Returns 1 if ignorable 0 otherwise.
1488 */
1489
1490static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1491 int i, ret;
1492 xmlNodePtr lastChild;
1493
Daniel Veillard05c13a22001-09-09 08:38:09 +00001494 /*
1495 * Don't spend time trying to differentiate them, the same callback is
1496 * used !
1497 */
1498 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001499 return(0);
1500
Owen Taylor3473f882001-02-23 17:55:21 +00001501 /*
1502 * Check for xml:space value.
1503 */
1504 if (*(ctxt->space) == 1)
1505 return(0);
1506
1507 /*
1508 * Check that the string is made of blanks
1509 */
1510 for (i = 0;i < len;i++)
1511 if (!(IS_BLANK(str[i]))) return(0);
1512
1513 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001514 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001515 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001516 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001517 if (ctxt->myDoc != NULL) {
1518 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1519 if (ret == 0) return(1);
1520 if (ret == 1) return(0);
1521 }
1522
1523 /*
1524 * Otherwise, heuristic :-\
1525 */
Owen Taylor3473f882001-02-23 17:55:21 +00001526 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001527 if ((ctxt->node->children == NULL) &&
1528 (RAW == '<') && (NXT(1) == '/')) return(0);
1529
1530 lastChild = xmlGetLastChild(ctxt->node);
1531 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001532 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1533 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001534 } else if (xmlNodeIsText(lastChild))
1535 return(0);
1536 else if ((ctxt->node->children != NULL) &&
1537 (xmlNodeIsText(ctxt->node->children)))
1538 return(0);
1539 return(1);
1540}
1541
Owen Taylor3473f882001-02-23 17:55:21 +00001542/************************************************************************
1543 * *
1544 * Extra stuff for namespace support *
1545 * Relates to http://www.w3.org/TR/WD-xml-names *
1546 * *
1547 ************************************************************************/
1548
1549/**
1550 * xmlSplitQName:
1551 * @ctxt: an XML parser context
1552 * @name: an XML parser context
1553 * @prefix: a xmlChar **
1554 *
1555 * parse an UTF8 encoded XML qualified name string
1556 *
1557 * [NS 5] QName ::= (Prefix ':')? LocalPart
1558 *
1559 * [NS 6] Prefix ::= NCName
1560 *
1561 * [NS 7] LocalPart ::= NCName
1562 *
1563 * Returns the local part, and prefix is updated
1564 * to get the Prefix if any.
1565 */
1566
1567xmlChar *
1568xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1569 xmlChar buf[XML_MAX_NAMELEN + 5];
1570 xmlChar *buffer = NULL;
1571 int len = 0;
1572 int max = XML_MAX_NAMELEN;
1573 xmlChar *ret = NULL;
1574 const xmlChar *cur = name;
1575 int c;
1576
1577 *prefix = NULL;
1578
1579#ifndef XML_XML_NAMESPACE
1580 /* xml: prefix is not really a namespace */
1581 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1582 (cur[2] == 'l') && (cur[3] == ':'))
1583 return(xmlStrdup(name));
1584#endif
1585
1586 /* nasty but valid */
1587 if (cur[0] == ':')
1588 return(xmlStrdup(name));
1589
1590 c = *cur++;
1591 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1592 buf[len++] = c;
1593 c = *cur++;
1594 }
1595 if (len >= max) {
1596 /*
1597 * Okay someone managed to make a huge name, so he's ready to pay
1598 * for the processing speed.
1599 */
1600 max = len * 2;
1601
1602 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1603 if (buffer == NULL) {
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData,
1606 "xmlSplitQName: out of memory\n");
1607 return(NULL);
1608 }
1609 memcpy(buffer, buf, len);
1610 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1611 if (len + 10 > max) {
1612 max *= 2;
1613 buffer = (xmlChar *) xmlRealloc(buffer,
1614 max * sizeof(xmlChar));
1615 if (buffer == NULL) {
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData,
1618 "xmlSplitQName: out of memory\n");
1619 return(NULL);
1620 }
1621 }
1622 buffer[len++] = c;
1623 c = *cur++;
1624 }
1625 buffer[len] = 0;
1626 }
1627
1628 if (buffer == NULL)
1629 ret = xmlStrndup(buf, len);
1630 else {
1631 ret = buffer;
1632 buffer = NULL;
1633 max = XML_MAX_NAMELEN;
1634 }
1635
1636
1637 if (c == ':') {
1638 c = *cur++;
1639 if (c == 0) return(ret);
1640 *prefix = ret;
1641 len = 0;
1642
1643 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1644 buf[len++] = c;
1645 c = *cur++;
1646 }
1647 if (len >= max) {
1648 /*
1649 * Okay someone managed to make a huge name, so he's ready to pay
1650 * for the processing speed.
1651 */
1652 max = len * 2;
1653
1654 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1655 if (buffer == NULL) {
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "xmlSplitQName: out of memory\n");
1659 return(NULL);
1660 }
1661 memcpy(buffer, buf, len);
1662 while (c != 0) { /* tested bigname2.xml */
1663 if (len + 10 > max) {
1664 max *= 2;
1665 buffer = (xmlChar *) xmlRealloc(buffer,
1666 max * sizeof(xmlChar));
1667 if (buffer == NULL) {
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "xmlSplitQName: out of memory\n");
1671 return(NULL);
1672 }
1673 }
1674 buffer[len++] = c;
1675 c = *cur++;
1676 }
1677 buffer[len] = 0;
1678 }
1679
1680 if (buffer == NULL)
1681 ret = xmlStrndup(buf, len);
1682 else {
1683 ret = buffer;
1684 }
1685 }
1686
1687 return(ret);
1688}
1689
1690/************************************************************************
1691 * *
1692 * The parser itself *
1693 * Relates to http://www.w3.org/TR/REC-xml *
1694 * *
1695 ************************************************************************/
1696
Daniel Veillard76d66f42001-05-16 21:05:17 +00001697static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001698/**
1699 * xmlParseName:
1700 * @ctxt: an XML parser context
1701 *
1702 * parse an XML name.
1703 *
1704 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1705 * CombiningChar | Extender
1706 *
1707 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1708 *
1709 * [6] Names ::= Name (S Name)*
1710 *
1711 * Returns the Name parsed or NULL
1712 */
1713
1714xmlChar *
1715xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001716 const xmlChar *in;
1717 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
1719
1720 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001721
1722 /*
1723 * Accelerator for simple ASCII names
1724 */
1725 in = ctxt->input->cur;
1726 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1727 ((*in >= 0x41) && (*in <= 0x5A)) ||
1728 (*in == '_') || (*in == ':')) {
1729 in++;
1730 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1731 ((*in >= 0x41) && (*in <= 0x5A)) ||
1732 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001733 (*in == '_') || (*in == '-') ||
1734 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001735 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001737 count = in - ctxt->input->cur;
1738 ret = xmlStrndup(ctxt->input->cur, count);
1739 ctxt->input->cur = in;
1740 return(ret);
1741 }
1742 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001743 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001744}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001745
Daniel Veillard76d66f42001-05-16 21:05:17 +00001746static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001747xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1748 xmlChar buf[XML_MAX_NAMELEN + 5];
1749 int len = 0, l;
1750 int c;
1751 int count = 0;
1752
1753 /*
1754 * Handler for more complex cases
1755 */
1756 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 c = CUR_CHAR(l);
1758 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1759 (!IS_LETTER(c) && (c != '_') &&
1760 (c != ':'))) {
1761 return(NULL);
1762 }
1763
1764 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1765 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1766 (c == '.') || (c == '-') ||
1767 (c == '_') || (c == ':') ||
1768 (IS_COMBINING(c)) ||
1769 (IS_EXTENDER(c)))) {
1770 if (count++ > 100) {
1771 count = 0;
1772 GROW;
1773 }
1774 COPY_BUF(l,buf,len,c);
1775 NEXTL(l);
1776 c = CUR_CHAR(l);
1777 if (len >= XML_MAX_NAMELEN) {
1778 /*
1779 * Okay someone managed to make a huge name, so he's ready to pay
1780 * for the processing speed.
1781 */
1782 xmlChar *buffer;
1783 int max = len * 2;
1784
1785 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1786 if (buffer == NULL) {
1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001789 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001790 return(NULL);
1791 }
1792 memcpy(buffer, buf, len);
1793 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1794 (c == '.') || (c == '-') ||
1795 (c == '_') || (c == ':') ||
1796 (IS_COMBINING(c)) ||
1797 (IS_EXTENDER(c))) {
1798 if (count++ > 100) {
1799 count = 0;
1800 GROW;
1801 }
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
1807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1808 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001809 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001810 return(NULL);
1811 }
1812 }
1813 COPY_BUF(l,buffer,len,c);
1814 NEXTL(l);
1815 c = CUR_CHAR(l);
1816 }
1817 buffer[len] = 0;
1818 return(buffer);
1819 }
1820 }
1821 return(xmlStrndup(buf, len));
1822}
1823
1824/**
1825 * xmlParseStringName:
1826 * @ctxt: an XML parser context
1827 * @str: a pointer to the string pointer (IN/OUT)
1828 *
1829 * parse an XML name.
1830 *
1831 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1832 * CombiningChar | Extender
1833 *
1834 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1835 *
1836 * [6] Names ::= Name (S Name)*
1837 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001838 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001839 * is updated to the current location in the string.
1840 */
1841
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001842static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001843xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1844 xmlChar buf[XML_MAX_NAMELEN + 5];
1845 const xmlChar *cur = *str;
1846 int len = 0, l;
1847 int c;
1848
1849 c = CUR_SCHAR(cur, l);
1850 if (!IS_LETTER(c) && (c != '_') &&
1851 (c != ':')) {
1852 return(NULL);
1853 }
1854
1855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1856 (c == '.') || (c == '-') ||
1857 (c == '_') || (c == ':') ||
1858 (IS_COMBINING(c)) ||
1859 (IS_EXTENDER(c))) {
1860 COPY_BUF(l,buf,len,c);
1861 cur += l;
1862 c = CUR_SCHAR(cur, l);
1863 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1864 /*
1865 * Okay someone managed to make a huge name, so he's ready to pay
1866 * for the processing speed.
1867 */
1868 xmlChar *buffer;
1869 int max = len * 2;
1870
1871 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1872 if (buffer == NULL) {
1873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1874 ctxt->sax->error(ctxt->userData,
1875 "xmlParseStringName: out of memory\n");
1876 return(NULL);
1877 }
1878 memcpy(buffer, buf, len);
1879 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1880 (c == '.') || (c == '-') ||
1881 (c == '_') || (c == ':') ||
1882 (IS_COMBINING(c)) ||
1883 (IS_EXTENDER(c))) {
1884 if (len + 10 > max) {
1885 max *= 2;
1886 buffer = (xmlChar *) xmlRealloc(buffer,
1887 max * sizeof(xmlChar));
1888 if (buffer == NULL) {
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "xmlParseStringName: out of memory\n");
1892 return(NULL);
1893 }
1894 }
1895 COPY_BUF(l,buffer,len,c);
1896 cur += l;
1897 c = CUR_SCHAR(cur, l);
1898 }
1899 buffer[len] = 0;
1900 *str = cur;
1901 return(buffer);
1902 }
1903 }
1904 *str = cur;
1905 return(xmlStrndup(buf, len));
1906}
1907
1908/**
1909 * xmlParseNmtoken:
1910 * @ctxt: an XML parser context
1911 *
1912 * parse an XML Nmtoken.
1913 *
1914 * [7] Nmtoken ::= (NameChar)+
1915 *
1916 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1917 *
1918 * Returns the Nmtoken parsed or NULL
1919 */
1920
1921xmlChar *
1922xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1923 xmlChar buf[XML_MAX_NAMELEN + 5];
1924 int len = 0, l;
1925 int c;
1926 int count = 0;
1927
1928 GROW;
1929 c = CUR_CHAR(l);
1930
1931 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1932 (c == '.') || (c == '-') ||
1933 (c == '_') || (c == ':') ||
1934 (IS_COMBINING(c)) ||
1935 (IS_EXTENDER(c))) {
1936 if (count++ > 100) {
1937 count = 0;
1938 GROW;
1939 }
1940 COPY_BUF(l,buf,len,c);
1941 NEXTL(l);
1942 c = CUR_CHAR(l);
1943 if (len >= XML_MAX_NAMELEN) {
1944 /*
1945 * Okay someone managed to make a huge token, so he's ready to pay
1946 * for the processing speed.
1947 */
1948 xmlChar *buffer;
1949 int max = len * 2;
1950
1951 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1952 if (buffer == NULL) {
1953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1954 ctxt->sax->error(ctxt->userData,
1955 "xmlParseNmtoken: out of memory\n");
1956 return(NULL);
1957 }
1958 memcpy(buffer, buf, len);
1959 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1960 (c == '.') || (c == '-') ||
1961 (c == '_') || (c == ':') ||
1962 (IS_COMBINING(c)) ||
1963 (IS_EXTENDER(c))) {
1964 if (count++ > 100) {
1965 count = 0;
1966 GROW;
1967 }
1968 if (len + 10 > max) {
1969 max *= 2;
1970 buffer = (xmlChar *) xmlRealloc(buffer,
1971 max * sizeof(xmlChar));
1972 if (buffer == NULL) {
1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1974 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001975 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001976 return(NULL);
1977 }
1978 }
1979 COPY_BUF(l,buffer,len,c);
1980 NEXTL(l);
1981 c = CUR_CHAR(l);
1982 }
1983 buffer[len] = 0;
1984 return(buffer);
1985 }
1986 }
1987 if (len == 0)
1988 return(NULL);
1989 return(xmlStrndup(buf, len));
1990}
1991
1992/**
1993 * xmlParseEntityValue:
1994 * @ctxt: an XML parser context
1995 * @orig: if non-NULL store a copy of the original entity value
1996 *
1997 * parse a value for ENTITY declarations
1998 *
1999 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2000 * "'" ([^%&'] | PEReference | Reference)* "'"
2001 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002002 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002003 */
2004
2005xmlChar *
2006xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2007 xmlChar *buf = NULL;
2008 int len = 0;
2009 int size = XML_PARSER_BUFFER_SIZE;
2010 int c, l;
2011 xmlChar stop;
2012 xmlChar *ret = NULL;
2013 const xmlChar *cur = NULL;
2014 xmlParserInputPtr input;
2015
2016 if (RAW == '"') stop = '"';
2017 else if (RAW == '\'') stop = '\'';
2018 else {
2019 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2021 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2022 ctxt->wellFormed = 0;
2023 ctxt->disableSAX = 1;
2024 return(NULL);
2025 }
2026 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2027 if (buf == NULL) {
2028 xmlGenericError(xmlGenericErrorContext,
2029 "malloc of %d byte failed\n", size);
2030 return(NULL);
2031 }
2032
2033 /*
2034 * The content of the entity definition is copied in a buffer.
2035 */
2036
2037 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2038 input = ctxt->input;
2039 GROW;
2040 NEXT;
2041 c = CUR_CHAR(l);
2042 /*
2043 * NOTE: 4.4.5 Included in Literal
2044 * When a parameter entity reference appears in a literal entity
2045 * value, ... a single or double quote character in the replacement
2046 * text is always treated as a normal data character and will not
2047 * terminate the literal.
2048 * In practice it means we stop the loop only when back at parsing
2049 * the initial entity and the quote is found
2050 */
2051 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2052 (ctxt->input != input))) {
2053 if (len + 5 >= size) {
2054 size *= 2;
2055 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2056 if (buf == NULL) {
2057 xmlGenericError(xmlGenericErrorContext,
2058 "realloc of %d byte failed\n", size);
2059 return(NULL);
2060 }
2061 }
2062 COPY_BUF(l,buf,len,c);
2063 NEXTL(l);
2064 /*
2065 * Pop-up of finished entities.
2066 */
2067 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2068 xmlPopInput(ctxt);
2069
2070 GROW;
2071 c = CUR_CHAR(l);
2072 if (c == 0) {
2073 GROW;
2074 c = CUR_CHAR(l);
2075 }
2076 }
2077 buf[len] = 0;
2078
2079 /*
2080 * Raise problem w.r.t. '&' and '%' being used in non-entities
2081 * reference constructs. Note Charref will be handled in
2082 * xmlStringDecodeEntities()
2083 */
2084 cur = buf;
2085 while (*cur != 0) { /* non input consuming */
2086 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2087 xmlChar *name;
2088 xmlChar tmp = *cur;
2089
2090 cur++;
2091 name = xmlParseStringName(ctxt, &cur);
2092 if ((name == NULL) || (*cur != ';')) {
2093 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2095 ctxt->sax->error(ctxt->userData,
2096 "EntityValue: '%c' forbidden except for entities references\n",
2097 tmp);
2098 ctxt->wellFormed = 0;
2099 ctxt->disableSAX = 1;
2100 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002101 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2102 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002103 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2105 ctxt->sax->error(ctxt->userData,
2106 "EntityValue: PEReferences forbidden in internal subset\n",
2107 tmp);
2108 ctxt->wellFormed = 0;
2109 ctxt->disableSAX = 1;
2110 }
2111 if (name != NULL)
2112 xmlFree(name);
2113 }
2114 cur++;
2115 }
2116
2117 /*
2118 * Then PEReference entities are substituted.
2119 */
2120 if (c != stop) {
2121 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2123 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2124 ctxt->wellFormed = 0;
2125 ctxt->disableSAX = 1;
2126 xmlFree(buf);
2127 } else {
2128 NEXT;
2129 /*
2130 * NOTE: 4.4.7 Bypassed
2131 * When a general entity reference appears in the EntityValue in
2132 * an entity declaration, it is bypassed and left as is.
2133 * so XML_SUBSTITUTE_REF is not set here.
2134 */
2135 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2136 0, 0, 0);
2137 if (orig != NULL)
2138 *orig = buf;
2139 else
2140 xmlFree(buf);
2141 }
2142
2143 return(ret);
2144}
2145
2146/**
2147 * xmlParseAttValue:
2148 * @ctxt: an XML parser context
2149 *
2150 * parse a value for an attribute
2151 * Note: the parser won't do substitution of entities here, this
2152 * will be handled later in xmlStringGetNodeList
2153 *
2154 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2155 * "'" ([^<&'] | Reference)* "'"
2156 *
2157 * 3.3.3 Attribute-Value Normalization:
2158 * Before the value of an attribute is passed to the application or
2159 * checked for validity, the XML processor must normalize it as follows:
2160 * - a character reference is processed by appending the referenced
2161 * character to the attribute value
2162 * - an entity reference is processed by recursively processing the
2163 * replacement text of the entity
2164 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2165 * appending #x20 to the normalized value, except that only a single
2166 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2167 * parsed entity or the literal entity value of an internal parsed entity
2168 * - other characters are processed by appending them to the normalized value
2169 * If the declared value is not CDATA, then the XML processor must further
2170 * process the normalized attribute value by discarding any leading and
2171 * trailing space (#x20) characters, and by replacing sequences of space
2172 * (#x20) characters by a single space (#x20) character.
2173 * All attributes for which no declaration has been read should be treated
2174 * by a non-validating parser as if declared CDATA.
2175 *
2176 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2177 */
2178
2179xmlChar *
2180xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2181 xmlChar limit = 0;
2182 xmlChar *buf = NULL;
2183 int len = 0;
2184 int buf_size = 0;
2185 int c, l;
2186 xmlChar *current = NULL;
2187 xmlEntityPtr ent;
2188
2189
2190 SHRINK;
2191 if (NXT(0) == '"') {
2192 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2193 limit = '"';
2194 NEXT;
2195 } else if (NXT(0) == '\'') {
2196 limit = '\'';
2197 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2198 NEXT;
2199 } else {
2200 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2202 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2203 ctxt->wellFormed = 0;
2204 ctxt->disableSAX = 1;
2205 return(NULL);
2206 }
2207
2208 /*
2209 * allocate a translation buffer.
2210 */
2211 buf_size = XML_PARSER_BUFFER_SIZE;
2212 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2213 if (buf == NULL) {
2214 perror("xmlParseAttValue: malloc failed");
2215 return(NULL);
2216 }
2217
2218 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002219 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002220 */
2221 c = CUR_CHAR(l);
2222 while (((NXT(0) != limit) && /* checked */
2223 (c != '<')) || (ctxt->token != 0)) {
2224 if (c == 0) break;
2225 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002226 if (ctxt->replaceEntities) {
2227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 buf[len++] = '&';
2231 } else {
2232 /*
2233 * The reparsing will be done in xmlStringGetNodeList()
2234 * called by the attribute() function in SAX.c
2235 */
2236 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002237
Daniel Veillard319a7422001-09-11 09:27:09 +00002238 if (len > buf_size - 10) {
2239 growBuffer(buf);
2240 }
2241 current = &buffer[0];
2242 while (*current != 0) { /* non input consuming */
2243 buf[len++] = *current++;
2244 }
2245 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002246 }
Owen Taylor3473f882001-02-23 17:55:21 +00002247 } else if (c == '&') {
2248 if (NXT(1) == '#') {
2249 int val = xmlParseCharRef(ctxt);
2250 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002251 if (ctxt->replaceEntities) {
2252 if (len > buf_size - 10) {
2253 growBuffer(buf);
2254 }
2255 buf[len++] = '&';
2256 } else {
2257 /*
2258 * The reparsing will be done in xmlStringGetNodeList()
2259 * called by the attribute() function in SAX.c
2260 */
2261 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002262
Daniel Veillard319a7422001-09-11 09:27:09 +00002263 if (len > buf_size - 10) {
2264 growBuffer(buf);
2265 }
2266 current = &buffer[0];
2267 while (*current != 0) { /* non input consuming */
2268 buf[len++] = *current++;
2269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 }
2271 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002272 if (len > buf_size - 10) {
2273 growBuffer(buf);
2274 }
Owen Taylor3473f882001-02-23 17:55:21 +00002275 len += xmlCopyChar(0, &buf[len], val);
2276 }
2277 } else {
2278 ent = xmlParseEntityRef(ctxt);
2279 if ((ent != NULL) &&
2280 (ctxt->replaceEntities != 0)) {
2281 xmlChar *rep;
2282
2283 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2284 rep = xmlStringDecodeEntities(ctxt, ent->content,
2285 XML_SUBSTITUTE_REF, 0, 0, 0);
2286 if (rep != NULL) {
2287 current = rep;
2288 while (*current != 0) { /* non input consuming */
2289 buf[len++] = *current++;
2290 if (len > buf_size - 10) {
2291 growBuffer(buf);
2292 }
2293 }
2294 xmlFree(rep);
2295 }
2296 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002297 if (len > buf_size - 10) {
2298 growBuffer(buf);
2299 }
Owen Taylor3473f882001-02-23 17:55:21 +00002300 if (ent->content != NULL)
2301 buf[len++] = ent->content[0];
2302 }
2303 } else if (ent != NULL) {
2304 int i = xmlStrlen(ent->name);
2305 const xmlChar *cur = ent->name;
2306
2307 /*
2308 * This may look absurd but is needed to detect
2309 * entities problems
2310 */
2311 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2312 (ent->content != NULL)) {
2313 xmlChar *rep;
2314 rep = xmlStringDecodeEntities(ctxt, ent->content,
2315 XML_SUBSTITUTE_REF, 0, 0, 0);
2316 if (rep != NULL)
2317 xmlFree(rep);
2318 }
2319
2320 /*
2321 * Just output the reference
2322 */
2323 buf[len++] = '&';
2324 if (len > buf_size - i - 10) {
2325 growBuffer(buf);
2326 }
2327 for (;i > 0;i--)
2328 buf[len++] = *cur++;
2329 buf[len++] = ';';
2330 }
2331 }
2332 } else {
2333 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2334 COPY_BUF(l,buf,len,0x20);
2335 if (len > buf_size - 10) {
2336 growBuffer(buf);
2337 }
2338 } else {
2339 COPY_BUF(l,buf,len,c);
2340 if (len > buf_size - 10) {
2341 growBuffer(buf);
2342 }
2343 }
2344 NEXTL(l);
2345 }
2346 GROW;
2347 c = CUR_CHAR(l);
2348 }
2349 buf[len++] = 0;
2350 if (RAW == '<') {
2351 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353 ctxt->sax->error(ctxt->userData,
2354 "Unescaped '<' not allowed in attributes values\n");
2355 ctxt->wellFormed = 0;
2356 ctxt->disableSAX = 1;
2357 } else if (RAW != limit) {
2358 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2360 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2361 ctxt->wellFormed = 0;
2362 ctxt->disableSAX = 1;
2363 } else
2364 NEXT;
2365 return(buf);
2366}
2367
2368/**
2369 * xmlParseSystemLiteral:
2370 * @ctxt: an XML parser context
2371 *
2372 * parse an XML Literal
2373 *
2374 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2375 *
2376 * Returns the SystemLiteral parsed or NULL
2377 */
2378
2379xmlChar *
2380xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2381 xmlChar *buf = NULL;
2382 int len = 0;
2383 int size = XML_PARSER_BUFFER_SIZE;
2384 int cur, l;
2385 xmlChar stop;
2386 int state = ctxt->instate;
2387 int count = 0;
2388
2389 SHRINK;
2390 if (RAW == '"') {
2391 NEXT;
2392 stop = '"';
2393 } else if (RAW == '\'') {
2394 NEXT;
2395 stop = '\'';
2396 } else {
2397 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399 ctxt->sax->error(ctxt->userData,
2400 "SystemLiteral \" or ' expected\n");
2401 ctxt->wellFormed = 0;
2402 ctxt->disableSAX = 1;
2403 return(NULL);
2404 }
2405
2406 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2407 if (buf == NULL) {
2408 xmlGenericError(xmlGenericErrorContext,
2409 "malloc of %d byte failed\n", size);
2410 return(NULL);
2411 }
2412 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2413 cur = CUR_CHAR(l);
2414 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2415 if (len + 5 >= size) {
2416 size *= 2;
2417 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2418 if (buf == NULL) {
2419 xmlGenericError(xmlGenericErrorContext,
2420 "realloc of %d byte failed\n", size);
2421 ctxt->instate = (xmlParserInputState) state;
2422 return(NULL);
2423 }
2424 }
2425 count++;
2426 if (count > 50) {
2427 GROW;
2428 count = 0;
2429 }
2430 COPY_BUF(l,buf,len,cur);
2431 NEXTL(l);
2432 cur = CUR_CHAR(l);
2433 if (cur == 0) {
2434 GROW;
2435 SHRINK;
2436 cur = CUR_CHAR(l);
2437 }
2438 }
2439 buf[len] = 0;
2440 ctxt->instate = (xmlParserInputState) state;
2441 if (!IS_CHAR(cur)) {
2442 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2444 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2445 ctxt->wellFormed = 0;
2446 ctxt->disableSAX = 1;
2447 } else {
2448 NEXT;
2449 }
2450 return(buf);
2451}
2452
2453/**
2454 * xmlParsePubidLiteral:
2455 * @ctxt: an XML parser context
2456 *
2457 * parse an XML public literal
2458 *
2459 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2460 *
2461 * Returns the PubidLiteral parsed or NULL.
2462 */
2463
2464xmlChar *
2465xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2466 xmlChar *buf = NULL;
2467 int len = 0;
2468 int size = XML_PARSER_BUFFER_SIZE;
2469 xmlChar cur;
2470 xmlChar stop;
2471 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002472 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002473
2474 SHRINK;
2475 if (RAW == '"') {
2476 NEXT;
2477 stop = '"';
2478 } else if (RAW == '\'') {
2479 NEXT;
2480 stop = '\'';
2481 } else {
2482 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2484 ctxt->sax->error(ctxt->userData,
2485 "SystemLiteral \" or ' expected\n");
2486 ctxt->wellFormed = 0;
2487 ctxt->disableSAX = 1;
2488 return(NULL);
2489 }
2490 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2491 if (buf == NULL) {
2492 xmlGenericError(xmlGenericErrorContext,
2493 "malloc of %d byte failed\n", size);
2494 return(NULL);
2495 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002496 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002497 cur = CUR;
2498 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2499 if (len + 1 >= size) {
2500 size *= 2;
2501 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2502 if (buf == NULL) {
2503 xmlGenericError(xmlGenericErrorContext,
2504 "realloc of %d byte failed\n", size);
2505 return(NULL);
2506 }
2507 }
2508 buf[len++] = cur;
2509 count++;
2510 if (count > 50) {
2511 GROW;
2512 count = 0;
2513 }
2514 NEXT;
2515 cur = CUR;
2516 if (cur == 0) {
2517 GROW;
2518 SHRINK;
2519 cur = CUR;
2520 }
2521 }
2522 buf[len] = 0;
2523 if (cur != stop) {
2524 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2526 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2527 ctxt->wellFormed = 0;
2528 ctxt->disableSAX = 1;
2529 } else {
2530 NEXT;
2531 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002532 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002533 return(buf);
2534}
2535
Daniel Veillard48b2f892001-02-25 16:11:03 +00002536void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002537/**
2538 * xmlParseCharData:
2539 * @ctxt: an XML parser context
2540 * @cdata: int indicating whether we are within a CDATA section
2541 *
2542 * parse a CharData section.
2543 * if we are within a CDATA section ']]>' marks an end of section.
2544 *
2545 * The right angle bracket (>) may be represented using the string "&gt;",
2546 * and must, for compatibility, be escaped using "&gt;" or a character
2547 * reference when it appears in the string "]]>" in content, when that
2548 * string is not marking the end of a CDATA section.
2549 *
2550 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2551 */
2552
2553void
2554xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002555 const xmlChar *in;
2556 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002557 int line = ctxt->input->line;
2558 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002559
2560 SHRINK;
2561 GROW;
2562 /*
2563 * Accelerated common case where input don't need to be
2564 * modified before passing it to the handler.
2565 */
2566 if ((ctxt->token == 0) && (!cdata)) {
2567 in = ctxt->input->cur;
2568 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002569get_more:
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002570 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
Daniel Veillard48b2f892001-02-25 16:11:03 +00002571 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2572 in++;
2573 if (*in == 0xA) {
2574 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002575 in++;
2576 while (*in == 0xA) {
2577 ctxt->input->line++;
2578 in++;
2579 }
2580 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002581 }
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002582 if (*in == ']') {
2583 if ((in[1] == ']') && (in[2] == '>')) {
2584 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586 ctxt->sax->error(ctxt->userData,
2587 "Sequence ']]>' not allowed in content\n");
2588 ctxt->input->cur = in;
2589 ctxt->wellFormed = 0;
2590 ctxt->disableSAX = 1;
2591 return;
2592 }
2593 in++;
2594 goto get_more;
2595 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002596 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002597 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002598 if (IS_BLANK(*ctxt->input->cur)) {
2599 const xmlChar *tmp = ctxt->input->cur;
2600 ctxt->input->cur = in;
2601 if (areBlanks(ctxt, tmp, nbchar)) {
2602 if (ctxt->sax->ignorableWhitespace != NULL)
2603 ctxt->sax->ignorableWhitespace(ctxt->userData,
2604 tmp, nbchar);
2605 } else {
2606 if (ctxt->sax->characters != NULL)
2607 ctxt->sax->characters(ctxt->userData,
2608 tmp, nbchar);
2609 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002610 line = ctxt->input->line;
2611 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002612 } else {
2613 if (ctxt->sax->characters != NULL)
2614 ctxt->sax->characters(ctxt->userData,
2615 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002616 line = ctxt->input->line;
2617 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002618 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002619 }
2620 ctxt->input->cur = in;
2621 if (*in == 0xD) {
2622 in++;
2623 if (*in == 0xA) {
2624 ctxt->input->cur = in;
2625 in++;
2626 ctxt->input->line++;
2627 continue; /* while */
2628 }
2629 in--;
2630 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002631 if (*in == '<') {
2632 return;
2633 }
2634 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002635 return;
2636 }
2637 SHRINK;
2638 GROW;
2639 in = ctxt->input->cur;
2640 } while ((*in >= 0x20) && (*in <= 0x7F));
2641 nbchar = 0;
2642 }
Daniel Veillard50582112001-03-26 22:52:16 +00002643 ctxt->input->line = line;
2644 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002645 xmlParseCharDataComplex(ctxt, cdata);
2646}
2647
2648void
2649xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002650 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2651 int nbchar = 0;
2652 int cur, l;
2653 int count = 0;
2654
2655 SHRINK;
2656 GROW;
2657 cur = CUR_CHAR(l);
2658 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2659 ((cur != '&') || (ctxt->token == '&')) &&
2660 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2661 if ((cur == ']') && (NXT(1) == ']') &&
2662 (NXT(2) == '>')) {
2663 if (cdata) break;
2664 else {
2665 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2667 ctxt->sax->error(ctxt->userData,
2668 "Sequence ']]>' not allowed in content\n");
2669 /* Should this be relaxed ??? I see a "must here */
2670 ctxt->wellFormed = 0;
2671 ctxt->disableSAX = 1;
2672 }
2673 }
2674 COPY_BUF(l,buf,nbchar,cur);
2675 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2676 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002677 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002678 */
2679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2680 if (areBlanks(ctxt, buf, nbchar)) {
2681 if (ctxt->sax->ignorableWhitespace != NULL)
2682 ctxt->sax->ignorableWhitespace(ctxt->userData,
2683 buf, nbchar);
2684 } else {
2685 if (ctxt->sax->characters != NULL)
2686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2687 }
2688 }
2689 nbchar = 0;
2690 }
2691 count++;
2692 if (count > 50) {
2693 GROW;
2694 count = 0;
2695 }
2696 NEXTL(l);
2697 cur = CUR_CHAR(l);
2698 }
2699 if (nbchar != 0) {
2700 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002702 */
2703 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2704 if (areBlanks(ctxt, buf, nbchar)) {
2705 if (ctxt->sax->ignorableWhitespace != NULL)
2706 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2707 } else {
2708 if (ctxt->sax->characters != NULL)
2709 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2710 }
2711 }
2712 }
2713}
2714
2715/**
2716 * xmlParseExternalID:
2717 * @ctxt: an XML parser context
2718 * @publicID: a xmlChar** receiving PubidLiteral
2719 * @strict: indicate whether we should restrict parsing to only
2720 * production [75], see NOTE below
2721 *
2722 * Parse an External ID or a Public ID
2723 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002724 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002725 * 'PUBLIC' S PubidLiteral S SystemLiteral
2726 *
2727 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2728 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2729 *
2730 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2731 *
2732 * Returns the function returns SystemLiteral and in the second
2733 * case publicID receives PubidLiteral, is strict is off
2734 * it is possible to return NULL and have publicID set.
2735 */
2736
2737xmlChar *
2738xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2739 xmlChar *URI = NULL;
2740
2741 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002742
2743 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002744 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2745 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2746 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2747 SKIP(6);
2748 if (!IS_BLANK(CUR)) {
2749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2751 ctxt->sax->error(ctxt->userData,
2752 "Space required after 'SYSTEM'\n");
2753 ctxt->wellFormed = 0;
2754 ctxt->disableSAX = 1;
2755 }
2756 SKIP_BLANKS;
2757 URI = xmlParseSystemLiteral(ctxt);
2758 if (URI == NULL) {
2759 ctxt->errNo = XML_ERR_URI_REQUIRED;
2760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2761 ctxt->sax->error(ctxt->userData,
2762 "xmlParseExternalID: SYSTEM, no URI\n");
2763 ctxt->wellFormed = 0;
2764 ctxt->disableSAX = 1;
2765 }
2766 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2767 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2768 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2769 SKIP(6);
2770 if (!IS_BLANK(CUR)) {
2771 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2773 ctxt->sax->error(ctxt->userData,
2774 "Space required after 'PUBLIC'\n");
2775 ctxt->wellFormed = 0;
2776 ctxt->disableSAX = 1;
2777 }
2778 SKIP_BLANKS;
2779 *publicID = xmlParsePubidLiteral(ctxt);
2780 if (*publicID == NULL) {
2781 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2783 ctxt->sax->error(ctxt->userData,
2784 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2785 ctxt->wellFormed = 0;
2786 ctxt->disableSAX = 1;
2787 }
2788 if (strict) {
2789 /*
2790 * We don't handle [83] so "S SystemLiteral" is required.
2791 */
2792 if (!IS_BLANK(CUR)) {
2793 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2795 ctxt->sax->error(ctxt->userData,
2796 "Space required after the Public Identifier\n");
2797 ctxt->wellFormed = 0;
2798 ctxt->disableSAX = 1;
2799 }
2800 } else {
2801 /*
2802 * We handle [83] so we return immediately, if
2803 * "S SystemLiteral" is not detected. From a purely parsing
2804 * point of view that's a nice mess.
2805 */
2806 const xmlChar *ptr;
2807 GROW;
2808
2809 ptr = CUR_PTR;
2810 if (!IS_BLANK(*ptr)) return(NULL);
2811
2812 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2813 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2814 }
2815 SKIP_BLANKS;
2816 URI = xmlParseSystemLiteral(ctxt);
2817 if (URI == NULL) {
2818 ctxt->errNo = XML_ERR_URI_REQUIRED;
2819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2820 ctxt->sax->error(ctxt->userData,
2821 "xmlParseExternalID: PUBLIC, no URI\n");
2822 ctxt->wellFormed = 0;
2823 ctxt->disableSAX = 1;
2824 }
2825 }
2826 return(URI);
2827}
2828
2829/**
2830 * xmlParseComment:
2831 * @ctxt: an XML parser context
2832 *
2833 * Skip an XML (SGML) comment <!-- .... -->
2834 * The spec says that "For compatibility, the string "--" (double-hyphen)
2835 * must not occur within comments. "
2836 *
2837 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2838 */
2839void
2840xmlParseComment(xmlParserCtxtPtr ctxt) {
2841 xmlChar *buf = NULL;
2842 int len;
2843 int size = XML_PARSER_BUFFER_SIZE;
2844 int q, ql;
2845 int r, rl;
2846 int cur, l;
2847 xmlParserInputState state;
2848 xmlParserInputPtr input = ctxt->input;
2849 int count = 0;
2850
2851 /*
2852 * Check that there is a comment right here.
2853 */
2854 if ((RAW != '<') || (NXT(1) != '!') ||
2855 (NXT(2) != '-') || (NXT(3) != '-')) return;
2856
2857 state = ctxt->instate;
2858 ctxt->instate = XML_PARSER_COMMENT;
2859 SHRINK;
2860 SKIP(4);
2861 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2862 if (buf == NULL) {
2863 xmlGenericError(xmlGenericErrorContext,
2864 "malloc of %d byte failed\n", size);
2865 ctxt->instate = state;
2866 return;
2867 }
2868 q = CUR_CHAR(ql);
2869 NEXTL(ql);
2870 r = CUR_CHAR(rl);
2871 NEXTL(rl);
2872 cur = CUR_CHAR(l);
2873 len = 0;
2874 while (IS_CHAR(cur) && /* checked */
2875 ((cur != '>') ||
2876 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002877 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002878 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2880 ctxt->sax->error(ctxt->userData,
2881 "Comment must not contain '--' (double-hyphen)`\n");
2882 ctxt->wellFormed = 0;
2883 ctxt->disableSAX = 1;
2884 }
2885 if (len + 5 >= size) {
2886 size *= 2;
2887 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2888 if (buf == NULL) {
2889 xmlGenericError(xmlGenericErrorContext,
2890 "realloc of %d byte failed\n", size);
2891 ctxt->instate = state;
2892 return;
2893 }
2894 }
2895 COPY_BUF(ql,buf,len,q);
2896 q = r;
2897 ql = rl;
2898 r = cur;
2899 rl = l;
2900
2901 count++;
2902 if (count > 50) {
2903 GROW;
2904 count = 0;
2905 }
2906 NEXTL(l);
2907 cur = CUR_CHAR(l);
2908 if (cur == 0) {
2909 SHRINK;
2910 GROW;
2911 cur = CUR_CHAR(l);
2912 }
2913 }
2914 buf[len] = 0;
2915 if (!IS_CHAR(cur)) {
2916 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2918 ctxt->sax->error(ctxt->userData,
2919 "Comment not terminated \n<!--%.50s\n", buf);
2920 ctxt->wellFormed = 0;
2921 ctxt->disableSAX = 1;
2922 xmlFree(buf);
2923 } else {
2924 if (input != ctxt->input) {
2925 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2927 ctxt->sax->error(ctxt->userData,
2928"Comment doesn't start and stop in the same entity\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 }
2932 NEXT;
2933 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2934 (!ctxt->disableSAX))
2935 ctxt->sax->comment(ctxt->userData, buf);
2936 xmlFree(buf);
2937 }
2938 ctxt->instate = state;
2939}
2940
2941/**
2942 * xmlParsePITarget:
2943 * @ctxt: an XML parser context
2944 *
2945 * parse the name of a PI
2946 *
2947 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2948 *
2949 * Returns the PITarget name or NULL
2950 */
2951
2952xmlChar *
2953xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2954 xmlChar *name;
2955
2956 name = xmlParseName(ctxt);
2957 if ((name != NULL) &&
2958 ((name[0] == 'x') || (name[0] == 'X')) &&
2959 ((name[1] == 'm') || (name[1] == 'M')) &&
2960 ((name[2] == 'l') || (name[2] == 'L'))) {
2961 int i;
2962 if ((name[0] == 'x') && (name[1] == 'm') &&
2963 (name[2] == 'l') && (name[3] == 0)) {
2964 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2966 ctxt->sax->error(ctxt->userData,
2967 "XML declaration allowed only at the start of the document\n");
2968 ctxt->wellFormed = 0;
2969 ctxt->disableSAX = 1;
2970 return(name);
2971 } else if (name[3] == 0) {
2972 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2974 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2975 ctxt->wellFormed = 0;
2976 ctxt->disableSAX = 1;
2977 return(name);
2978 }
2979 for (i = 0;;i++) {
2980 if (xmlW3CPIs[i] == NULL) break;
2981 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2982 return(name);
2983 }
2984 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2985 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2986 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002987 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002988 }
2989 }
2990 return(name);
2991}
2992
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be of the form catalog="URL" (single or double quotes,
 * optional blanks around each part, nothing after the closing quote);
 * otherwise a warning is raised, except that a missing '=' is ignored
 * silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign; its absence is not reported */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3055
Owen Taylor3473f882001-02-23 17:55:21 +00003056/**
3057 * xmlParsePI:
3058 * @ctxt: an XML parser context
3059 *
3060 * parse an XML Processing Instruction.
3061 *
3062 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3063 *
 * The processing is transferred to SAX once parsed.
3065 */
3066
3067void
3068xmlParsePI(xmlParserCtxtPtr ctxt) {
3069 xmlChar *buf = NULL;
3070 int len = 0;
3071 int size = XML_PARSER_BUFFER_SIZE;
3072 int cur, l;
3073 xmlChar *target;
3074 xmlParserInputState state;
3075 int count = 0;
3076
3077 if ((RAW == '<') && (NXT(1) == '?')) {
3078 xmlParserInputPtr input = ctxt->input;
3079 state = ctxt->instate;
3080 ctxt->instate = XML_PARSER_PI;
3081 /*
3082 * this is a Processing Instruction.
3083 */
3084 SKIP(2);
3085 SHRINK;
3086
3087 /*
3088 * Parse the target name and check for special support like
3089 * namespace.
3090 */
3091 target = xmlParsePITarget(ctxt);
3092 if (target != NULL) {
3093 if ((RAW == '?') && (NXT(1) == '>')) {
3094 if (input != ctxt->input) {
3095 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3097 ctxt->sax->error(ctxt->userData,
3098 "PI declaration doesn't start and stop in the same entity\n");
3099 ctxt->wellFormed = 0;
3100 ctxt->disableSAX = 1;
3101 }
3102 SKIP(2);
3103
3104 /*
3105 * SAX: PI detected.
3106 */
3107 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3108 (ctxt->sax->processingInstruction != NULL))
3109 ctxt->sax->processingInstruction(ctxt->userData,
3110 target, NULL);
3111 ctxt->instate = state;
3112 xmlFree(target);
3113 return;
3114 }
3115 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3116 if (buf == NULL) {
3117 xmlGenericError(xmlGenericErrorContext,
3118 "malloc of %d byte failed\n", size);
3119 ctxt->instate = state;
3120 return;
3121 }
3122 cur = CUR;
3123 if (!IS_BLANK(cur)) {
3124 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3126 ctxt->sax->error(ctxt->userData,
3127 "xmlParsePI: PI %s space expected\n", target);
3128 ctxt->wellFormed = 0;
3129 ctxt->disableSAX = 1;
3130 }
3131 SKIP_BLANKS;
3132 cur = CUR_CHAR(l);
3133 while (IS_CHAR(cur) && /* checked */
3134 ((cur != '?') || (NXT(1) != '>'))) {
3135 if (len + 5 >= size) {
3136 size *= 2;
3137 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3138 if (buf == NULL) {
3139 xmlGenericError(xmlGenericErrorContext,
3140 "realloc of %d byte failed\n", size);
3141 ctxt->instate = state;
3142 return;
3143 }
3144 }
3145 count++;
3146 if (count > 50) {
3147 GROW;
3148 count = 0;
3149 }
3150 COPY_BUF(l,buf,len,cur);
3151 NEXTL(l);
3152 cur = CUR_CHAR(l);
3153 if (cur == 0) {
3154 SHRINK;
3155 GROW;
3156 cur = CUR_CHAR(l);
3157 }
3158 }
3159 buf[len] = 0;
3160 if (cur != '?') {
3161 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3163 ctxt->sax->error(ctxt->userData,
3164 "xmlParsePI: PI %s never end ...\n", target);
3165 ctxt->wellFormed = 0;
3166 ctxt->disableSAX = 1;
3167 } else {
3168 if (input != ctxt->input) {
3169 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3171 ctxt->sax->error(ctxt->userData,
3172 "PI declaration doesn't start and stop in the same entity\n");
3173 ctxt->wellFormed = 0;
3174 ctxt->disableSAX = 1;
3175 }
3176 SKIP(2);
3177
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003178#ifdef LIBXML_CATALOG_ENABLED
3179 if (((state == XML_PARSER_MISC) ||
3180 (state == XML_PARSER_START)) &&
3181 (xmlStrEqual(target, XML_CATALOG_PI))) {
3182 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3183 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3184 (allow == XML_CATA_ALLOW_ALL))
3185 xmlParseCatalogPI(ctxt, buf);
3186 }
3187#endif
3188
3189
Owen Taylor3473f882001-02-23 17:55:21 +00003190 /*
3191 * SAX: PI detected.
3192 */
3193 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3194 (ctxt->sax->processingInstruction != NULL))
3195 ctxt->sax->processingInstruction(ctxt->userData,
3196 target, buf);
3197 }
3198 xmlFree(buf);
3199 xmlFree(target);
3200 } else {
3201 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3203 ctxt->sax->error(ctxt->userData,
3204 "xmlParsePI : no target name\n");
3205 ctxt->wellFormed = 0;
3206 ctxt->disableSAX = 1;
3207 }
3208 ctxt->instate = state;
3209 }
3210}
3211
3212/**
3213 * xmlParseNotationDecl:
3214 * @ctxt: an XML parser context
3215 *
3216 * parse a notation declaration
3217 *
3218 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3219 *
3220 * Hence there is actually 3 choices:
3221 * 'PUBLIC' S PubidLiteral
3222 * 'PUBLIC' S PubidLiteral S SystemLiteral
3223 * and 'SYSTEM' S SystemLiteral
3224 *
3225 * See the NOTE on xmlParseExternalID().
3226 */
3227
3228void
3229xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3230 xmlChar *name;
3231 xmlChar *Pubid;
3232 xmlChar *Systemid;
3233
3234 if ((RAW == '<') && (NXT(1) == '!') &&
3235 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3236 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3237 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3238 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3239 xmlParserInputPtr input = ctxt->input;
3240 SHRINK;
3241 SKIP(10);
3242 if (!IS_BLANK(CUR)) {
3243 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3245 ctxt->sax->error(ctxt->userData,
3246 "Space required after '<!NOTATION'\n");
3247 ctxt->wellFormed = 0;
3248 ctxt->disableSAX = 1;
3249 return;
3250 }
3251 SKIP_BLANKS;
3252
Daniel Veillard76d66f42001-05-16 21:05:17 +00003253 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003254 if (name == NULL) {
3255 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3257 ctxt->sax->error(ctxt->userData,
3258 "NOTATION: Name expected here\n");
3259 ctxt->wellFormed = 0;
3260 ctxt->disableSAX = 1;
3261 return;
3262 }
3263 if (!IS_BLANK(CUR)) {
3264 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "Space required after the NOTATION name'\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 return;
3271 }
3272 SKIP_BLANKS;
3273
3274 /*
3275 * Parse the IDs.
3276 */
3277 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3278 SKIP_BLANKS;
3279
3280 if (RAW == '>') {
3281 if (input != ctxt->input) {
3282 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3284 ctxt->sax->error(ctxt->userData,
3285"Notation declaration doesn't start and stop in the same entity\n");
3286 ctxt->wellFormed = 0;
3287 ctxt->disableSAX = 1;
3288 }
3289 NEXT;
3290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3291 (ctxt->sax->notationDecl != NULL))
3292 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3293 } else {
3294 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3296 ctxt->sax->error(ctxt->userData,
3297 "'>' required to close NOTATION declaration\n");
3298 ctxt->wellFormed = 0;
3299 ctxt->disableSAX = 1;
3300 }
3301 xmlFree(name);
3302 if (Systemid != NULL) xmlFree(Systemid);
3303 if (Pubid != NULL) xmlFree(Pubid);
3304 }
3305}
3306
3307/**
3308 * xmlParseEntityDecl:
3309 * @ctxt: an XML parser context
3310 *
3311 * parse <!ENTITY declarations
3312 *
3313 * [70] EntityDecl ::= GEDecl | PEDecl
3314 *
3315 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3316 *
3317 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3318 *
3319 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3320 *
3321 * [74] PEDef ::= EntityValue | ExternalID
3322 *
3323 * [76] NDataDecl ::= S 'NDATA' S Name
3324 *
3325 * [ VC: Notation Declared ]
3326 * The Name must match the declared name of a notation.
3327 */
3328
3329void
3330xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3331 xmlChar *name = NULL;
3332 xmlChar *value = NULL;
3333 xmlChar *URI = NULL, *literal = NULL;
3334 xmlChar *ndata = NULL;
3335 int isParameter = 0;
3336 xmlChar *orig = NULL;
3337
3338 GROW;
3339 if ((RAW == '<') && (NXT(1) == '!') &&
3340 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3341 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3342 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3343 xmlParserInputPtr input = ctxt->input;
3344 ctxt->instate = XML_PARSER_ENTITY_DECL;
3345 SHRINK;
3346 SKIP(8);
3347 if (!IS_BLANK(CUR)) {
3348 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData,
3351 "Space required after '<!ENTITY'\n");
3352 ctxt->wellFormed = 0;
3353 ctxt->disableSAX = 1;
3354 }
3355 SKIP_BLANKS;
3356
3357 if (RAW == '%') {
3358 NEXT;
3359 if (!IS_BLANK(CUR)) {
3360 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3362 ctxt->sax->error(ctxt->userData,
3363 "Space required after '%'\n");
3364 ctxt->wellFormed = 0;
3365 ctxt->disableSAX = 1;
3366 }
3367 SKIP_BLANKS;
3368 isParameter = 1;
3369 }
3370
Daniel Veillard76d66f42001-05-16 21:05:17 +00003371 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if (name == NULL) {
3373 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3376 ctxt->wellFormed = 0;
3377 ctxt->disableSAX = 1;
3378 return;
3379 }
3380 if (!IS_BLANK(CUR)) {
3381 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384 "Space required after the entity name\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388 SKIP_BLANKS;
3389
3390 /*
3391 * handle the various case of definitions...
3392 */
3393 if (isParameter) {
3394 if ((RAW == '"') || (RAW == '\'')) {
3395 value = xmlParseEntityValue(ctxt, &orig);
3396 if (value) {
3397 if ((ctxt->sax != NULL) &&
3398 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3399 ctxt->sax->entityDecl(ctxt->userData, name,
3400 XML_INTERNAL_PARAMETER_ENTITY,
3401 NULL, NULL, value);
3402 }
3403 } else {
3404 URI = xmlParseExternalID(ctxt, &literal, 1);
3405 if ((URI == NULL) && (literal == NULL)) {
3406 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "Entity value required\n");
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 }
3413 if (URI) {
3414 xmlURIPtr uri;
3415
3416 uri = xmlParseURI((const char *) URI);
3417 if (uri == NULL) {
3418 ctxt->errNo = XML_ERR_INVALID_URI;
3419 if ((ctxt->sax != NULL) &&
3420 (!ctxt->disableSAX) &&
3421 (ctxt->sax->error != NULL))
3422 ctxt->sax->error(ctxt->userData,
3423 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003424 /*
3425 * This really ought to be a well formedness error
3426 * but the XML Core WG decided otherwise c.f. issue
3427 * E26 of the XML erratas.
3428 */
Owen Taylor3473f882001-02-23 17:55:21 +00003429 } else {
3430 if (uri->fragment != NULL) {
3431 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3432 if ((ctxt->sax != NULL) &&
3433 (!ctxt->disableSAX) &&
3434 (ctxt->sax->error != NULL))
3435 ctxt->sax->error(ctxt->userData,
3436 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003437 /*
3438 * Okay this is foolish to block those but not
3439 * invalid URIs.
3440 */
Owen Taylor3473f882001-02-23 17:55:21 +00003441 ctxt->wellFormed = 0;
3442 } else {
3443 if ((ctxt->sax != NULL) &&
3444 (!ctxt->disableSAX) &&
3445 (ctxt->sax->entityDecl != NULL))
3446 ctxt->sax->entityDecl(ctxt->userData, name,
3447 XML_EXTERNAL_PARAMETER_ENTITY,
3448 literal, URI, NULL);
3449 }
3450 xmlFreeURI(uri);
3451 }
3452 }
3453 }
3454 } else {
3455 if ((RAW == '"') || (RAW == '\'')) {
3456 value = xmlParseEntityValue(ctxt, &orig);
3457 if ((ctxt->sax != NULL) &&
3458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3459 ctxt->sax->entityDecl(ctxt->userData, name,
3460 XML_INTERNAL_GENERAL_ENTITY,
3461 NULL, NULL, value);
3462 } else {
3463 URI = xmlParseExternalID(ctxt, &literal, 1);
3464 if ((URI == NULL) && (literal == NULL)) {
3465 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData,
3468 "Entity value required\n");
3469 ctxt->wellFormed = 0;
3470 ctxt->disableSAX = 1;
3471 }
3472 if (URI) {
3473 xmlURIPtr uri;
3474
3475 uri = xmlParseURI((const char *)URI);
3476 if (uri == NULL) {
3477 ctxt->errNo = XML_ERR_INVALID_URI;
3478 if ((ctxt->sax != NULL) &&
3479 (!ctxt->disableSAX) &&
3480 (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData,
3482 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003483 /*
3484 * This really ought to be a well formedness error
3485 * but the XML Core WG decided otherwise c.f. issue
3486 * E26 of the XML erratas.
3487 */
Owen Taylor3473f882001-02-23 17:55:21 +00003488 } else {
3489 if (uri->fragment != NULL) {
3490 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3491 if ((ctxt->sax != NULL) &&
3492 (!ctxt->disableSAX) &&
3493 (ctxt->sax->error != NULL))
3494 ctxt->sax->error(ctxt->userData,
3495 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003496 /*
3497 * Okay this is foolish to block those but not
3498 * invalid URIs.
3499 */
Owen Taylor3473f882001-02-23 17:55:21 +00003500 ctxt->wellFormed = 0;
3501 }
3502 xmlFreeURI(uri);
3503 }
3504 }
3505 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3506 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3508 ctxt->sax->error(ctxt->userData,
3509 "Space required before 'NDATA'\n");
3510 ctxt->wellFormed = 0;
3511 ctxt->disableSAX = 1;
3512 }
3513 SKIP_BLANKS;
3514 if ((RAW == 'N') && (NXT(1) == 'D') &&
3515 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3516 (NXT(4) == 'A')) {
3517 SKIP(5);
3518 if (!IS_BLANK(CUR)) {
3519 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3521 ctxt->sax->error(ctxt->userData,
3522 "Space required after 'NDATA'\n");
3523 ctxt->wellFormed = 0;
3524 ctxt->disableSAX = 1;
3525 }
3526 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003527 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003528 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3529 (ctxt->sax->unparsedEntityDecl != NULL))
3530 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3531 literal, URI, ndata);
3532 } else {
3533 if ((ctxt->sax != NULL) &&
3534 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3535 ctxt->sax->entityDecl(ctxt->userData, name,
3536 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3537 literal, URI, NULL);
3538 }
3539 }
3540 }
3541 SKIP_BLANKS;
3542 if (RAW != '>') {
3543 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3545 ctxt->sax->error(ctxt->userData,
3546 "xmlParseEntityDecl: entity %s not terminated\n", name);
3547 ctxt->wellFormed = 0;
3548 ctxt->disableSAX = 1;
3549 } else {
3550 if (input != ctxt->input) {
3551 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3553 ctxt->sax->error(ctxt->userData,
3554"Entity declaration doesn't start and stop in the same entity\n");
3555 ctxt->wellFormed = 0;
3556 ctxt->disableSAX = 1;
3557 }
3558 NEXT;
3559 }
3560 if (orig != NULL) {
3561 /*
3562 * Ugly mechanism to save the raw entity value.
3563 */
3564 xmlEntityPtr cur = NULL;
3565
3566 if (isParameter) {
3567 if ((ctxt->sax != NULL) &&
3568 (ctxt->sax->getParameterEntity != NULL))
3569 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3570 } else {
3571 if ((ctxt->sax != NULL) &&
3572 (ctxt->sax->getEntity != NULL))
3573 cur = ctxt->sax->getEntity(ctxt->userData, name);
3574 }
3575 if (cur != NULL) {
3576 if (cur->orig != NULL)
3577 xmlFree(orig);
3578 else
3579 cur->orig = orig;
3580 } else
3581 xmlFree(orig);
3582 }
3583 if (name != NULL) xmlFree(name);
3584 if (value != NULL) xmlFree(value);
3585 if (URI != NULL) xmlFree(URI);
3586 if (literal != NULL) xmlFree(literal);
3587 if (ndata != NULL) xmlFree(ndata);
3588 }
3589}
3590
3591/**
3592 * xmlParseDefaultDecl:
3593 * @ctxt: an XML parser context
3594 * @value: Receive a possible fixed default value for the attribute
3595 *
3596 * Parse an attribute default declaration
3597 *
3598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3599 *
3600 * [ VC: Required Attribute ]
3601 * if the default declaration is the keyword #REQUIRED, then the
3602 * attribute must be specified for all elements of the type in the
3603 * attribute-list declaration.
3604 *
3605 * [ VC: Attribute Default Legal ]
3606 * The declared default value must meet the lexical constraints of
3607 * the declared attribute type c.f. xmlValidateAttributeDecl()
3608 *
3609 * [ VC: Fixed Attribute Default ]
3610 * if an attribute has a default value declared with the #FIXED
3611 * keyword, instances of that attribute must match the default value.
3612 *
3613 * [ WFC: No < in Attribute Values ]
3614 * handled in xmlParseAttValue()
3615 *
3616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3617 * or XML_ATTRIBUTE_FIXED.
3618 */
3619
3620int
3621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3622 int val;
3623 xmlChar *ret;
3624
3625 *value = NULL;
3626 if ((RAW == '#') && (NXT(1) == 'R') &&
3627 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3628 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3629 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3630 (NXT(8) == 'D')) {
3631 SKIP(9);
3632 return(XML_ATTRIBUTE_REQUIRED);
3633 }
3634 if ((RAW == '#') && (NXT(1) == 'I') &&
3635 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3636 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3637 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3638 SKIP(8);
3639 return(XML_ATTRIBUTE_IMPLIED);
3640 }
3641 val = XML_ATTRIBUTE_NONE;
3642 if ((RAW == '#') && (NXT(1) == 'F') &&
3643 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3644 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3645 SKIP(6);
3646 val = XML_ATTRIBUTE_FIXED;
3647 if (!IS_BLANK(CUR)) {
3648 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Space required after '#FIXED'\n");
3652 ctxt->wellFormed = 0;
3653 ctxt->disableSAX = 1;
3654 }
3655 SKIP_BLANKS;
3656 }
3657 ret = xmlParseAttValue(ctxt);
3658 ctxt->instate = XML_PARSER_DTD;
3659 if (ret == NULL) {
3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3661 ctxt->sax->error(ctxt->userData,
3662 "Attribute default value declaration error\n");
3663 ctxt->wellFormed = 0;
3664 ctxt->disableSAX = 1;
3665 } else
3666 *value = ret;
3667 return(val);
3668}
3669
3670/**
3671 * xmlParseNotationType:
3672 * @ctxt: an XML parser context
3673 *
3674 * parse an Notation attribute type.
3675 *
3676 * Note: the leading 'NOTATION' S part has already being parsed...
3677 *
3678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3679 *
3680 * [ VC: Notation Attributes ]
3681 * Values of this type must match one of the notation names included
3682 * in the declaration; all notation names in the declaration must be declared.
3683 *
3684 * Returns: the notation attribute tree built while parsing
3685 */
3686
3687xmlEnumerationPtr
3688xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3689 xmlChar *name;
3690 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3691
3692 if (RAW != '(') {
3693 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3695 ctxt->sax->error(ctxt->userData,
3696 "'(' required to start 'NOTATION'\n");
3697 ctxt->wellFormed = 0;
3698 ctxt->disableSAX = 1;
3699 return(NULL);
3700 }
3701 SHRINK;
3702 do {
3703 NEXT;
3704 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003705 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003706 if (name == NULL) {
3707 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709 ctxt->sax->error(ctxt->userData,
3710 "Name expected in NOTATION declaration\n");
3711 ctxt->wellFormed = 0;
3712 ctxt->disableSAX = 1;
3713 return(ret);
3714 }
3715 cur = xmlCreateEnumeration(name);
3716 xmlFree(name);
3717 if (cur == NULL) return(ret);
3718 if (last == NULL) ret = last = cur;
3719 else {
3720 last->next = cur;
3721 last = cur;
3722 }
3723 SKIP_BLANKS;
3724 } while (RAW == '|');
3725 if (RAW != ')') {
3726 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3728 ctxt->sax->error(ctxt->userData,
3729 "')' required to finish NOTATION declaration\n");
3730 ctxt->wellFormed = 0;
3731 ctxt->disableSAX = 1;
3732 if ((last != NULL) && (last != ret))
3733 xmlFreeEnumeration(last);
3734 return(ret);
3735 }
3736 NEXT;
3737 return(ret);
3738}
3739
3740/**
3741 * xmlParseEnumerationType:
3742 * @ctxt: an XML parser context
3743 *
3744 * parse an Enumeration attribute type.
3745 *
3746 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3747 *
3748 * [ VC: Enumeration ]
3749 * Values of this type must match one of the Nmtoken tokens in
3750 * the declaration
3751 *
3752 * Returns: the enumeration attribute tree built while parsing
3753 */
3754
3755xmlEnumerationPtr
3756xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3757 xmlChar *name;
3758 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3759
3760 if (RAW != '(') {
3761 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3763 ctxt->sax->error(ctxt->userData,
3764 "'(' required to start ATTLIST enumeration\n");
3765 ctxt->wellFormed = 0;
3766 ctxt->disableSAX = 1;
3767 return(NULL);
3768 }
3769 SHRINK;
3770 do {
3771 NEXT;
3772 SKIP_BLANKS;
3773 name = xmlParseNmtoken(ctxt);
3774 if (name == NULL) {
3775 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3777 ctxt->sax->error(ctxt->userData,
3778 "NmToken expected in ATTLIST enumeration\n");
3779 ctxt->wellFormed = 0;
3780 ctxt->disableSAX = 1;
3781 return(ret);
3782 }
3783 cur = xmlCreateEnumeration(name);
3784 xmlFree(name);
3785 if (cur == NULL) return(ret);
3786 if (last == NULL) ret = last = cur;
3787 else {
3788 last->next = cur;
3789 last = cur;
3790 }
3791 SKIP_BLANKS;
3792 } while (RAW == '|');
3793 if (RAW != ')') {
3794 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3796 ctxt->sax->error(ctxt->userData,
3797 "')' required to finish ATTLIST enumeration\n");
3798 ctxt->wellFormed = 0;
3799 ctxt->disableSAX = 1;
3800 return(ret);
3801 }
3802 NEXT;
3803 return(ret);
3804}
3805
3806/**
3807 * xmlParseEnumeratedType:
3808 * @ctxt: an XML parser context
3809 * @tree: the enumeration tree built while parsing
3810 *
3811 * parse an Enumerated attribute type.
3812 *
3813 * [57] EnumeratedType ::= NotationType | Enumeration
3814 *
3815 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3816 *
3817 *
3818 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3819 */
3820
3821int
3822xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3823 if ((RAW == 'N') && (NXT(1) == 'O') &&
3824 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3825 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3826 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3827 SKIP(8);
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after 'NOTATION'\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 return(0);
3836 }
3837 SKIP_BLANKS;
3838 *tree = xmlParseNotationType(ctxt);
3839 if (*tree == NULL) return(0);
3840 return(XML_ATTRIBUTE_NOTATION);
3841 }
3842 *tree = xmlParseEnumerationType(ctxt);
3843 if (*tree == NULL) return(0);
3844 return(XML_ATTRIBUTE_ENUMERATION);
3845}
3846
3847/**
3848 * xmlParseAttributeType:
3849 * @ctxt: an XML parser context
3850 * @tree: the enumeration tree built while parsing
3851 *
3852 * parse the Attribute list def for an element
3853 *
3854 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3855 *
3856 * [55] StringType ::= 'CDATA'
3857 *
3858 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3859 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3860 *
3861 * Validity constraints for attribute values syntax are checked in
3862 * xmlValidateAttributeValue()
3863 *
3864 * [ VC: ID ]
3865 * Values of type ID must match the Name production. A name must not
3866 * appear more than once in an XML document as a value of this type;
3867 * i.e., ID values must uniquely identify the elements which bear them.
3868 *
3869 * [ VC: One ID per Element Type ]
3870 * No element type may have more than one ID attribute specified.
3871 *
3872 * [ VC: ID Attribute Default ]
3873 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3874 *
3875 * [ VC: IDREF ]
3876 * Values of type IDREF must match the Name production, and values
3877 * of type IDREFS must match Names; each IDREF Name must match the value
3878 * of an ID attribute on some element in the XML document; i.e. IDREF
3879 * values must match the value of some ID attribute.
3880 *
3881 * [ VC: Entity Name ]
3882 * Values of type ENTITY must match the Name production, values
3883 * of type ENTITIES must match Names; each Entity Name must match the
3884 * name of an unparsed entity declared in the DTD.
3885 *
3886 * [ VC: Name Token ]
3887 * Values of type NMTOKEN must match the Nmtoken production; values
3888 * of type NMTOKENS must match Nmtokens.
3889 *
3890 * Returns the attribute type
3891 */
3892int
3893xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3894 SHRINK;
3895 if ((RAW == 'C') && (NXT(1) == 'D') &&
3896 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3897 (NXT(4) == 'A')) {
3898 SKIP(5);
3899 return(XML_ATTRIBUTE_CDATA);
3900 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3901 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3902 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3903 SKIP(6);
3904 return(XML_ATTRIBUTE_IDREFS);
3905 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3906 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3907 (NXT(4) == 'F')) {
3908 SKIP(5);
3909 return(XML_ATTRIBUTE_IDREF);
3910 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3911 SKIP(2);
3912 return(XML_ATTRIBUTE_ID);
3913 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3914 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3915 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3916 SKIP(6);
3917 return(XML_ATTRIBUTE_ENTITY);
3918 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3919 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3920 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3921 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3922 SKIP(8);
3923 return(XML_ATTRIBUTE_ENTITIES);
3924 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3925 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3926 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3927 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3928 SKIP(8);
3929 return(XML_ATTRIBUTE_NMTOKENS);
3930 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3931 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3932 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3933 (NXT(6) == 'N')) {
3934 SKIP(7);
3935 return(XML_ATTRIBUTE_NMTOKEN);
3936 }
3937 return(xmlParseEnumeratedType(ctxt, tree));
3938}
3939
3940/**
3941 * xmlParseAttributeListDecl:
3942 * @ctxt: an XML parser context
3943 *
3944 * : parse the Attribute list def for an element
3945 *
3946 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3947 *
3948 * [53] AttDef ::= S Name S AttType S DefaultDecl
3949 *
3950 */
3951void
3952xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3953 xmlChar *elemName;
3954 xmlChar *attrName;
3955 xmlEnumerationPtr tree;
3956
3957 if ((RAW == '<') && (NXT(1) == '!') &&
3958 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3959 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3960 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3961 (NXT(8) == 'T')) {
3962 xmlParserInputPtr input = ctxt->input;
3963
3964 SKIP(9);
3965 if (!IS_BLANK(CUR)) {
3966 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3968 ctxt->sax->error(ctxt->userData,
3969 "Space required after '<!ATTLIST'\n");
3970 ctxt->wellFormed = 0;
3971 ctxt->disableSAX = 1;
3972 }
3973 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003974 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003975 if (elemName == NULL) {
3976 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3978 ctxt->sax->error(ctxt->userData,
3979 "ATTLIST: no name for Element\n");
3980 ctxt->wellFormed = 0;
3981 ctxt->disableSAX = 1;
3982 return;
3983 }
3984 SKIP_BLANKS;
3985 GROW;
3986 while (RAW != '>') {
3987 const xmlChar *check = CUR_PTR;
3988 int type;
3989 int def;
3990 xmlChar *defaultValue = NULL;
3991
3992 GROW;
3993 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003994 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 if (attrName == NULL) {
3996 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "ATTLIST: no name for Attribute\n");
4000 ctxt->wellFormed = 0;
4001 ctxt->disableSAX = 1;
4002 break;
4003 }
4004 GROW;
4005 if (!IS_BLANK(CUR)) {
4006 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4008 ctxt->sax->error(ctxt->userData,
4009 "Space required after the attribute name\n");
4010 ctxt->wellFormed = 0;
4011 ctxt->disableSAX = 1;
4012 if (attrName != NULL)
4013 xmlFree(attrName);
4014 if (defaultValue != NULL)
4015 xmlFree(defaultValue);
4016 break;
4017 }
4018 SKIP_BLANKS;
4019
4020 type = xmlParseAttributeType(ctxt, &tree);
4021 if (type <= 0) {
4022 if (attrName != NULL)
4023 xmlFree(attrName);
4024 if (defaultValue != NULL)
4025 xmlFree(defaultValue);
4026 break;
4027 }
4028
4029 GROW;
4030 if (!IS_BLANK(CUR)) {
4031 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4033 ctxt->sax->error(ctxt->userData,
4034 "Space required after the attribute type\n");
4035 ctxt->wellFormed = 0;
4036 ctxt->disableSAX = 1;
4037 if (attrName != NULL)
4038 xmlFree(attrName);
4039 if (defaultValue != NULL)
4040 xmlFree(defaultValue);
4041 if (tree != NULL)
4042 xmlFreeEnumeration(tree);
4043 break;
4044 }
4045 SKIP_BLANKS;
4046
4047 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4048 if (def <= 0) {
4049 if (attrName != NULL)
4050 xmlFree(attrName);
4051 if (defaultValue != NULL)
4052 xmlFree(defaultValue);
4053 if (tree != NULL)
4054 xmlFreeEnumeration(tree);
4055 break;
4056 }
4057
4058 GROW;
4059 if (RAW != '>') {
4060 if (!IS_BLANK(CUR)) {
4061 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4063 ctxt->sax->error(ctxt->userData,
4064 "Space required after the attribute default value\n");
4065 ctxt->wellFormed = 0;
4066 ctxt->disableSAX = 1;
4067 if (attrName != NULL)
4068 xmlFree(attrName);
4069 if (defaultValue != NULL)
4070 xmlFree(defaultValue);
4071 if (tree != NULL)
4072 xmlFreeEnumeration(tree);
4073 break;
4074 }
4075 SKIP_BLANKS;
4076 }
4077 if (check == CUR_PTR) {
4078 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4080 ctxt->sax->error(ctxt->userData,
4081 "xmlParseAttributeListDecl: detected internal error\n");
4082 if (attrName != NULL)
4083 xmlFree(attrName);
4084 if (defaultValue != NULL)
4085 xmlFree(defaultValue);
4086 if (tree != NULL)
4087 xmlFreeEnumeration(tree);
4088 break;
4089 }
4090 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4091 (ctxt->sax->attributeDecl != NULL))
4092 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4093 type, def, defaultValue, tree);
4094 if (attrName != NULL)
4095 xmlFree(attrName);
4096 if (defaultValue != NULL)
4097 xmlFree(defaultValue);
4098 GROW;
4099 }
4100 if (RAW == '>') {
4101 if (input != ctxt->input) {
4102 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4104 ctxt->sax->error(ctxt->userData,
4105"Attribute list declaration doesn't start and stop in the same entity\n");
4106 ctxt->wellFormed = 0;
4107 ctxt->disableSAX = 1;
4108 }
4109 NEXT;
4110 }
4111
4112 xmlFree(elemName);
4113 }
4114}
4115
4116/**
4117 * xmlParseElementMixedContentDecl:
4118 * @ctxt: an XML parser context
4119 *
4120 * parse the declaration for a Mixed Element content
4121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4122 *
4123 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4124 * '(' S? '#PCDATA' S? ')'
4125 *
4126 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4127 *
4128 * [ VC: No Duplicate Types ]
4129 * The same name must not appear more than once in a single
4130 * mixed-content declaration.
4131 *
4132 * returns: the list of the xmlElementContentPtr describing the element choices
4133 */
4134xmlElementContentPtr
4135xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4136 xmlElementContentPtr ret = NULL, cur = NULL, n;
4137 xmlChar *elem = NULL;
4138
4139 GROW;
4140 if ((RAW == '#') && (NXT(1) == 'P') &&
4141 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4142 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4143 (NXT(6) == 'A')) {
4144 SKIP(7);
4145 SKIP_BLANKS;
4146 SHRINK;
4147 if (RAW == ')') {
4148 ctxt->entity = ctxt->input;
4149 NEXT;
4150 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4151 if (RAW == '*') {
4152 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4153 NEXT;
4154 }
4155 return(ret);
4156 }
4157 if ((RAW == '(') || (RAW == '|')) {
4158 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4159 if (ret == NULL) return(NULL);
4160 }
4161 while (RAW == '|') {
4162 NEXT;
4163 if (elem == NULL) {
4164 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4165 if (ret == NULL) return(NULL);
4166 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004167 if (cur != NULL)
4168 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004169 cur = ret;
4170 } else {
4171 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4172 if (n == NULL) return(NULL);
4173 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004174 if (n->c1 != NULL)
4175 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004176 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004177 if (n != NULL)
4178 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004179 cur = n;
4180 xmlFree(elem);
4181 }
4182 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004183 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004184 if (elem == NULL) {
4185 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4187 ctxt->sax->error(ctxt->userData,
4188 "xmlParseElementMixedContentDecl : Name expected\n");
4189 ctxt->wellFormed = 0;
4190 ctxt->disableSAX = 1;
4191 xmlFreeElementContent(cur);
4192 return(NULL);
4193 }
4194 SKIP_BLANKS;
4195 GROW;
4196 }
4197 if ((RAW == ')') && (NXT(1) == '*')) {
4198 if (elem != NULL) {
4199 cur->c2 = xmlNewElementContent(elem,
4200 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004201 if (cur->c2 != NULL)
4202 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004203 xmlFree(elem);
4204 }
4205 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4206 ctxt->entity = ctxt->input;
4207 SKIP(2);
4208 } else {
4209 if (elem != NULL) xmlFree(elem);
4210 xmlFreeElementContent(ret);
4211 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4213 ctxt->sax->error(ctxt->userData,
4214 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4215 ctxt->wellFormed = 0;
4216 ctxt->disableSAX = 1;
4217 return(NULL);
4218 }
4219
4220 } else {
4221 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4223 ctxt->sax->error(ctxt->userData,
4224 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4225 ctxt->wellFormed = 0;
4226 ctxt->disableSAX = 1;
4227 }
4228 return(ret);
4229}
4230
4231/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004232 * xmlParseElementChildrenContentD:
4233 * @ctxt: an XML parser context
4234 *
4235 * VMS version of xmlParseElementChildrenContentDecl()
4236 *
4237 * Returns the tree of xmlElementContentPtr describing the element
4238 * hierarchy.
4239 */
4240/**
Owen Taylor3473f882001-02-23 17:55:21 +00004241 * xmlParseElementChildrenContentDecl:
4242 * @ctxt: an XML parser context
4243 *
4244 * parse the declaration for a Mixed Element content
4245 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4246 *
4247 *
4248 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4249 *
4250 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4251 *
4252 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4253 *
4254 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4255 *
4256 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4257 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004258 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004259 * opening or closing parentheses in a choice, seq, or Mixed
4260 * construct is contained in the replacement text for a parameter
4261 * entity, both must be contained in the same replacement text. For
4262 * interoperability, if a parameter-entity reference appears in a
4263 * choice, seq, or Mixed construct, its replacement text should not
4264 * be empty, and neither the first nor last non-blank character of
4265 * the replacement text should be a connector (| or ,).
4266 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004267 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004268 * hierarchy.
4269 */
4270xmlElementContentPtr
4271#ifdef VMS
4272xmlParseElementChildrenContentD
4273#else
4274xmlParseElementChildrenContentDecl
4275#endif
4276(xmlParserCtxtPtr ctxt) {
4277 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4278 xmlChar *elem;
4279 xmlChar type = 0;
4280
4281 SKIP_BLANKS;
4282 GROW;
4283 if (RAW == '(') {
4284 /* Recurse on first child */
4285 NEXT;
4286 SKIP_BLANKS;
4287 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4288 SKIP_BLANKS;
4289 GROW;
4290 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004291 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004292 if (elem == NULL) {
4293 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4295 ctxt->sax->error(ctxt->userData,
4296 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4297 ctxt->wellFormed = 0;
4298 ctxt->disableSAX = 1;
4299 return(NULL);
4300 }
4301 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4302 GROW;
4303 if (RAW == '?') {
4304 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4305 NEXT;
4306 } else if (RAW == '*') {
4307 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4308 NEXT;
4309 } else if (RAW == '+') {
4310 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4311 NEXT;
4312 } else {
4313 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4314 }
4315 xmlFree(elem);
4316 GROW;
4317 }
4318 SKIP_BLANKS;
4319 SHRINK;
4320 while (RAW != ')') {
4321 /*
4322 * Each loop we parse one separator and one element.
4323 */
4324 if (RAW == ',') {
4325 if (type == 0) type = CUR;
4326
4327 /*
4328 * Detect "Name | Name , Name" error
4329 */
4330 else if (type != CUR) {
4331 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4333 ctxt->sax->error(ctxt->userData,
4334 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4335 type);
4336 ctxt->wellFormed = 0;
4337 ctxt->disableSAX = 1;
4338 if ((op != NULL) && (op != ret))
4339 xmlFreeElementContent(op);
4340 if ((last != NULL) && (last != ret) &&
4341 (last != ret->c1) && (last != ret->c2))
4342 xmlFreeElementContent(last);
4343 if (ret != NULL)
4344 xmlFreeElementContent(ret);
4345 return(NULL);
4346 }
4347 NEXT;
4348
4349 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4350 if (op == NULL) {
4351 xmlFreeElementContent(ret);
4352 return(NULL);
4353 }
4354 if (last == NULL) {
4355 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004356 if (ret != NULL)
4357 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004358 ret = cur = op;
4359 } else {
4360 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004361 if (op != NULL)
4362 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004364 if (last != NULL)
4365 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004366 cur =op;
4367 last = NULL;
4368 }
4369 } else if (RAW == '|') {
4370 if (type == 0) type = CUR;
4371
4372 /*
4373 * Detect "Name , Name | Name" error
4374 */
4375 else if (type != CUR) {
4376 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4378 ctxt->sax->error(ctxt->userData,
4379 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4380 type);
4381 ctxt->wellFormed = 0;
4382 ctxt->disableSAX = 1;
4383 if ((op != NULL) && (op != ret) && (op != last))
4384 xmlFreeElementContent(op);
4385 if ((last != NULL) && (last != ret) &&
4386 (last != ret->c1) && (last != ret->c2))
4387 xmlFreeElementContent(last);
4388 if (ret != NULL)
4389 xmlFreeElementContent(ret);
4390 return(NULL);
4391 }
4392 NEXT;
4393
4394 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4395 if (op == NULL) {
4396 if ((op != NULL) && (op != ret))
4397 xmlFreeElementContent(op);
4398 if ((last != NULL) && (last != ret) &&
4399 (last != ret->c1) && (last != ret->c2))
4400 xmlFreeElementContent(last);
4401 if (ret != NULL)
4402 xmlFreeElementContent(ret);
4403 return(NULL);
4404 }
4405 if (last == NULL) {
4406 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004407 if (ret != NULL)
4408 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004409 ret = cur = op;
4410 } else {
4411 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004412 if (op != NULL)
4413 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004414 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004415 if (last != NULL)
4416 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004417 cur =op;
4418 last = NULL;
4419 }
4420 } else {
4421 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4423 ctxt->sax->error(ctxt->userData,
4424 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4425 ctxt->wellFormed = 0;
4426 ctxt->disableSAX = 1;
4427 if ((op != NULL) && (op != ret))
4428 xmlFreeElementContent(op);
4429 if ((last != NULL) && (last != ret) &&
4430 (last != ret->c1) && (last != ret->c2))
4431 xmlFreeElementContent(last);
4432 if (ret != NULL)
4433 xmlFreeElementContent(ret);
4434 return(NULL);
4435 }
4436 GROW;
4437 SKIP_BLANKS;
4438 GROW;
4439 if (RAW == '(') {
4440 /* Recurse on second child */
4441 NEXT;
4442 SKIP_BLANKS;
4443 last = xmlParseElementChildrenContentDecl(ctxt);
4444 SKIP_BLANKS;
4445 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004446 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 if (elem == NULL) {
4448 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4450 ctxt->sax->error(ctxt->userData,
4451 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4452 ctxt->wellFormed = 0;
4453 ctxt->disableSAX = 1;
4454 if ((op != NULL) && (op != ret))
4455 xmlFreeElementContent(op);
4456 if ((last != NULL) && (last != ret) &&
4457 (last != ret->c1) && (last != ret->c2))
4458 xmlFreeElementContent(last);
4459 if (ret != NULL)
4460 xmlFreeElementContent(ret);
4461 return(NULL);
4462 }
4463 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4464 xmlFree(elem);
4465 if (RAW == '?') {
4466 last->ocur = XML_ELEMENT_CONTENT_OPT;
4467 NEXT;
4468 } else if (RAW == '*') {
4469 last->ocur = XML_ELEMENT_CONTENT_MULT;
4470 NEXT;
4471 } else if (RAW == '+') {
4472 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4473 NEXT;
4474 } else {
4475 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4476 }
4477 }
4478 SKIP_BLANKS;
4479 GROW;
4480 }
4481 if ((cur != NULL) && (last != NULL)) {
4482 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004483 if (last != NULL)
4484 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004485 }
4486 ctxt->entity = ctxt->input;
4487 NEXT;
4488 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004489 if (ret != NULL)
4490 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004491 NEXT;
4492 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004493 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004494 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004495 cur = ret;
4496 /*
4497 * Some normalization:
4498 * (a | b* | c?)* == (a | b | c)*
4499 */
4500 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4501 if ((cur->c1 != NULL) &&
4502 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4503 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4504 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4505 if ((cur->c2 != NULL) &&
4506 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4507 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4508 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4509 cur = cur->c2;
4510 }
4511 }
Owen Taylor3473f882001-02-23 17:55:21 +00004512 NEXT;
4513 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004514 if (ret != NULL) {
4515 int found = 0;
4516
Daniel Veillarde470df72001-04-18 21:41:07 +00004517 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004518 /*
4519 * Some normalization:
4520 * (a | b*)+ == (a | b)*
4521 * (a | b?)+ == (a | b)*
4522 */
4523 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4524 if ((cur->c1 != NULL) &&
4525 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4526 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4527 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4528 found = 1;
4529 }
4530 if ((cur->c2 != NULL) &&
4531 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4532 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4533 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4534 found = 1;
4535 }
4536 cur = cur->c2;
4537 }
4538 if (found)
4539 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4540 }
Owen Taylor3473f882001-02-23 17:55:21 +00004541 NEXT;
4542 }
4543 return(ret);
4544}
4545
4546/**
4547 * xmlParseElementContentDecl:
4548 * @ctxt: an XML parser context
4549 * @name: the name of the element being defined.
4550 * @result: the Element Content pointer will be stored here if any
4551 *
4552 * parse the declaration for an Element content either Mixed or Children,
4553 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4554 *
4555 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4556 *
4557 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4558 */
4559
4560int
4561xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4562 xmlElementContentPtr *result) {
4563
4564 xmlElementContentPtr tree = NULL;
4565 xmlParserInputPtr input = ctxt->input;
4566 int res;
4567
4568 *result = NULL;
4569
4570 if (RAW != '(') {
4571 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4573 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004574 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004575 ctxt->wellFormed = 0;
4576 ctxt->disableSAX = 1;
4577 return(-1);
4578 }
4579 NEXT;
4580 GROW;
4581 SKIP_BLANKS;
4582 if ((RAW == '#') && (NXT(1) == 'P') &&
4583 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4584 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4585 (NXT(6) == 'A')) {
4586 tree = xmlParseElementMixedContentDecl(ctxt);
4587 res = XML_ELEMENT_TYPE_MIXED;
4588 } else {
4589 tree = xmlParseElementChildrenContentDecl(ctxt);
4590 res = XML_ELEMENT_TYPE_ELEMENT;
4591 }
4592 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4593 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4595 ctxt->sax->error(ctxt->userData,
4596"Element content declaration doesn't start and stop in the same entity\n");
4597 ctxt->wellFormed = 0;
4598 ctxt->disableSAX = 1;
4599 }
4600 SKIP_BLANKS;
4601 *result = tree;
4602 return(res);
4603}
4604
4605/**
4606 * xmlParseElementDecl:
4607 * @ctxt: an XML parser context
4608 *
4609 * parse an Element declaration.
4610 *
4611 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4612 *
4613 * [ VC: Unique Element Type Declaration ]
4614 * No element type may be declared more than once
4615 *
4616 * Returns the type of the element, or -1 in case of error
4617 */
4618int
4619xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4620 xmlChar *name;
4621 int ret = -1;
4622 xmlElementContentPtr content = NULL;
4623
4624 GROW;
4625 if ((RAW == '<') && (NXT(1) == '!') &&
4626 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4627 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4628 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4629 (NXT(8) == 'T')) {
4630 xmlParserInputPtr input = ctxt->input;
4631
4632 SKIP(9);
4633 if (!IS_BLANK(CUR)) {
4634 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4636 ctxt->sax->error(ctxt->userData,
4637 "Space required after 'ELEMENT'\n");
4638 ctxt->wellFormed = 0;
4639 ctxt->disableSAX = 1;
4640 }
4641 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004642 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004643 if (name == NULL) {
4644 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4646 ctxt->sax->error(ctxt->userData,
4647 "xmlParseElementDecl: no name for Element\n");
4648 ctxt->wellFormed = 0;
4649 ctxt->disableSAX = 1;
4650 return(-1);
4651 }
4652 while ((RAW == 0) && (ctxt->inputNr > 1))
4653 xmlPopInput(ctxt);
4654 if (!IS_BLANK(CUR)) {
4655 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4657 ctxt->sax->error(ctxt->userData,
4658 "Space required after the element name\n");
4659 ctxt->wellFormed = 0;
4660 ctxt->disableSAX = 1;
4661 }
4662 SKIP_BLANKS;
4663 if ((RAW == 'E') && (NXT(1) == 'M') &&
4664 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4665 (NXT(4) == 'Y')) {
4666 SKIP(5);
4667 /*
4668 * Element must always be empty.
4669 */
4670 ret = XML_ELEMENT_TYPE_EMPTY;
4671 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4672 (NXT(2) == 'Y')) {
4673 SKIP(3);
4674 /*
4675 * Element is a generic container.
4676 */
4677 ret = XML_ELEMENT_TYPE_ANY;
4678 } else if (RAW == '(') {
4679 ret = xmlParseElementContentDecl(ctxt, name, &content);
4680 } else {
4681 /*
4682 * [ WFC: PEs in Internal Subset ] error handling.
4683 */
4684 if ((RAW == '%') && (ctxt->external == 0) &&
4685 (ctxt->inputNr == 1)) {
4686 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4688 ctxt->sax->error(ctxt->userData,
4689 "PEReference: forbidden within markup decl in internal subset\n");
4690 } else {
4691 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4693 ctxt->sax->error(ctxt->userData,
4694 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4695 }
4696 ctxt->wellFormed = 0;
4697 ctxt->disableSAX = 1;
4698 if (name != NULL) xmlFree(name);
4699 return(-1);
4700 }
4701
4702 SKIP_BLANKS;
4703 /*
4704 * Pop-up of finished entities.
4705 */
4706 while ((RAW == 0) && (ctxt->inputNr > 1))
4707 xmlPopInput(ctxt);
4708 SKIP_BLANKS;
4709
4710 if (RAW != '>') {
4711 ctxt->errNo = XML_ERR_GT_REQUIRED;
4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4713 ctxt->sax->error(ctxt->userData,
4714 "xmlParseElementDecl: expected '>' at the end\n");
4715 ctxt->wellFormed = 0;
4716 ctxt->disableSAX = 1;
4717 } else {
4718 if (input != ctxt->input) {
4719 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4721 ctxt->sax->error(ctxt->userData,
4722"Element declaration doesn't start and stop in the same entity\n");
4723 ctxt->wellFormed = 0;
4724 ctxt->disableSAX = 1;
4725 }
4726
4727 NEXT;
4728 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4729 (ctxt->sax->elementDecl != NULL))
4730 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4731 content);
4732 }
4733 if (content != NULL) {
4734 xmlFreeElementContent(content);
4735 }
4736 if (name != NULL) {
4737 xmlFree(name);
4738 }
4739 }
4740 return(ret);
4741}
4742
4743/**
Owen Taylor3473f882001-02-23 17:55:21 +00004744 * xmlParseConditionalSections
4745 * @ctxt: an XML parser context
4746 *
4747 * [61] conditionalSect ::= includeSect | ignoreSect
4748 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4749 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4750 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4751 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4752 */
4753
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004754static void
Owen Taylor3473f882001-02-23 17:55:21 +00004755xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4756 SKIP(3);
4757 SKIP_BLANKS;
4758 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4759 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4760 (NXT(6) == 'E')) {
4761 SKIP(7);
4762 SKIP_BLANKS;
4763 if (RAW != '[') {
4764 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4766 ctxt->sax->error(ctxt->userData,
4767 "XML conditional section '[' expected\n");
4768 ctxt->wellFormed = 0;
4769 ctxt->disableSAX = 1;
4770 } else {
4771 NEXT;
4772 }
4773 if (xmlParserDebugEntities) {
4774 if ((ctxt->input != NULL) && (ctxt->input->filename))
4775 xmlGenericError(xmlGenericErrorContext,
4776 "%s(%d): ", ctxt->input->filename,
4777 ctxt->input->line);
4778 xmlGenericError(xmlGenericErrorContext,
4779 "Entering INCLUDE Conditional Section\n");
4780 }
4781
4782 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4783 (NXT(2) != '>'))) {
4784 const xmlChar *check = CUR_PTR;
4785 int cons = ctxt->input->consumed;
4786 int tok = ctxt->token;
4787
4788 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4789 xmlParseConditionalSections(ctxt);
4790 } else if (IS_BLANK(CUR)) {
4791 NEXT;
4792 } else if (RAW == '%') {
4793 xmlParsePEReference(ctxt);
4794 } else
4795 xmlParseMarkupDecl(ctxt);
4796
4797 /*
4798 * Pop-up of finished entities.
4799 */
4800 while ((RAW == 0) && (ctxt->inputNr > 1))
4801 xmlPopInput(ctxt);
4802
4803 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4804 (tok == ctxt->token)) {
4805 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807 ctxt->sax->error(ctxt->userData,
4808 "Content error in the external subset\n");
4809 ctxt->wellFormed = 0;
4810 ctxt->disableSAX = 1;
4811 break;
4812 }
4813 }
4814 if (xmlParserDebugEntities) {
4815 if ((ctxt->input != NULL) && (ctxt->input->filename))
4816 xmlGenericError(xmlGenericErrorContext,
4817 "%s(%d): ", ctxt->input->filename,
4818 ctxt->input->line);
4819 xmlGenericError(xmlGenericErrorContext,
4820 "Leaving INCLUDE Conditional Section\n");
4821 }
4822
4823 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4824 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4825 int state;
4826 int instate;
4827 int depth = 0;
4828
4829 SKIP(6);
4830 SKIP_BLANKS;
4831 if (RAW != '[') {
4832 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4834 ctxt->sax->error(ctxt->userData,
4835 "XML conditional section '[' expected\n");
4836 ctxt->wellFormed = 0;
4837 ctxt->disableSAX = 1;
4838 } else {
4839 NEXT;
4840 }
4841 if (xmlParserDebugEntities) {
4842 if ((ctxt->input != NULL) && (ctxt->input->filename))
4843 xmlGenericError(xmlGenericErrorContext,
4844 "%s(%d): ", ctxt->input->filename,
4845 ctxt->input->line);
4846 xmlGenericError(xmlGenericErrorContext,
4847 "Entering IGNORE Conditional Section\n");
4848 }
4849
4850 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004851 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004852 * But disable SAX event generating DTD building in the meantime
4853 */
4854 state = ctxt->disableSAX;
4855 instate = ctxt->instate;
4856 ctxt->disableSAX = 1;
4857 ctxt->instate = XML_PARSER_IGNORE;
4858
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004859 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4861 depth++;
4862 SKIP(3);
4863 continue;
4864 }
4865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4866 if (--depth >= 0) SKIP(3);
4867 continue;
4868 }
4869 NEXT;
4870 continue;
4871 }
4872
4873 ctxt->disableSAX = state;
4874 ctxt->instate = instate;
4875
4876 if (xmlParserDebugEntities) {
4877 if ((ctxt->input != NULL) && (ctxt->input->filename))
4878 xmlGenericError(xmlGenericErrorContext,
4879 "%s(%d): ", ctxt->input->filename,
4880 ctxt->input->line);
4881 xmlGenericError(xmlGenericErrorContext,
4882 "Leaving IGNORE Conditional Section\n");
4883 }
4884
4885 } else {
4886 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4888 ctxt->sax->error(ctxt->userData,
4889 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4890 ctxt->wellFormed = 0;
4891 ctxt->disableSAX = 1;
4892 }
4893
4894 if (RAW == 0)
4895 SHRINK;
4896
4897 if (RAW == 0) {
4898 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4900 ctxt->sax->error(ctxt->userData,
4901 "XML conditional section not closed\n");
4902 ctxt->wellFormed = 0;
4903 ctxt->disableSAX = 1;
4904 } else {
4905 SKIP(3);
4906 }
4907}
4908
4909/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004910 * xmlParseMarkupDecl:
4911 * @ctxt: an XML parser context
4912 *
4913 * parse Markup declarations
4914 *
4915 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4916 * NotationDecl | PI | Comment
4917 *
4918 * [ VC: Proper Declaration/PE Nesting ]
4919 * Parameter-entity replacement text must be properly nested with
4920 * markup declarations. That is to say, if either the first character
4921 * or the last character of a markup declaration (markupdecl above) is
4922 * contained in the replacement text for a parameter-entity reference,
4923 * both must be contained in the same replacement text.
4924 *
4925 * [ WFC: PEs in Internal Subset ]
4926 * In the internal DTD subset, parameter-entity references can occur
4927 * only where markup declarations can occur, not within markup declarations.
4928 * (This does not apply to references that occur in external parameter
4929 * entities or to the external subset.)
4930 */
4931void
4932xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4933 GROW;
4934 xmlParseElementDecl(ctxt);
4935 xmlParseAttributeListDecl(ctxt);
4936 xmlParseEntityDecl(ctxt);
4937 xmlParseNotationDecl(ctxt);
4938 xmlParsePI(ctxt);
4939 xmlParseComment(ctxt);
4940 /*
4941 * This is only for internal subset. On external entities,
4942 * the replacement is done before parsing stage
4943 */
4944 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4945 xmlParsePEReference(ctxt);
4946
4947 /*
4948 * Conditional sections are allowed from entities included
4949 * by PE References in the internal subset.
4950 */
4951 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4952 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4953 xmlParseConditionalSections(ctxt);
4954 }
4955 }
4956
4957 ctxt->instate = XML_PARSER_DTD;
4958}
4959
4960/**
4961 * xmlParseTextDecl:
4962 * @ctxt: an XML parser context
4963 *
4964 * parse an XML declaration header for external entities
4965 *
4966 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4967 *
4968 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4969 */
4970
4971void
4972xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4973 xmlChar *version;
4974
4975 /*
4976 * We know that '<?xml' is here.
4977 */
4978 if ((RAW == '<') && (NXT(1) == '?') &&
4979 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4980 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4981 SKIP(5);
4982 } else {
4983 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4985 ctxt->sax->error(ctxt->userData,
4986 "Text declaration '<?xml' required\n");
4987 ctxt->wellFormed = 0;
4988 ctxt->disableSAX = 1;
4989
4990 return;
4991 }
4992
4993 if (!IS_BLANK(CUR)) {
4994 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4996 ctxt->sax->error(ctxt->userData,
4997 "Space needed after '<?xml'\n");
4998 ctxt->wellFormed = 0;
4999 ctxt->disableSAX = 1;
5000 }
5001 SKIP_BLANKS;
5002
5003 /*
5004 * We may have the VersionInfo here.
5005 */
5006 version = xmlParseVersionInfo(ctxt);
5007 if (version == NULL)
5008 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005009 else {
5010 if (!IS_BLANK(CUR)) {
5011 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5013 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5014 ctxt->wellFormed = 0;
5015 ctxt->disableSAX = 1;
5016 }
5017 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005018 ctxt->input->version = version;
5019
5020 /*
5021 * We must have the encoding declaration
5022 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005023 xmlParseEncodingDecl(ctxt);
5024 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5025 /*
5026 * The XML REC instructs us to stop parsing right here
5027 */
5028 return;
5029 }
5030
5031 SKIP_BLANKS;
5032 if ((RAW == '?') && (NXT(1) == '>')) {
5033 SKIP(2);
5034 } else if (RAW == '>') {
5035 /* Deprecated old WD ... */
5036 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5038 ctxt->sax->error(ctxt->userData,
5039 "XML declaration must end-up with '?>'\n");
5040 ctxt->wellFormed = 0;
5041 ctxt->disableSAX = 1;
5042 NEXT;
5043 } else {
5044 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5046 ctxt->sax->error(ctxt->userData,
5047 "parsing XML declaration: '?>' expected\n");
5048 ctxt->wellFormed = 0;
5049 ctxt->disableSAX = 1;
5050 MOVETO_ENDTAG(CUR_PTR);
5051 NEXT;
5052 }
5053}
5054
5055/**
Owen Taylor3473f882001-02-23 17:55:21 +00005056 * xmlParseExternalSubset:
5057 * @ctxt: an XML parser context
5058 * @ExternalID: the external identifier
5059 * @SystemID: the system identifier (or URL)
5060 *
5061 * parse Markup declarations from an external subset
5062 *
5063 * [30] extSubset ::= textDecl? extSubsetDecl
5064 *
5065 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5066 */
5067void
5068xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5069 const xmlChar *SystemID) {
5070 GROW;
5071 if ((RAW == '<') && (NXT(1) == '?') &&
5072 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5073 (NXT(4) == 'l')) {
5074 xmlParseTextDecl(ctxt);
5075 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5076 /*
5077 * The XML REC instructs us to stop parsing right here
5078 */
5079 ctxt->instate = XML_PARSER_EOF;
5080 return;
5081 }
5082 }
5083 if (ctxt->myDoc == NULL) {
5084 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5085 }
5086 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5087 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5088
5089 ctxt->instate = XML_PARSER_DTD;
5090 ctxt->external = 1;
5091 while (((RAW == '<') && (NXT(1) == '?')) ||
5092 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005093 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 const xmlChar *check = CUR_PTR;
5095 int cons = ctxt->input->consumed;
5096 int tok = ctxt->token;
5097
5098 GROW;
5099 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5100 xmlParseConditionalSections(ctxt);
5101 } else if (IS_BLANK(CUR)) {
5102 NEXT;
5103 } else if (RAW == '%') {
5104 xmlParsePEReference(ctxt);
5105 } else
5106 xmlParseMarkupDecl(ctxt);
5107
5108 /*
5109 * Pop-up of finished entities.
5110 */
5111 while ((RAW == 0) && (ctxt->inputNr > 1))
5112 xmlPopInput(ctxt);
5113
5114 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5115 (tok == ctxt->token)) {
5116 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5118 ctxt->sax->error(ctxt->userData,
5119 "Content error in the external subset\n");
5120 ctxt->wellFormed = 0;
5121 ctxt->disableSAX = 1;
5122 break;
5123 }
5124 }
5125
5126 if (RAW != 0) {
5127 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5129 ctxt->sax->error(ctxt->userData,
5130 "Extra content at the end of the document\n");
5131 ctxt->wellFormed = 0;
5132 ctxt->disableSAX = 1;
5133 }
5134
5135}
5136
5137/**
5138 * xmlParseReference:
5139 * @ctxt: an XML parser context
5140 *
5141 * parse and handle entity references in content, depending on the SAX
5142 * interface, this may end-up in a call to character() if this is a
5143 * CharRef, a predefined entity, if there is no reference() callback.
5144 * or if the parser was asked to switch to that mode.
5145 *
5146 * [67] Reference ::= EntityRef | CharRef
5147 */
5148void
5149xmlParseReference(xmlParserCtxtPtr ctxt) {
5150 xmlEntityPtr ent;
5151 xmlChar *val;
5152 if (RAW != '&') return;
5153
5154 if (NXT(1) == '#') {
5155 int i = 0;
5156 xmlChar out[10];
5157 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005158 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005159
5160 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5161 /*
5162 * So we are using non-UTF-8 buffers
5163 * Check that the char fit on 8bits, if not
5164 * generate a CharRef.
5165 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005166 if (value <= 0xFF) {
5167 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005168 out[1] = 0;
5169 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5170 (!ctxt->disableSAX))
5171 ctxt->sax->characters(ctxt->userData, out, 1);
5172 } else {
5173 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005174 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005175 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005176 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005177 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5178 (!ctxt->disableSAX))
5179 ctxt->sax->reference(ctxt->userData, out);
5180 }
5181 } else {
5182 /*
5183 * Just encode the value in UTF-8
5184 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005185 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005186 out[i] = 0;
5187 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5188 (!ctxt->disableSAX))
5189 ctxt->sax->characters(ctxt->userData, out, i);
5190 }
5191 } else {
5192 ent = xmlParseEntityRef(ctxt);
5193 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005194 if (!ctxt->wellFormed)
5195 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005196 if ((ent->name != NULL) &&
5197 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5198 xmlNodePtr list = NULL;
5199 int ret;
5200
5201
5202 /*
5203 * The first reference to the entity trigger a parsing phase
5204 * where the ent->children is filled with the result from
5205 * the parsing.
5206 */
5207 if (ent->children == NULL) {
5208 xmlChar *value;
5209 value = ent->content;
5210
5211 /*
5212 * Check that this entity is well formed
5213 */
5214 if ((value != NULL) &&
5215 (value[1] == 0) && (value[0] == '<') &&
5216 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5217 /*
5218 * DONE: get definite answer on this !!!
5219 * Lots of entity decls are used to declare a single
5220 * char
5221 * <!ENTITY lt "<">
5222 * Which seems to be valid since
5223 * 2.4: The ampersand character (&) and the left angle
5224 * bracket (<) may appear in their literal form only
5225 * when used ... They are also legal within the literal
5226 * entity value of an internal entity declaration;i
5227 * see "4.3.2 Well-Formed Parsed Entities".
5228 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5229 * Looking at the OASIS test suite and James Clark
5230 * tests, this is broken. However the XML REC uses
5231 * it. Is the XML REC not well-formed ????
5232 * This is a hack to avoid this problem
5233 *
5234 * ANSWER: since lt gt amp .. are already defined,
5235 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005236 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005237 * is lousy but acceptable.
5238 */
5239 list = xmlNewDocText(ctxt->myDoc, value);
5240 if (list != NULL) {
5241 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5242 (ent->children == NULL)) {
5243 ent->children = list;
5244 ent->last = list;
5245 list->parent = (xmlNodePtr) ent;
5246 } else {
5247 xmlFreeNodeList(list);
5248 }
5249 } else if (list != NULL) {
5250 xmlFreeNodeList(list);
5251 }
5252 } else {
5253 /*
5254 * 4.3.2: An internal general parsed entity is well-formed
5255 * if its replacement text matches the production labeled
5256 * content.
5257 */
5258 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5259 ctxt->depth++;
5260 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5261 ctxt->sax, NULL, ctxt->depth,
5262 value, &list);
5263 ctxt->depth--;
5264 } else if (ent->etype ==
5265 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5266 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005267 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005268 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005269 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 ctxt->depth--;
5271 } else {
5272 ret = -1;
5273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5274 ctxt->sax->error(ctxt->userData,
5275 "Internal: invalid entity type\n");
5276 }
5277 if (ret == XML_ERR_ENTITY_LOOP) {
5278 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "Detected entity reference loop\n");
5282 ctxt->wellFormed = 0;
5283 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005284 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005285 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005286 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5287 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005288 (ent->children == NULL)) {
5289 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005290 if (ctxt->replaceEntities) {
5291 /*
5292 * Prune it directly in the generated document
5293 * except for single text nodes.
5294 */
5295 if ((list->type == XML_TEXT_NODE) &&
5296 (list->next == NULL)) {
5297 list->parent = (xmlNodePtr) ent;
5298 list = NULL;
5299 } else {
5300 while (list != NULL) {
5301 list->parent = (xmlNodePtr) ctxt->node;
5302 if (list->next == NULL)
5303 ent->last = list;
5304 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005305 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005306 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005307 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5308 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005309 }
5310 } else {
5311 while (list != NULL) {
5312 list->parent = (xmlNodePtr) ent;
5313 if (list->next == NULL)
5314 ent->last = list;
5315 list = list->next;
5316 }
Owen Taylor3473f882001-02-23 17:55:21 +00005317 }
5318 } else {
5319 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005320 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005321 }
5322 } else if (ret > 0) {
5323 ctxt->errNo = ret;
5324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5325 ctxt->sax->error(ctxt->userData,
5326 "Entity value required\n");
5327 ctxt->wellFormed = 0;
5328 ctxt->disableSAX = 1;
5329 } else if (list != NULL) {
5330 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005331 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005332 }
5333 }
5334 }
5335 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5336 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5337 /*
5338 * Create a node.
5339 */
5340 ctxt->sax->reference(ctxt->userData, ent->name);
5341 return;
5342 } else if (ctxt->replaceEntities) {
5343 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5344 /*
5345 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005346 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005347 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005348 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005349 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005350 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005351 cur = ent->children;
5352 while (cur != NULL) {
5353 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005354 if (firstChild == NULL){
5355 firstChild = new;
5356 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005357 xmlAddChild(ctxt->node, new);
5358 if (cur == ent->last)
5359 break;
5360 cur = cur->next;
5361 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005362 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5363 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005364 } else {
5365 /*
5366 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005367 * node with a possible previous text one which
5368 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005369 */
5370 if (ent->children->type == XML_TEXT_NODE)
5371 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5372 if ((ent->last != ent->children) &&
5373 (ent->last->type == XML_TEXT_NODE))
5374 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5375 xmlAddChildList(ctxt->node, ent->children);
5376 }
5377
Owen Taylor3473f882001-02-23 17:55:21 +00005378 /*
5379 * This is to avoid a nasty side effect, see
5380 * characters() in SAX.c
5381 */
5382 ctxt->nodemem = 0;
5383 ctxt->nodelen = 0;
5384 return;
5385 } else {
5386 /*
5387 * Probably running in SAX mode
5388 */
5389 xmlParserInputPtr input;
5390
5391 input = xmlNewEntityInputStream(ctxt, ent);
5392 xmlPushInput(ctxt, input);
5393 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5394 (RAW == '<') && (NXT(1) == '?') &&
5395 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5396 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5397 xmlParseTextDecl(ctxt);
5398 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5399 /*
5400 * The XML REC instructs us to stop parsing right here
5401 */
5402 ctxt->instate = XML_PARSER_EOF;
5403 return;
5404 }
5405 if (input->standalone == 1) {
5406 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5408 ctxt->sax->error(ctxt->userData,
5409 "external parsed entities cannot be standalone\n");
5410 ctxt->wellFormed = 0;
5411 ctxt->disableSAX = 1;
5412 }
5413 }
5414 return;
5415 }
5416 }
5417 } else {
5418 val = ent->content;
5419 if (val == NULL) return;
5420 /*
5421 * inline the entity.
5422 */
5423 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5424 (!ctxt->disableSAX))
5425 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5426 }
5427 }
5428}
5429
5430/**
5431 * xmlParseEntityRef:
5432 * @ctxt: an XML parser context
5433 *
5434 * parse ENTITY references declarations
5435 *
5436 * [68] EntityRef ::= '&' Name ';'
5437 *
5438 * [ WFC: Entity Declared ]
5439 * In a document without any DTD, a document with only an internal DTD
5440 * subset which contains no parameter entity references, or a document
5441 * with "standalone='yes'", the Name given in the entity reference
5442 * must match that in an entity declaration, except that well-formed
5443 * documents need not declare any of the following entities: amp, lt,
5444 * gt, apos, quot. The declaration of a parameter entity must precede
5445 * any reference to it. Similarly, the declaration of a general entity
5446 * must precede any reference to it which appears in a default value in an
5447 * attribute-list declaration. Note that if entities are declared in the
5448 * external subset or in external parameter entities, a non-validating
5449 * processor is not obligated to read and process their declarations;
5450 * for such documents, the rule that an entity must be declared is a
5451 * well-formedness constraint only if standalone='yes'.
5452 *
5453 * [ WFC: Parsed Entity ]
5454 * An entity reference must not contain the name of an unparsed entity
5455 *
5456 * Returns the xmlEntityPtr if found, or NULL otherwise.
5457 */
5458xmlEntityPtr
5459xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5460 xmlChar *name;
5461 xmlEntityPtr ent = NULL;
5462
5463 GROW;
5464
5465 if (RAW == '&') {
5466 NEXT;
5467 name = xmlParseName(ctxt);
5468 if (name == NULL) {
5469 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5471 ctxt->sax->error(ctxt->userData,
5472 "xmlParseEntityRef: no name\n");
5473 ctxt->wellFormed = 0;
5474 ctxt->disableSAX = 1;
5475 } else {
5476 if (RAW == ';') {
5477 NEXT;
5478 /*
5479 * Ask first SAX for entity resolution, otherwise try the
5480 * predefined set.
5481 */
5482 if (ctxt->sax != NULL) {
5483 if (ctxt->sax->getEntity != NULL)
5484 ent = ctxt->sax->getEntity(ctxt->userData, name);
5485 if (ent == NULL)
5486 ent = xmlGetPredefinedEntity(name);
5487 }
5488 /*
5489 * [ WFC: Entity Declared ]
5490 * In a document without any DTD, a document with only an
5491 * internal DTD subset which contains no parameter entity
5492 * references, or a document with "standalone='yes'", the
5493 * Name given in the entity reference must match that in an
5494 * entity declaration, except that well-formed documents
5495 * need not declare any of the following entities: amp, lt,
5496 * gt, apos, quot.
5497 * The declaration of a parameter entity must precede any
5498 * reference to it.
5499 * Similarly, the declaration of a general entity must
5500 * precede any reference to it which appears in a default
5501 * value in an attribute-list declaration. Note that if
5502 * entities are declared in the external subset or in
5503 * external parameter entities, a non-validating processor
5504 * is not obligated to read and process their declarations;
5505 * for such documents, the rule that an entity must be
5506 * declared is a well-formedness constraint only if
5507 * standalone='yes'.
5508 */
5509 if (ent == NULL) {
5510 if ((ctxt->standalone == 1) ||
5511 ((ctxt->hasExternalSubset == 0) &&
5512 (ctxt->hasPErefs == 0))) {
5513 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5515 ctxt->sax->error(ctxt->userData,
5516 "Entity '%s' not defined\n", name);
5517 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005518 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005519 ctxt->disableSAX = 1;
5520 } else {
5521 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005523 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005524 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005525 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005526 }
5527 }
5528
5529 /*
5530 * [ WFC: Parsed Entity ]
5531 * An entity reference must not contain the name of an
5532 * unparsed entity
5533 */
5534 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5535 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5536 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5537 ctxt->sax->error(ctxt->userData,
5538 "Entity reference to unparsed entity %s\n", name);
5539 ctxt->wellFormed = 0;
5540 ctxt->disableSAX = 1;
5541 }
5542
5543 /*
5544 * [ WFC: No External Entity References ]
5545 * Attribute values cannot contain direct or indirect
5546 * entity references to external entities.
5547 */
5548 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5549 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5550 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5552 ctxt->sax->error(ctxt->userData,
5553 "Attribute references external entity '%s'\n", name);
5554 ctxt->wellFormed = 0;
5555 ctxt->disableSAX = 1;
5556 }
5557 /*
5558 * [ WFC: No < in Attribute Values ]
5559 * The replacement text of any entity referred to directly or
5560 * indirectly in an attribute value (other than "&lt;") must
5561 * not contain a <.
5562 */
5563 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5564 (ent != NULL) &&
5565 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5566 (ent->content != NULL) &&
5567 (xmlStrchr(ent->content, '<'))) {
5568 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5570 ctxt->sax->error(ctxt->userData,
5571 "'<' in entity '%s' is not allowed in attributes values\n", name);
5572 ctxt->wellFormed = 0;
5573 ctxt->disableSAX = 1;
5574 }
5575
5576 /*
5577 * Internal check, no parameter entities here ...
5578 */
5579 else {
5580 switch (ent->etype) {
5581 case XML_INTERNAL_PARAMETER_ENTITY:
5582 case XML_EXTERNAL_PARAMETER_ENTITY:
5583 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5585 ctxt->sax->error(ctxt->userData,
5586 "Attempt to reference the parameter entity '%s'\n", name);
5587 ctxt->wellFormed = 0;
5588 ctxt->disableSAX = 1;
5589 break;
5590 default:
5591 break;
5592 }
5593 }
5594
5595 /*
5596 * [ WFC: No Recursion ]
5597 * A parsed entity must not contain a recursive reference
5598 * to itself, either directly or indirectly.
5599 * Done somewhere else
5600 */
5601
5602 } else {
5603 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5605 ctxt->sax->error(ctxt->userData,
5606 "xmlParseEntityRef: expecting ';'\n");
5607 ctxt->wellFormed = 0;
5608 ctxt->disableSAX = 1;
5609 }
5610 xmlFree(name);
5611 }
5612 }
5613 return(ent);
5614}
5615
5616/**
5617 * xmlParseStringEntityRef:
5618 * @ctxt: an XML parser context
5619 * @str: a pointer to an index in the string
5620 *
5621 * parse ENTITY references declarations, but this version parses it from
5622 * a string value.
5623 *
5624 * [68] EntityRef ::= '&' Name ';'
5625 *
5626 * [ WFC: Entity Declared ]
5627 * In a document without any DTD, a document with only an internal DTD
5628 * subset which contains no parameter entity references, or a document
5629 * with "standalone='yes'", the Name given in the entity reference
5630 * must match that in an entity declaration, except that well-formed
5631 * documents need not declare any of the following entities: amp, lt,
5632 * gt, apos, quot. The declaration of a parameter entity must precede
5633 * any reference to it. Similarly, the declaration of a general entity
5634 * must precede any reference to it which appears in a default value in an
5635 * attribute-list declaration. Note that if entities are declared in the
5636 * external subset or in external parameter entities, a non-validating
5637 * processor is not obligated to read and process their declarations;
5638 * for such documents, the rule that an entity must be declared is a
5639 * well-formedness constraint only if standalone='yes'.
5640 *
5641 * [ WFC: Parsed Entity ]
5642 * An entity reference must not contain the name of an unparsed entity
5643 *
5644 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5645 * is updated to the current location in the string.
5646 */
5647xmlEntityPtr
5648xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5649 xmlChar *name;
5650 const xmlChar *ptr;
5651 xmlChar cur;
5652 xmlEntityPtr ent = NULL;
5653
5654 if ((str == NULL) || (*str == NULL))
5655 return(NULL);
5656 ptr = *str;
5657 cur = *ptr;
5658 if (cur == '&') {
5659 ptr++;
5660 cur = *ptr;
5661 name = xmlParseStringName(ctxt, &ptr);
5662 if (name == NULL) {
5663 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5665 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005666 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005667 ctxt->wellFormed = 0;
5668 ctxt->disableSAX = 1;
5669 } else {
5670 if (*ptr == ';') {
5671 ptr++;
5672 /*
5673 * Ask first SAX for entity resolution, otherwise try the
5674 * predefined set.
5675 */
5676 if (ctxt->sax != NULL) {
5677 if (ctxt->sax->getEntity != NULL)
5678 ent = ctxt->sax->getEntity(ctxt->userData, name);
5679 if (ent == NULL)
5680 ent = xmlGetPredefinedEntity(name);
5681 }
5682 /*
5683 * [ WFC: Entity Declared ]
5684 * In a document without any DTD, a document with only an
5685 * internal DTD subset which contains no parameter entity
5686 * references, or a document with "standalone='yes'", the
5687 * Name given in the entity reference must match that in an
5688 * entity declaration, except that well-formed documents
5689 * need not declare any of the following entities: amp, lt,
5690 * gt, apos, quot.
5691 * The declaration of a parameter entity must precede any
5692 * reference to it.
5693 * Similarly, the declaration of a general entity must
5694 * precede any reference to it which appears in a default
5695 * value in an attribute-list declaration. Note that if
5696 * entities are declared in the external subset or in
5697 * external parameter entities, a non-validating processor
5698 * is not obligated to read and process their declarations;
5699 * for such documents, the rule that an entity must be
5700 * declared is a well-formedness constraint only if
5701 * standalone='yes'.
5702 */
5703 if (ent == NULL) {
5704 if ((ctxt->standalone == 1) ||
5705 ((ctxt->hasExternalSubset == 0) &&
5706 (ctxt->hasPErefs == 0))) {
5707 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5709 ctxt->sax->error(ctxt->userData,
5710 "Entity '%s' not defined\n", name);
5711 ctxt->wellFormed = 0;
5712 ctxt->disableSAX = 1;
5713 } else {
5714 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5715 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5716 ctxt->sax->warning(ctxt->userData,
5717 "Entity '%s' not defined\n", name);
5718 }
5719 }
5720
5721 /*
5722 * [ WFC: Parsed Entity ]
5723 * An entity reference must not contain the name of an
5724 * unparsed entity
5725 */
5726 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5727 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5729 ctxt->sax->error(ctxt->userData,
5730 "Entity reference to unparsed entity %s\n", name);
5731 ctxt->wellFormed = 0;
5732 ctxt->disableSAX = 1;
5733 }
5734
5735 /*
5736 * [ WFC: No External Entity References ]
5737 * Attribute values cannot contain direct or indirect
5738 * entity references to external entities.
5739 */
5740 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5741 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5742 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5744 ctxt->sax->error(ctxt->userData,
5745 "Attribute references external entity '%s'\n", name);
5746 ctxt->wellFormed = 0;
5747 ctxt->disableSAX = 1;
5748 }
5749 /*
5750 * [ WFC: No < in Attribute Values ]
5751 * The replacement text of any entity referred to directly or
5752 * indirectly in an attribute value (other than "&lt;") must
5753 * not contain a <.
5754 */
5755 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5756 (ent != NULL) &&
5757 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5758 (ent->content != NULL) &&
5759 (xmlStrchr(ent->content, '<'))) {
5760 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "'<' in entity '%s' is not allowed in attributes values\n", name);
5764 ctxt->wellFormed = 0;
5765 ctxt->disableSAX = 1;
5766 }
5767
5768 /*
5769 * Internal check, no parameter entities here ...
5770 */
5771 else {
5772 switch (ent->etype) {
5773 case XML_INTERNAL_PARAMETER_ENTITY:
5774 case XML_EXTERNAL_PARAMETER_ENTITY:
5775 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5777 ctxt->sax->error(ctxt->userData,
5778 "Attempt to reference the parameter entity '%s'\n", name);
5779 ctxt->wellFormed = 0;
5780 ctxt->disableSAX = 1;
5781 break;
5782 default:
5783 break;
5784 }
5785 }
5786
5787 /*
5788 * [ WFC: No Recursion ]
5789 * A parsed entity must not contain a recursive reference
5790 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005791 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005792 */
5793
5794 } else {
5795 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5797 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005798 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005799 ctxt->wellFormed = 0;
5800 ctxt->disableSAX = 1;
5801 }
5802 xmlFree(name);
5803 }
5804 }
5805 *str = ptr;
5806 return(ent);
5807}
5808
5809/**
5810 * xmlParsePEReference:
5811 * @ctxt: an XML parser context
5812 *
5813 * parse PEReference declarations
5814 * The entity content is handled directly by pushing it's content as
5815 * a new input stream.
5816 *
5817 * [69] PEReference ::= '%' Name ';'
5818 *
5819 * [ WFC: No Recursion ]
5820 * A parsed entity must not contain a recursive
5821 * reference to itself, either directly or indirectly.
5822 *
5823 * [ WFC: Entity Declared ]
5824 * In a document without any DTD, a document with only an internal DTD
5825 * subset which contains no parameter entity references, or a document
5826 * with "standalone='yes'", ... ... The declaration of a parameter
5827 * entity must precede any reference to it...
5828 *
5829 * [ VC: Entity Declared ]
5830 * In a document with an external subset or external parameter entities
5831 * with "standalone='no'", ... ... The declaration of a parameter entity
5832 * must precede any reference to it...
5833 *
5834 * [ WFC: In DTD ]
5835 * Parameter-entity references may only appear in the DTD.
5836 * NOTE: misleading but this is handled.
5837 */
5838void
5839xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5840 xmlChar *name;
5841 xmlEntityPtr entity = NULL;
5842 xmlParserInputPtr input;
5843
5844 if (RAW == '%') {
5845 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005846 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005847 if (name == NULL) {
5848 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5850 ctxt->sax->error(ctxt->userData,
5851 "xmlParsePEReference: no name\n");
5852 ctxt->wellFormed = 0;
5853 ctxt->disableSAX = 1;
5854 } else {
5855 if (RAW == ';') {
5856 NEXT;
5857 if ((ctxt->sax != NULL) &&
5858 (ctxt->sax->getParameterEntity != NULL))
5859 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5860 name);
5861 if (entity == NULL) {
5862 /*
5863 * [ WFC: Entity Declared ]
5864 * In a document without any DTD, a document with only an
5865 * internal DTD subset which contains no parameter entity
5866 * references, or a document with "standalone='yes'", ...
5867 * ... The declaration of a parameter entity must precede
5868 * any reference to it...
5869 */
5870 if ((ctxt->standalone == 1) ||
5871 ((ctxt->hasExternalSubset == 0) &&
5872 (ctxt->hasPErefs == 0))) {
5873 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5874 if ((!ctxt->disableSAX) &&
5875 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5876 ctxt->sax->error(ctxt->userData,
5877 "PEReference: %%%s; not found\n", name);
5878 ctxt->wellFormed = 0;
5879 ctxt->disableSAX = 1;
5880 } else {
5881 /*
5882 * [ VC: Entity Declared ]
5883 * In a document with an external subset or external
5884 * parameter entities with "standalone='no'", ...
5885 * ... The declaration of a parameter entity must precede
5886 * any reference to it...
5887 */
5888 if ((!ctxt->disableSAX) &&
5889 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5890 ctxt->sax->warning(ctxt->userData,
5891 "PEReference: %%%s; not found\n", name);
5892 ctxt->valid = 0;
5893 }
5894 } else {
5895 /*
5896 * Internal checking in case the entity quest barfed
5897 */
5898 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5899 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5900 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5901 ctxt->sax->warning(ctxt->userData,
5902 "Internal: %%%s; is not a parameter entity\n", name);
5903 } else {
5904 /*
5905 * TODO !!!
5906 * handle the extra spaces added before and after
5907 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5908 */
5909 input = xmlNewEntityInputStream(ctxt, entity);
5910 xmlPushInput(ctxt, input);
5911 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5912 (RAW == '<') && (NXT(1) == '?') &&
5913 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5914 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5915 xmlParseTextDecl(ctxt);
5916 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5917 /*
5918 * The XML REC instructs us to stop parsing
5919 * right here
5920 */
5921 ctxt->instate = XML_PARSER_EOF;
5922 xmlFree(name);
5923 return;
5924 }
5925 }
5926 if (ctxt->token == 0)
5927 ctxt->token = ' ';
5928 }
5929 }
5930 ctxt->hasPErefs = 1;
5931 } else {
5932 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5934 ctxt->sax->error(ctxt->userData,
5935 "xmlParsePEReference: expecting ';'\n");
5936 ctxt->wellFormed = 0;
5937 ctxt->disableSAX = 1;
5938 }
5939 xmlFree(name);
5940 }
5941 }
5942}
5943
5944/**
5945 * xmlParseStringPEReference:
5946 * @ctxt: an XML parser context
5947 * @str: a pointer to an index in the string
5948 *
5949 * parse PEReference declarations
5950 *
5951 * [69] PEReference ::= '%' Name ';'
5952 *
5953 * [ WFC: No Recursion ]
5954 * A parsed entity must not contain a recursive
5955 * reference to itself, either directly or indirectly.
5956 *
5957 * [ WFC: Entity Declared ]
5958 * In a document without any DTD, a document with only an internal DTD
5959 * subset which contains no parameter entity references, or a document
5960 * with "standalone='yes'", ... ... The declaration of a parameter
5961 * entity must precede any reference to it...
5962 *
5963 * [ VC: Entity Declared ]
5964 * In a document with an external subset or external parameter entities
5965 * with "standalone='no'", ... ... The declaration of a parameter entity
5966 * must precede any reference to it...
5967 *
5968 * [ WFC: In DTD ]
5969 * Parameter-entity references may only appear in the DTD.
5970 * NOTE: misleading but this is handled.
5971 *
5972 * Returns the string of the entity content.
5973 * str is updated to the current value of the index
5974 */
5975xmlEntityPtr
5976xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5977 const xmlChar *ptr;
5978 xmlChar cur;
5979 xmlChar *name;
5980 xmlEntityPtr entity = NULL;
5981
5982 if ((str == NULL) || (*str == NULL)) return(NULL);
5983 ptr = *str;
5984 cur = *ptr;
5985 if (cur == '%') {
5986 ptr++;
5987 cur = *ptr;
5988 name = xmlParseStringName(ctxt, &ptr);
5989 if (name == NULL) {
5990 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5992 ctxt->sax->error(ctxt->userData,
5993 "xmlParseStringPEReference: no name\n");
5994 ctxt->wellFormed = 0;
5995 ctxt->disableSAX = 1;
5996 } else {
5997 cur = *ptr;
5998 if (cur == ';') {
5999 ptr++;
6000 cur = *ptr;
6001 if ((ctxt->sax != NULL) &&
6002 (ctxt->sax->getParameterEntity != NULL))
6003 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6004 name);
6005 if (entity == NULL) {
6006 /*
6007 * [ WFC: Entity Declared ]
6008 * In a document without any DTD, a document with only an
6009 * internal DTD subset which contains no parameter entity
6010 * references, or a document with "standalone='yes'", ...
6011 * ... The declaration of a parameter entity must precede
6012 * any reference to it...
6013 */
6014 if ((ctxt->standalone == 1) ||
6015 ((ctxt->hasExternalSubset == 0) &&
6016 (ctxt->hasPErefs == 0))) {
6017 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6019 ctxt->sax->error(ctxt->userData,
6020 "PEReference: %%%s; not found\n", name);
6021 ctxt->wellFormed = 0;
6022 ctxt->disableSAX = 1;
6023 } else {
6024 /*
6025 * [ VC: Entity Declared ]
6026 * In a document with an external subset or external
6027 * parameter entities with "standalone='no'", ...
6028 * ... The declaration of a parameter entity must
6029 * precede any reference to it...
6030 */
6031 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6032 ctxt->sax->warning(ctxt->userData,
6033 "PEReference: %%%s; not found\n", name);
6034 ctxt->valid = 0;
6035 }
6036 } else {
6037 /*
6038 * Internal checking in case the entity quest barfed
6039 */
6040 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6041 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6042 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6043 ctxt->sax->warning(ctxt->userData,
6044 "Internal: %%%s; is not a parameter entity\n", name);
6045 }
6046 }
6047 ctxt->hasPErefs = 1;
6048 } else {
6049 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6051 ctxt->sax->error(ctxt->userData,
6052 "xmlParseStringPEReference: expecting ';'\n");
6053 ctxt->wellFormed = 0;
6054 ctxt->disableSAX = 1;
6055 }
6056 xmlFree(name);
6057 }
6058 }
6059 *str = ptr;
6060 return(entity);
6061}
6062
6063/**
6064 * xmlParseDocTypeDecl:
6065 * @ctxt: an XML parser context
6066 *
6067 * parse a DOCTYPE declaration
6068 *
6069 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6070 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6071 *
6072 * [ VC: Root Element Type ]
6073 * The Name in the document type declaration must match the element
6074 * type of the root element.
6075 */
6076
6077void
6078xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6079 xmlChar *name = NULL;
6080 xmlChar *ExternalID = NULL;
6081 xmlChar *URI = NULL;
6082
6083 /*
6084 * We know that '<!DOCTYPE' has been detected.
6085 */
6086 SKIP(9);
6087
6088 SKIP_BLANKS;
6089
6090 /*
6091 * Parse the DOCTYPE name.
6092 */
6093 name = xmlParseName(ctxt);
6094 if (name == NULL) {
6095 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6097 ctxt->sax->error(ctxt->userData,
6098 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6099 ctxt->wellFormed = 0;
6100 ctxt->disableSAX = 1;
6101 }
6102 ctxt->intSubName = name;
6103
6104 SKIP_BLANKS;
6105
6106 /*
6107 * Check for SystemID and ExternalID
6108 */
6109 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6110
6111 if ((URI != NULL) || (ExternalID != NULL)) {
6112 ctxt->hasExternalSubset = 1;
6113 }
6114 ctxt->extSubURI = URI;
6115 ctxt->extSubSystem = ExternalID;
6116
6117 SKIP_BLANKS;
6118
6119 /*
6120 * Create and update the internal subset.
6121 */
6122 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6123 (!ctxt->disableSAX))
6124 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6125
6126 /*
6127 * Is there any internal subset declarations ?
6128 * they are handled separately in xmlParseInternalSubset()
6129 */
6130 if (RAW == '[')
6131 return;
6132
6133 /*
6134 * We should be at the end of the DOCTYPE declaration.
6135 */
6136 if (RAW != '>') {
6137 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006139 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006140 ctxt->wellFormed = 0;
6141 ctxt->disableSAX = 1;
6142 }
6143 NEXT;
6144}
6145
6146/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006147 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006148 * @ctxt: an XML parser context
6149 *
6150 * parse the internal subset declaration
6151 *
6152 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6153 */
6154
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006155static void
Owen Taylor3473f882001-02-23 17:55:21 +00006156xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6157 /*
6158 * Is there any DTD definition ?
6159 */
6160 if (RAW == '[') {
6161 ctxt->instate = XML_PARSER_DTD;
6162 NEXT;
6163 /*
6164 * Parse the succession of Markup declarations and
6165 * PEReferences.
6166 * Subsequence (markupdecl | PEReference | S)*
6167 */
6168 while (RAW != ']') {
6169 const xmlChar *check = CUR_PTR;
6170 int cons = ctxt->input->consumed;
6171
6172 SKIP_BLANKS;
6173 xmlParseMarkupDecl(ctxt);
6174 xmlParsePEReference(ctxt);
6175
6176 /*
6177 * Pop-up of finished entities.
6178 */
6179 while ((RAW == 0) && (ctxt->inputNr > 1))
6180 xmlPopInput(ctxt);
6181
6182 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6183 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6185 ctxt->sax->error(ctxt->userData,
6186 "xmlParseInternalSubset: error detected in Markup declaration\n");
6187 ctxt->wellFormed = 0;
6188 ctxt->disableSAX = 1;
6189 break;
6190 }
6191 }
6192 if (RAW == ']') {
6193 NEXT;
6194 SKIP_BLANKS;
6195 }
6196 }
6197
6198 /*
6199 * We should be at the end of the DOCTYPE declaration.
6200 */
6201 if (RAW != '>') {
6202 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006204 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006205 ctxt->wellFormed = 0;
6206 ctxt->disableSAX = 1;
6207 }
6208 NEXT;
6209}
6210
6211/**
6212 * xmlParseAttribute:
6213 * @ctxt: an XML parser context
6214 * @value: a xmlChar ** used to store the value of the attribute
6215 *
6216 * parse an attribute
6217 *
6218 * [41] Attribute ::= Name Eq AttValue
6219 *
6220 * [ WFC: No External Entity References ]
6221 * Attribute values cannot contain direct or indirect entity references
6222 * to external entities.
6223 *
6224 * [ WFC: No < in Attribute Values ]
6225 * The replacement text of any entity referred to directly or indirectly in
6226 * an attribute value (other than "&lt;") must not contain a <.
6227 *
6228 * [ VC: Attribute Value Type ]
6229 * The attribute must have been declared; the value must be of the type
6230 * declared for it.
6231 *
6232 * [25] Eq ::= S? '=' S?
6233 *
6234 * With namespace:
6235 *
6236 * [NS 11] Attribute ::= QName Eq AttValue
6237 *
6238 * Also the case QName == xmlns:??? is handled independently as a namespace
6239 * definition.
6240 *
6241 * Returns the attribute name, and the value in *value.
6242 */
6243
6244xmlChar *
6245xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6246 xmlChar *name, *val;
6247
6248 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006249 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006250 name = xmlParseName(ctxt);
6251 if (name == NULL) {
6252 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6254 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6255 ctxt->wellFormed = 0;
6256 ctxt->disableSAX = 1;
6257 return(NULL);
6258 }
6259
6260 /*
6261 * read the value
6262 */
6263 SKIP_BLANKS;
6264 if (RAW == '=') {
6265 NEXT;
6266 SKIP_BLANKS;
6267 val = xmlParseAttValue(ctxt);
6268 ctxt->instate = XML_PARSER_CONTENT;
6269 } else {
6270 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6272 ctxt->sax->error(ctxt->userData,
6273 "Specification mandate value for attribute %s\n", name);
6274 ctxt->wellFormed = 0;
6275 ctxt->disableSAX = 1;
6276 xmlFree(name);
6277 return(NULL);
6278 }
6279
6280 /*
6281 * Check that xml:lang conforms to the specification
6282 * No more registered as an error, just generate a warning now
6283 * since this was deprecated in XML second edition
6284 */
6285 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6286 if (!xmlCheckLanguageID(val)) {
6287 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6288 ctxt->sax->warning(ctxt->userData,
6289 "Malformed value for xml:lang : %s\n", val);
6290 }
6291 }
6292
6293 /*
6294 * Check that xml:space conforms to the specification
6295 */
6296 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6297 if (xmlStrEqual(val, BAD_CAST "default"))
6298 *(ctxt->space) = 0;
6299 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6300 *(ctxt->space) = 1;
6301 else {
6302 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6304 ctxt->sax->error(ctxt->userData,
6305"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6306 val);
6307 ctxt->wellFormed = 0;
6308 ctxt->disableSAX = 1;
6309 }
6310 }
6311
6312 *value = val;
6313 return(name);
6314}
6315
6316/**
6317 * xmlParseStartTag:
6318 * @ctxt: an XML parser context
6319 *
6320 * parse a start of tag either for rule element or
6321 * EmptyElement. In both case we don't parse the tag closing chars.
6322 *
6323 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6324 *
6325 * [ WFC: Unique Att Spec ]
6326 * No attribute name may appear more than once in the same start-tag or
6327 * empty-element tag.
6328 *
6329 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6330 *
6331 * [ WFC: Unique Att Spec ]
6332 * No attribute name may appear more than once in the same start-tag or
6333 * empty-element tag.
6334 *
6335 * With namespace:
6336 *
6337 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6338 *
6339 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6340 *
6341 * Returns the element name parsed
6342 */
6343
6344xmlChar *
6345xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6346 xmlChar *name;
6347 xmlChar *attname;
6348 xmlChar *attvalue;
6349 const xmlChar **atts = NULL;
6350 int nbatts = 0;
6351 int maxatts = 0;
6352 int i;
6353
6354 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006355 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006356
6357 name = xmlParseName(ctxt);
6358 if (name == NULL) {
6359 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6361 ctxt->sax->error(ctxt->userData,
6362 "xmlParseStartTag: invalid element name\n");
6363 ctxt->wellFormed = 0;
6364 ctxt->disableSAX = 1;
6365 return(NULL);
6366 }
6367
6368 /*
6369 * Now parse the attributes, it ends up with the ending
6370 *
6371 * (S Attribute)* S?
6372 */
6373 SKIP_BLANKS;
6374 GROW;
6375
Daniel Veillard21a0f912001-02-25 19:54:14 +00006376 while ((RAW != '>') &&
6377 ((RAW != '/') || (NXT(1) != '>')) &&
6378 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006379 const xmlChar *q = CUR_PTR;
6380 int cons = ctxt->input->consumed;
6381
6382 attname = xmlParseAttribute(ctxt, &attvalue);
6383 if ((attname != NULL) && (attvalue != NULL)) {
6384 /*
6385 * [ WFC: Unique Att Spec ]
6386 * No attribute name may appear more than once in the same
6387 * start-tag or empty-element tag.
6388 */
6389 for (i = 0; i < nbatts;i += 2) {
6390 if (xmlStrEqual(atts[i], attname)) {
6391 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6393 ctxt->sax->error(ctxt->userData,
6394 "Attribute %s redefined\n",
6395 attname);
6396 ctxt->wellFormed = 0;
6397 ctxt->disableSAX = 1;
6398 xmlFree(attname);
6399 xmlFree(attvalue);
6400 goto failed;
6401 }
6402 }
6403
6404 /*
6405 * Add the pair to atts
6406 */
6407 if (atts == NULL) {
6408 maxatts = 10;
6409 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6410 if (atts == NULL) {
6411 xmlGenericError(xmlGenericErrorContext,
6412 "malloc of %ld byte failed\n",
6413 maxatts * (long)sizeof(xmlChar *));
6414 return(NULL);
6415 }
6416 } else if (nbatts + 4 > maxatts) {
6417 maxatts *= 2;
6418 atts = (const xmlChar **) xmlRealloc((void *) atts,
6419 maxatts * sizeof(xmlChar *));
6420 if (atts == NULL) {
6421 xmlGenericError(xmlGenericErrorContext,
6422 "realloc of %ld byte failed\n",
6423 maxatts * (long)sizeof(xmlChar *));
6424 return(NULL);
6425 }
6426 }
6427 atts[nbatts++] = attname;
6428 atts[nbatts++] = attvalue;
6429 atts[nbatts] = NULL;
6430 atts[nbatts + 1] = NULL;
6431 } else {
6432 if (attname != NULL)
6433 xmlFree(attname);
6434 if (attvalue != NULL)
6435 xmlFree(attvalue);
6436 }
6437
6438failed:
6439
6440 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6441 break;
6442 if (!IS_BLANK(RAW)) {
6443 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6445 ctxt->sax->error(ctxt->userData,
6446 "attributes construct error\n");
6447 ctxt->wellFormed = 0;
6448 ctxt->disableSAX = 1;
6449 }
6450 SKIP_BLANKS;
6451 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6452 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6454 ctxt->sax->error(ctxt->userData,
6455 "xmlParseStartTag: problem parsing attributes\n");
6456 ctxt->wellFormed = 0;
6457 ctxt->disableSAX = 1;
6458 break;
6459 }
6460 GROW;
6461 }
6462
6463 /*
6464 * SAX: Start of Element !
6465 */
6466 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6467 (!ctxt->disableSAX))
6468 ctxt->sax->startElement(ctxt->userData, name, atts);
6469
6470 if (atts != NULL) {
6471 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6472 xmlFree((void *) atts);
6473 }
6474 return(name);
6475}
6476
6477/**
6478 * xmlParseEndTag:
6479 * @ctxt: an XML parser context
6480 *
6481 * parse an end of tag
6482 *
6483 * [42] ETag ::= '</' Name S? '>'
6484 *
6485 * With namespace
6486 *
6487 * [NS 9] ETag ::= '</' QName S? '>'
6488 */
6489
6490void
6491xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6492 xmlChar *name;
6493 xmlChar *oldname;
6494
6495 GROW;
6496 if ((RAW != '<') || (NXT(1) != '/')) {
6497 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6499 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6500 ctxt->wellFormed = 0;
6501 ctxt->disableSAX = 1;
6502 return;
6503 }
6504 SKIP(2);
6505
6506 name = xmlParseName(ctxt);
6507
6508 /*
6509 * We should definitely be at the ending "S? '>'" part
6510 */
6511 GROW;
6512 SKIP_BLANKS;
6513 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6514 ctxt->errNo = XML_ERR_GT_REQUIRED;
6515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6516 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6517 ctxt->wellFormed = 0;
6518 ctxt->disableSAX = 1;
6519 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006520 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006521
6522 /*
6523 * [ WFC: Element Type Match ]
6524 * The Name in an element's end-tag must match the element type in the
6525 * start-tag.
6526 *
6527 */
6528 if ((name == NULL) || (ctxt->name == NULL) ||
6529 (!xmlStrEqual(name, ctxt->name))) {
6530 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6532 if ((name != NULL) && (ctxt->name != NULL)) {
6533 ctxt->sax->error(ctxt->userData,
6534 "Opening and ending tag mismatch: %s and %s\n",
6535 ctxt->name, name);
6536 } else if (ctxt->name != NULL) {
6537 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006538 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006539 } else {
6540 ctxt->sax->error(ctxt->userData,
6541 "Ending tag error: internal error ???\n");
6542 }
6543
6544 }
6545 ctxt->wellFormed = 0;
6546 ctxt->disableSAX = 1;
6547 }
6548
6549 /*
6550 * SAX: End of Tag
6551 */
6552 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6553 (!ctxt->disableSAX))
6554 ctxt->sax->endElement(ctxt->userData, name);
6555
6556 if (name != NULL)
6557 xmlFree(name);
6558 oldname = namePop(ctxt);
6559 spacePop(ctxt);
6560 if (oldname != NULL) {
6561#ifdef DEBUG_STACK
6562 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6563#endif
6564 xmlFree(oldname);
6565 }
6566 return;
6567}
6568
6569/**
6570 * xmlParseCDSect:
6571 * @ctxt: an XML parser context
6572 *
6573 * Parse escaped pure raw content.
6574 *
6575 * [18] CDSect ::= CDStart CData CDEnd
6576 *
6577 * [19] CDStart ::= '<![CDATA['
6578 *
6579 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6580 *
6581 * [21] CDEnd ::= ']]>'
6582 */
6583void
6584xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6585 xmlChar *buf = NULL;
6586 int len = 0;
6587 int size = XML_PARSER_BUFFER_SIZE;
6588 int r, rl;
6589 int s, sl;
6590 int cur, l;
6591 int count = 0;
6592
6593 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6594 (NXT(2) == '[') && (NXT(3) == 'C') &&
6595 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6596 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6597 (NXT(8) == '[')) {
6598 SKIP(9);
6599 } else
6600 return;
6601
6602 ctxt->instate = XML_PARSER_CDATA_SECTION;
6603 r = CUR_CHAR(rl);
6604 if (!IS_CHAR(r)) {
6605 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6607 ctxt->sax->error(ctxt->userData,
6608 "CData section not finished\n");
6609 ctxt->wellFormed = 0;
6610 ctxt->disableSAX = 1;
6611 ctxt->instate = XML_PARSER_CONTENT;
6612 return;
6613 }
6614 NEXTL(rl);
6615 s = CUR_CHAR(sl);
6616 if (!IS_CHAR(s)) {
6617 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6619 ctxt->sax->error(ctxt->userData,
6620 "CData section not finished\n");
6621 ctxt->wellFormed = 0;
6622 ctxt->disableSAX = 1;
6623 ctxt->instate = XML_PARSER_CONTENT;
6624 return;
6625 }
6626 NEXTL(sl);
6627 cur = CUR_CHAR(l);
6628 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6629 if (buf == NULL) {
6630 xmlGenericError(xmlGenericErrorContext,
6631 "malloc of %d byte failed\n", size);
6632 return;
6633 }
6634 while (IS_CHAR(cur) &&
6635 ((r != ']') || (s != ']') || (cur != '>'))) {
6636 if (len + 5 >= size) {
6637 size *= 2;
6638 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6639 if (buf == NULL) {
6640 xmlGenericError(xmlGenericErrorContext,
6641 "realloc of %d byte failed\n", size);
6642 return;
6643 }
6644 }
6645 COPY_BUF(rl,buf,len,r);
6646 r = s;
6647 rl = sl;
6648 s = cur;
6649 sl = l;
6650 count++;
6651 if (count > 50) {
6652 GROW;
6653 count = 0;
6654 }
6655 NEXTL(l);
6656 cur = CUR_CHAR(l);
6657 }
6658 buf[len] = 0;
6659 ctxt->instate = XML_PARSER_CONTENT;
6660 if (cur != '>') {
6661 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6663 ctxt->sax->error(ctxt->userData,
6664 "CData section not finished\n%.50s\n", buf);
6665 ctxt->wellFormed = 0;
6666 ctxt->disableSAX = 1;
6667 xmlFree(buf);
6668 return;
6669 }
6670 NEXTL(l);
6671
6672 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006673 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006674 */
6675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6676 if (ctxt->sax->cdataBlock != NULL)
6677 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006678 else if (ctxt->sax->characters != NULL)
6679 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006680 }
6681 xmlFree(buf);
6682}
6683
6684/**
6685 * xmlParseContent:
6686 * @ctxt: an XML parser context
6687 *
6688 * Parse a content:
6689 *
6690 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6691 */
6692
6693void
6694xmlParseContent(xmlParserCtxtPtr ctxt) {
6695 GROW;
6696 while (((RAW != 0) || (ctxt->token != 0)) &&
6697 ((RAW != '<') || (NXT(1) != '/'))) {
6698 const xmlChar *test = CUR_PTR;
6699 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006700 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006701 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006702
6703 /*
6704 * Handle possible processed charrefs.
6705 */
6706 if (ctxt->token != 0) {
6707 xmlParseCharData(ctxt, 0);
6708 }
6709 /*
6710 * First case : a Processing Instruction.
6711 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006712 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006713 xmlParsePI(ctxt);
6714 }
6715
6716 /*
6717 * Second case : a CDSection
6718 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006719 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006720 (NXT(2) == '[') && (NXT(3) == 'C') &&
6721 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6722 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6723 (NXT(8) == '[')) {
6724 xmlParseCDSect(ctxt);
6725 }
6726
6727 /*
6728 * Third case : a comment
6729 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006730 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006731 (NXT(2) == '-') && (NXT(3) == '-')) {
6732 xmlParseComment(ctxt);
6733 ctxt->instate = XML_PARSER_CONTENT;
6734 }
6735
6736 /*
6737 * Fourth case : a sub-element.
6738 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006739 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006740 xmlParseElement(ctxt);
6741 }
6742
6743 /*
6744 * Fifth case : a reference. If if has not been resolved,
6745 * parsing returns it's Name, create the node
6746 */
6747
Daniel Veillard21a0f912001-02-25 19:54:14 +00006748 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006749 xmlParseReference(ctxt);
6750 }
6751
6752 /*
6753 * Last case, text. Note that References are handled directly.
6754 */
6755 else {
6756 xmlParseCharData(ctxt, 0);
6757 }
6758
6759 GROW;
6760 /*
6761 * Pop-up of finished entities.
6762 */
6763 while ((RAW == 0) && (ctxt->inputNr > 1))
6764 xmlPopInput(ctxt);
6765 SHRINK;
6766
6767 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6768 (tok == ctxt->token)) {
6769 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6771 ctxt->sax->error(ctxt->userData,
6772 "detected an error in element content\n");
6773 ctxt->wellFormed = 0;
6774 ctxt->disableSAX = 1;
6775 ctxt->instate = XML_PARSER_EOF;
6776 break;
6777 }
6778 }
6779}
6780
6781/**
6782 * xmlParseElement:
6783 * @ctxt: an XML parser context
6784 *
6785 * parse an XML element, this is highly recursive
6786 *
6787 * [39] element ::= EmptyElemTag | STag content ETag
6788 *
6789 * [ WFC: Element Type Match ]
6790 * The Name in an element's end-tag must match the element type in the
6791 * start-tag.
6792 *
6793 * [ VC: Element Valid ]
6794 * An element is valid if there is a declaration matching elementdecl
6795 * where the Name matches the element type and one of the following holds:
6796 * - The declaration matches EMPTY and the element has no content.
6797 * - The declaration matches children and the sequence of child elements
6798 * belongs to the language generated by the regular expression in the
6799 * content model, with optional white space (characters matching the
6800 * nonterminal S) between each pair of child elements.
6801 * - The declaration matches Mixed and the content consists of character
6802 * data and child elements whose types match names in the content model.
6803 * - The declaration matches ANY, and the types of any child elements have
6804 * been declared.
6805 */
6806
6807void
6808xmlParseElement(xmlParserCtxtPtr ctxt) {
6809 const xmlChar *openTag = CUR_PTR;
6810 xmlChar *name;
6811 xmlChar *oldname;
6812 xmlParserNodeInfo node_info;
6813 xmlNodePtr ret;
6814
6815 /* Capture start position */
6816 if (ctxt->record_info) {
6817 node_info.begin_pos = ctxt->input->consumed +
6818 (CUR_PTR - ctxt->input->base);
6819 node_info.begin_line = ctxt->input->line;
6820 }
6821
6822 if (ctxt->spaceNr == 0)
6823 spacePush(ctxt, -1);
6824 else
6825 spacePush(ctxt, *ctxt->space);
6826
6827 name = xmlParseStartTag(ctxt);
6828 if (name == NULL) {
6829 spacePop(ctxt);
6830 return;
6831 }
6832 namePush(ctxt, name);
6833 ret = ctxt->node;
6834
6835 /*
6836 * [ VC: Root Element Type ]
6837 * The Name in the document type declaration must match the element
6838 * type of the root element.
6839 */
6840 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6841 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6842 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6843
6844 /*
6845 * Check for an Empty Element.
6846 */
6847 if ((RAW == '/') && (NXT(1) == '>')) {
6848 SKIP(2);
6849 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6850 (!ctxt->disableSAX))
6851 ctxt->sax->endElement(ctxt->userData, name);
6852 oldname = namePop(ctxt);
6853 spacePop(ctxt);
6854 if (oldname != NULL) {
6855#ifdef DEBUG_STACK
6856 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6857#endif
6858 xmlFree(oldname);
6859 }
6860 if ( ret != NULL && ctxt->record_info ) {
6861 node_info.end_pos = ctxt->input->consumed +
6862 (CUR_PTR - ctxt->input->base);
6863 node_info.end_line = ctxt->input->line;
6864 node_info.node = ret;
6865 xmlParserAddNodeInfo(ctxt, &node_info);
6866 }
6867 return;
6868 }
6869 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006870 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006871 } else {
6872 ctxt->errNo = XML_ERR_GT_REQUIRED;
6873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6874 ctxt->sax->error(ctxt->userData,
6875 "Couldn't find end of Start Tag\n%.30s\n",
6876 openTag);
6877 ctxt->wellFormed = 0;
6878 ctxt->disableSAX = 1;
6879
6880 /*
6881 * end of parsing of this node.
6882 */
6883 nodePop(ctxt);
6884 oldname = namePop(ctxt);
6885 spacePop(ctxt);
6886 if (oldname != NULL) {
6887#ifdef DEBUG_STACK
6888 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6889#endif
6890 xmlFree(oldname);
6891 }
6892
6893 /*
6894 * Capture end position and add node
6895 */
6896 if ( ret != NULL && ctxt->record_info ) {
6897 node_info.end_pos = ctxt->input->consumed +
6898 (CUR_PTR - ctxt->input->base);
6899 node_info.end_line = ctxt->input->line;
6900 node_info.node = ret;
6901 xmlParserAddNodeInfo(ctxt, &node_info);
6902 }
6903 return;
6904 }
6905
6906 /*
6907 * Parse the content of the element:
6908 */
6909 xmlParseContent(ctxt);
6910 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006911 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6913 ctxt->sax->error(ctxt->userData,
6914 "Premature end of data in tag %.30s\n", openTag);
6915 ctxt->wellFormed = 0;
6916 ctxt->disableSAX = 1;
6917
6918 /*
6919 * end of parsing of this node.
6920 */
6921 nodePop(ctxt);
6922 oldname = namePop(ctxt);
6923 spacePop(ctxt);
6924 if (oldname != NULL) {
6925#ifdef DEBUG_STACK
6926 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6927#endif
6928 xmlFree(oldname);
6929 }
6930 return;
6931 }
6932
6933 /*
6934 * parse the end of tag: '</' should be here.
6935 */
6936 xmlParseEndTag(ctxt);
6937
6938 /*
6939 * Capture end position and add node
6940 */
6941 if ( ret != NULL && ctxt->record_info ) {
6942 node_info.end_pos = ctxt->input->consumed +
6943 (CUR_PTR - ctxt->input->base);
6944 node_info.end_line = ctxt->input->line;
6945 node_info.node = ret;
6946 xmlParserAddNodeInfo(ctxt, &node_info);
6947 }
6948}
6949
6950/**
6951 * xmlParseVersionNum:
6952 * @ctxt: an XML parser context
6953 *
6954 * parse the XML version value.
6955 *
6956 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6957 *
6958 * Returns the string giving the XML version number, or NULL
6959 */
6960xmlChar *
6961xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6962 xmlChar *buf = NULL;
6963 int len = 0;
6964 int size = 10;
6965 xmlChar cur;
6966
6967 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6968 if (buf == NULL) {
6969 xmlGenericError(xmlGenericErrorContext,
6970 "malloc of %d byte failed\n", size);
6971 return(NULL);
6972 }
6973 cur = CUR;
6974 while (((cur >= 'a') && (cur <= 'z')) ||
6975 ((cur >= 'A') && (cur <= 'Z')) ||
6976 ((cur >= '0') && (cur <= '9')) ||
6977 (cur == '_') || (cur == '.') ||
6978 (cur == ':') || (cur == '-')) {
6979 if (len + 1 >= size) {
6980 size *= 2;
6981 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6982 if (buf == NULL) {
6983 xmlGenericError(xmlGenericErrorContext,
6984 "realloc of %d byte failed\n", size);
6985 return(NULL);
6986 }
6987 }
6988 buf[len++] = cur;
6989 NEXT;
6990 cur=CUR;
6991 }
6992 buf[len] = 0;
6993 return(buf);
6994}
6995
6996/**
6997 * xmlParseVersionInfo:
6998 * @ctxt: an XML parser context
6999 *
7000 * parse the XML version.
7001 *
7002 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7003 *
7004 * [25] Eq ::= S? '=' S?
7005 *
7006 * Returns the version string, e.g. "1.0"
7007 */
7008
7009xmlChar *
7010xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7011 xmlChar *version = NULL;
7012 const xmlChar *q;
7013
7014 if ((RAW == 'v') && (NXT(1) == 'e') &&
7015 (NXT(2) == 'r') && (NXT(3) == 's') &&
7016 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7017 (NXT(6) == 'n')) {
7018 SKIP(7);
7019 SKIP_BLANKS;
7020 if (RAW != '=') {
7021 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7023 ctxt->sax->error(ctxt->userData,
7024 "xmlParseVersionInfo : expected '='\n");
7025 ctxt->wellFormed = 0;
7026 ctxt->disableSAX = 1;
7027 return(NULL);
7028 }
7029 NEXT;
7030 SKIP_BLANKS;
7031 if (RAW == '"') {
7032 NEXT;
7033 q = CUR_PTR;
7034 version = xmlParseVersionNum(ctxt);
7035 if (RAW != '"') {
7036 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7038 ctxt->sax->error(ctxt->userData,
7039 "String not closed\n%.50s\n", q);
7040 ctxt->wellFormed = 0;
7041 ctxt->disableSAX = 1;
7042 } else
7043 NEXT;
7044 } else if (RAW == '\''){
7045 NEXT;
7046 q = CUR_PTR;
7047 version = xmlParseVersionNum(ctxt);
7048 if (RAW != '\'') {
7049 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7051 ctxt->sax->error(ctxt->userData,
7052 "String not closed\n%.50s\n", q);
7053 ctxt->wellFormed = 0;
7054 ctxt->disableSAX = 1;
7055 } else
7056 NEXT;
7057 } else {
7058 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7060 ctxt->sax->error(ctxt->userData,
7061 "xmlParseVersionInfo : expected ' or \"\n");
7062 ctxt->wellFormed = 0;
7063 ctxt->disableSAX = 1;
7064 }
7065 }
7066 return(version);
7067}
7068
7069/**
7070 * xmlParseEncName:
7071 * @ctxt: an XML parser context
7072 *
7073 * parse the XML encoding name
7074 *
7075 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7076 *
7077 * Returns the encoding name value or NULL
7078 */
7079xmlChar *
7080xmlParseEncName(xmlParserCtxtPtr ctxt) {
7081 xmlChar *buf = NULL;
7082 int len = 0;
7083 int size = 10;
7084 xmlChar cur;
7085
7086 cur = CUR;
7087 if (((cur >= 'a') && (cur <= 'z')) ||
7088 ((cur >= 'A') && (cur <= 'Z'))) {
7089 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7090 if (buf == NULL) {
7091 xmlGenericError(xmlGenericErrorContext,
7092 "malloc of %d byte failed\n", size);
7093 return(NULL);
7094 }
7095
7096 buf[len++] = cur;
7097 NEXT;
7098 cur = CUR;
7099 while (((cur >= 'a') && (cur <= 'z')) ||
7100 ((cur >= 'A') && (cur <= 'Z')) ||
7101 ((cur >= '0') && (cur <= '9')) ||
7102 (cur == '.') || (cur == '_') ||
7103 (cur == '-')) {
7104 if (len + 1 >= size) {
7105 size *= 2;
7106 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7107 if (buf == NULL) {
7108 xmlGenericError(xmlGenericErrorContext,
7109 "realloc of %d byte failed\n", size);
7110 return(NULL);
7111 }
7112 }
7113 buf[len++] = cur;
7114 NEXT;
7115 cur = CUR;
7116 if (cur == 0) {
7117 SHRINK;
7118 GROW;
7119 cur = CUR;
7120 }
7121 }
7122 buf[len] = 0;
7123 } else {
7124 ctxt->errNo = XML_ERR_ENCODING_NAME;
7125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7126 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7127 ctxt->wellFormed = 0;
7128 ctxt->disableSAX = 1;
7129 }
7130 return(buf);
7131}
7132
7133/**
7134 * xmlParseEncodingDecl:
7135 * @ctxt: an XML parser context
7136 *
7137 * parse the XML encoding declaration
7138 *
7139 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7140 *
7141 * this setups the conversion filters.
7142 *
7143 * Returns the encoding value or NULL
7144 */
7145
7146xmlChar *
7147xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7148 xmlChar *encoding = NULL;
7149 const xmlChar *q;
7150
7151 SKIP_BLANKS;
7152 if ((RAW == 'e') && (NXT(1) == 'n') &&
7153 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7154 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7155 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7156 SKIP(8);
7157 SKIP_BLANKS;
7158 if (RAW != '=') {
7159 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7161 ctxt->sax->error(ctxt->userData,
7162 "xmlParseEncodingDecl : expected '='\n");
7163 ctxt->wellFormed = 0;
7164 ctxt->disableSAX = 1;
7165 return(NULL);
7166 }
7167 NEXT;
7168 SKIP_BLANKS;
7169 if (RAW == '"') {
7170 NEXT;
7171 q = CUR_PTR;
7172 encoding = xmlParseEncName(ctxt);
7173 if (RAW != '"') {
7174 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7176 ctxt->sax->error(ctxt->userData,
7177 "String not closed\n%.50s\n", q);
7178 ctxt->wellFormed = 0;
7179 ctxt->disableSAX = 1;
7180 } else
7181 NEXT;
7182 } else if (RAW == '\''){
7183 NEXT;
7184 q = CUR_PTR;
7185 encoding = xmlParseEncName(ctxt);
7186 if (RAW != '\'') {
7187 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7189 ctxt->sax->error(ctxt->userData,
7190 "String not closed\n%.50s\n", q);
7191 ctxt->wellFormed = 0;
7192 ctxt->disableSAX = 1;
7193 } else
7194 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007195 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007196 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7198 ctxt->sax->error(ctxt->userData,
7199 "xmlParseEncodingDecl : expected ' or \"\n");
7200 ctxt->wellFormed = 0;
7201 ctxt->disableSAX = 1;
7202 }
7203 if (encoding != NULL) {
7204 xmlCharEncoding enc;
7205 xmlCharEncodingHandlerPtr handler;
7206
7207 if (ctxt->input->encoding != NULL)
7208 xmlFree((xmlChar *) ctxt->input->encoding);
7209 ctxt->input->encoding = encoding;
7210
7211 enc = xmlParseCharEncoding((const char *) encoding);
7212 /*
7213 * registered set of known encodings
7214 */
7215 if (enc != XML_CHAR_ENCODING_ERROR) {
7216 xmlSwitchEncoding(ctxt, enc);
7217 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7218 xmlFree(encoding);
7219 return(NULL);
7220 }
7221 } else {
7222 /*
7223 * fallback for unknown encodings
7224 */
7225 handler = xmlFindCharEncodingHandler((const char *) encoding);
7226 if (handler != NULL) {
7227 xmlSwitchToEncoding(ctxt, handler);
7228 } else {
7229 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7231 ctxt->sax->error(ctxt->userData,
7232 "Unsupported encoding %s\n", encoding);
7233 return(NULL);
7234 }
7235 }
7236 }
7237 }
7238 return(encoding);
7239}
7240
7241/**
7242 * xmlParseSDDecl:
7243 * @ctxt: an XML parser context
7244 *
7245 * parse the XML standalone declaration
7246 *
7247 * [32] SDDecl ::= S 'standalone' Eq
7248 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7249 *
7250 * [ VC: Standalone Document Declaration ]
7251 * TODO The standalone document declaration must have the value "no"
7252 * if any external markup declarations contain declarations of:
7253 * - attributes with default values, if elements to which these
7254 * attributes apply appear in the document without specifications
7255 * of values for these attributes, or
7256 * - entities (other than amp, lt, gt, apos, quot), if references
7257 * to those entities appear in the document, or
7258 * - attributes with values subject to normalization, where the
7259 * attribute appears in the document with a value which will change
7260 * as a result of normalization, or
7261 * - element types with element content, if white space occurs directly
7262 * within any instance of those types.
7263 *
7264 * Returns 1 if standalone, 0 otherwise
7265 */
7266
7267int
7268xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7269 int standalone = -1;
7270
7271 SKIP_BLANKS;
7272 if ((RAW == 's') && (NXT(1) == 't') &&
7273 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7274 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7275 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7276 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7277 SKIP(10);
7278 SKIP_BLANKS;
7279 if (RAW != '=') {
7280 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7282 ctxt->sax->error(ctxt->userData,
7283 "XML standalone declaration : expected '='\n");
7284 ctxt->wellFormed = 0;
7285 ctxt->disableSAX = 1;
7286 return(standalone);
7287 }
7288 NEXT;
7289 SKIP_BLANKS;
7290 if (RAW == '\''){
7291 NEXT;
7292 if ((RAW == 'n') && (NXT(1) == 'o')) {
7293 standalone = 0;
7294 SKIP(2);
7295 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7296 (NXT(2) == 's')) {
7297 standalone = 1;
7298 SKIP(3);
7299 } else {
7300 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7302 ctxt->sax->error(ctxt->userData,
7303 "standalone accepts only 'yes' or 'no'\n");
7304 ctxt->wellFormed = 0;
7305 ctxt->disableSAX = 1;
7306 }
7307 if (RAW != '\'') {
7308 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7310 ctxt->sax->error(ctxt->userData, "String not closed\n");
7311 ctxt->wellFormed = 0;
7312 ctxt->disableSAX = 1;
7313 } else
7314 NEXT;
7315 } else if (RAW == '"'){
7316 NEXT;
7317 if ((RAW == 'n') && (NXT(1) == 'o')) {
7318 standalone = 0;
7319 SKIP(2);
7320 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7321 (NXT(2) == 's')) {
7322 standalone = 1;
7323 SKIP(3);
7324 } else {
7325 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7327 ctxt->sax->error(ctxt->userData,
7328 "standalone accepts only 'yes' or 'no'\n");
7329 ctxt->wellFormed = 0;
7330 ctxt->disableSAX = 1;
7331 }
7332 if (RAW != '"') {
7333 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7335 ctxt->sax->error(ctxt->userData, "String not closed\n");
7336 ctxt->wellFormed = 0;
7337 ctxt->disableSAX = 1;
7338 } else
7339 NEXT;
7340 } else {
7341 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7343 ctxt->sax->error(ctxt->userData,
7344 "Standalone value not found\n");
7345 ctxt->wellFormed = 0;
7346 ctxt->disableSAX = 1;
7347 }
7348 }
7349 return(standalone);
7350}
7351
7352/**
7353 * xmlParseXMLDecl:
7354 * @ctxt: an XML parser context
7355 *
7356 * parse an XML declaration header
7357 *
7358 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7359 */
7360
7361void
7362xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7363 xmlChar *version;
7364
7365 /*
7366 * We know that '<?xml' is here.
7367 */
7368 SKIP(5);
7369
7370 if (!IS_BLANK(RAW)) {
7371 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7373 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7374 ctxt->wellFormed = 0;
7375 ctxt->disableSAX = 1;
7376 }
7377 SKIP_BLANKS;
7378
7379 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007380 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007381 */
7382 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007383 if (version == NULL) {
7384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7385 ctxt->sax->error(ctxt->userData,
7386 "Malformed declaration expecting version\n");
7387 ctxt->wellFormed = 0;
7388 ctxt->disableSAX = 1;
7389 } else {
7390 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7391 /*
7392 * TODO: Blueberry should be detected here
7393 */
7394 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7395 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7396 version);
7397 }
7398 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007399 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007400 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007401 }
Owen Taylor3473f882001-02-23 17:55:21 +00007402
7403 /*
7404 * We may have the encoding declaration
7405 */
7406 if (!IS_BLANK(RAW)) {
7407 if ((RAW == '?') && (NXT(1) == '>')) {
7408 SKIP(2);
7409 return;
7410 }
7411 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7413 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7414 ctxt->wellFormed = 0;
7415 ctxt->disableSAX = 1;
7416 }
7417 xmlParseEncodingDecl(ctxt);
7418 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7419 /*
7420 * The XML REC instructs us to stop parsing right here
7421 */
7422 return;
7423 }
7424
7425 /*
7426 * We may have the standalone status.
7427 */
7428 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7429 if ((RAW == '?') && (NXT(1) == '>')) {
7430 SKIP(2);
7431 return;
7432 }
7433 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7435 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7436 ctxt->wellFormed = 0;
7437 ctxt->disableSAX = 1;
7438 }
7439 SKIP_BLANKS;
7440 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7441
7442 SKIP_BLANKS;
7443 if ((RAW == '?') && (NXT(1) == '>')) {
7444 SKIP(2);
7445 } else if (RAW == '>') {
7446 /* Deprecated old WD ... */
7447 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7449 ctxt->sax->error(ctxt->userData,
7450 "XML declaration must end-up with '?>'\n");
7451 ctxt->wellFormed = 0;
7452 ctxt->disableSAX = 1;
7453 NEXT;
7454 } else {
7455 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7457 ctxt->sax->error(ctxt->userData,
7458 "parsing XML declaration: '?>' expected\n");
7459 ctxt->wellFormed = 0;
7460 ctxt->disableSAX = 1;
7461 MOVETO_ENDTAG(CUR_PTR);
7462 NEXT;
7463 }
7464}
7465
7466/**
7467 * xmlParseMisc:
7468 * @ctxt: an XML parser context
7469 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007470 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007471 *
7472 * [27] Misc ::= Comment | PI | S
7473 */
7474
7475void
7476xmlParseMisc(xmlParserCtxtPtr ctxt) {
7477 while (((RAW == '<') && (NXT(1) == '?')) ||
7478 ((RAW == '<') && (NXT(1) == '!') &&
7479 (NXT(2) == '-') && (NXT(3) == '-')) ||
7480 IS_BLANK(CUR)) {
7481 if ((RAW == '<') && (NXT(1) == '?')) {
7482 xmlParsePI(ctxt);
7483 } else if (IS_BLANK(CUR)) {
7484 NEXT;
7485 } else
7486 xmlParseComment(ctxt);
7487 }
7488}
7489
7490/**
7491 * xmlParseDocument:
7492 * @ctxt: an XML parser context
7493 *
7494 * parse an XML document (and build a tree if using the standard SAX
7495 * interface).
7496 *
7497 * [1] document ::= prolog element Misc*
7498 *
7499 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7500 *
7501 * Returns 0, -1 in case of error. the parser context is augmented
7502 * as a result of the parsing.
7503 */
7504
7505int
7506xmlParseDocument(xmlParserCtxtPtr ctxt) {
7507 xmlChar start[4];
7508 xmlCharEncoding enc;
7509
7510 xmlInitParser();
7511
7512 GROW;
7513
7514 /*
7515 * SAX: beginning of the document processing.
7516 */
7517 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7518 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7519
Daniel Veillard50f34372001-08-03 12:06:36 +00007520 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007521 /*
7522 * Get the 4 first bytes and decode the charset
7523 * if enc != XML_CHAR_ENCODING_NONE
7524 * plug some encoding conversion routines.
7525 */
7526 start[0] = RAW;
7527 start[1] = NXT(1);
7528 start[2] = NXT(2);
7529 start[3] = NXT(3);
7530 enc = xmlDetectCharEncoding(start, 4);
7531 if (enc != XML_CHAR_ENCODING_NONE) {
7532 xmlSwitchEncoding(ctxt, enc);
7533 }
Owen Taylor3473f882001-02-23 17:55:21 +00007534 }
7535
7536
7537 if (CUR == 0) {
7538 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7540 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7541 ctxt->wellFormed = 0;
7542 ctxt->disableSAX = 1;
7543 }
7544
7545 /*
7546 * Check for the XMLDecl in the Prolog.
7547 */
7548 GROW;
7549 if ((RAW == '<') && (NXT(1) == '?') &&
7550 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7551 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7552
7553 /*
7554 * Note that we will switch encoding on the fly.
7555 */
7556 xmlParseXMLDecl(ctxt);
7557 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7558 /*
7559 * The XML REC instructs us to stop parsing right here
7560 */
7561 return(-1);
7562 }
7563 ctxt->standalone = ctxt->input->standalone;
7564 SKIP_BLANKS;
7565 } else {
7566 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7567 }
7568 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7569 ctxt->sax->startDocument(ctxt->userData);
7570
7571 /*
7572 * The Misc part of the Prolog
7573 */
7574 GROW;
7575 xmlParseMisc(ctxt);
7576
7577 /*
7578 * Then possibly doc type declaration(s) and more Misc
7579 * (doctypedecl Misc*)?
7580 */
7581 GROW;
7582 if ((RAW == '<') && (NXT(1) == '!') &&
7583 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7584 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7585 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7586 (NXT(8) == 'E')) {
7587
7588 ctxt->inSubset = 1;
7589 xmlParseDocTypeDecl(ctxt);
7590 if (RAW == '[') {
7591 ctxt->instate = XML_PARSER_DTD;
7592 xmlParseInternalSubset(ctxt);
7593 }
7594
7595 /*
7596 * Create and update the external subset.
7597 */
7598 ctxt->inSubset = 2;
7599 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7600 (!ctxt->disableSAX))
7601 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7602 ctxt->extSubSystem, ctxt->extSubURI);
7603 ctxt->inSubset = 0;
7604
7605
7606 ctxt->instate = XML_PARSER_PROLOG;
7607 xmlParseMisc(ctxt);
7608 }
7609
7610 /*
7611 * Time to start parsing the tree itself
7612 */
7613 GROW;
7614 if (RAW != '<') {
7615 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7617 ctxt->sax->error(ctxt->userData,
7618 "Start tag expected, '<' not found\n");
7619 ctxt->wellFormed = 0;
7620 ctxt->disableSAX = 1;
7621 ctxt->instate = XML_PARSER_EOF;
7622 } else {
7623 ctxt->instate = XML_PARSER_CONTENT;
7624 xmlParseElement(ctxt);
7625 ctxt->instate = XML_PARSER_EPILOG;
7626
7627
7628 /*
7629 * The Misc part at the end
7630 */
7631 xmlParseMisc(ctxt);
7632
7633 if (RAW != 0) {
7634 ctxt->errNo = XML_ERR_DOCUMENT_END;
7635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7636 ctxt->sax->error(ctxt->userData,
7637 "Extra content at the end of the document\n");
7638 ctxt->wellFormed = 0;
7639 ctxt->disableSAX = 1;
7640 }
7641 ctxt->instate = XML_PARSER_EOF;
7642 }
7643
7644 /*
7645 * SAX: end of the document processing.
7646 */
7647 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7648 (!ctxt->disableSAX))
7649 ctxt->sax->endDocument(ctxt->userData);
7650
Daniel Veillardc7612992002-02-17 22:47:37 +00007651 if (! ctxt->wellFormed) {
7652 ctxt->valid = 0;
7653 return(-1);
7654 }
Owen Taylor3473f882001-02-23 17:55:21 +00007655 return(0);
7656}
7657
7658/**
7659 * xmlParseExtParsedEnt:
7660 * @ctxt: an XML parser context
7661 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007662 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007663 * An external general parsed entity is well-formed if it matches the
7664 * production labeled extParsedEnt.
7665 *
7666 * [78] extParsedEnt ::= TextDecl? content
7667 *
7668 * Returns 0, -1 in case of error. the parser context is augmented
7669 * as a result of the parsing.
7670 */
7671
7672int
7673xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7674 xmlChar start[4];
7675 xmlCharEncoding enc;
7676
7677 xmlDefaultSAXHandlerInit();
7678
7679 GROW;
7680
7681 /*
7682 * SAX: beginning of the document processing.
7683 */
7684 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7685 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7686
7687 /*
7688 * Get the 4 first bytes and decode the charset
7689 * if enc != XML_CHAR_ENCODING_NONE
7690 * plug some encoding conversion routines.
7691 */
7692 start[0] = RAW;
7693 start[1] = NXT(1);
7694 start[2] = NXT(2);
7695 start[3] = NXT(3);
7696 enc = xmlDetectCharEncoding(start, 4);
7697 if (enc != XML_CHAR_ENCODING_NONE) {
7698 xmlSwitchEncoding(ctxt, enc);
7699 }
7700
7701
7702 if (CUR == 0) {
7703 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7705 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7706 ctxt->wellFormed = 0;
7707 ctxt->disableSAX = 1;
7708 }
7709
7710 /*
7711 * Check for the XMLDecl in the Prolog.
7712 */
7713 GROW;
7714 if ((RAW == '<') && (NXT(1) == '?') &&
7715 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7716 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7717
7718 /*
7719 * Note that we will switch encoding on the fly.
7720 */
7721 xmlParseXMLDecl(ctxt);
7722 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7723 /*
7724 * The XML REC instructs us to stop parsing right here
7725 */
7726 return(-1);
7727 }
7728 SKIP_BLANKS;
7729 } else {
7730 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7731 }
7732 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7733 ctxt->sax->startDocument(ctxt->userData);
7734
7735 /*
7736 * Doing validity checking on chunk doesn't make sense
7737 */
7738 ctxt->instate = XML_PARSER_CONTENT;
7739 ctxt->validate = 0;
7740 ctxt->loadsubset = 0;
7741 ctxt->depth = 0;
7742
7743 xmlParseContent(ctxt);
7744
7745 if ((RAW == '<') && (NXT(1) == '/')) {
7746 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7748 ctxt->sax->error(ctxt->userData,
7749 "chunk is not well balanced\n");
7750 ctxt->wellFormed = 0;
7751 ctxt->disableSAX = 1;
7752 } else if (RAW != 0) {
7753 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7755 ctxt->sax->error(ctxt->userData,
7756 "extra content at the end of well balanced chunk\n");
7757 ctxt->wellFormed = 0;
7758 ctxt->disableSAX = 1;
7759 }
7760
7761 /*
7762 * SAX: end of the document processing.
7763 */
7764 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7765 (!ctxt->disableSAX))
7766 ctxt->sax->endDocument(ctxt->userData);
7767
7768 if (! ctxt->wellFormed) return(-1);
7769 return(0);
7770}
7771
7772/************************************************************************
7773 * *
7774 * Progressive parsing interfaces *
7775 * *
7776 ************************************************************************/
7777
7778/**
7779 * xmlParseLookupSequence:
7780 * @ctxt: an XML parser context
7781 * @first: the first char to lookup
7782 * @next: the next char to lookup or zero
7783 * @third: the next char to lookup or zero
7784 *
7785 * Try to find if a sequence (first, next, third) or just (first next) or
7786 * (first) is available in the input stream.
7787 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7788 * to avoid rescanning sequences of bytes, it DOES change the state of the
7789 * parser, do not use liberally.
7790 *
7791 * Returns the index to the current parsing point if the full sequence
7792 * is available, -1 otherwise.
7793 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007794static int
Owen Taylor3473f882001-02-23 17:55:21 +00007795xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7796 xmlChar next, xmlChar third) {
7797 int base, len;
7798 xmlParserInputPtr in;
7799 const xmlChar *buf;
7800
7801 in = ctxt->input;
7802 if (in == NULL) return(-1);
7803 base = in->cur - in->base;
7804 if (base < 0) return(-1);
7805 if (ctxt->checkIndex > base)
7806 base = ctxt->checkIndex;
7807 if (in->buf == NULL) {
7808 buf = in->base;
7809 len = in->length;
7810 } else {
7811 buf = in->buf->buffer->content;
7812 len = in->buf->buffer->use;
7813 }
7814 /* take into account the sequence length */
7815 if (third) len -= 2;
7816 else if (next) len --;
7817 for (;base < len;base++) {
7818 if (buf[base] == first) {
7819 if (third != 0) {
7820 if ((buf[base + 1] != next) ||
7821 (buf[base + 2] != third)) continue;
7822 } else if (next != 0) {
7823 if (buf[base + 1] != next) continue;
7824 }
7825 ctxt->checkIndex = 0;
7826#ifdef DEBUG_PUSH
7827 if (next == 0)
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: lookup '%c' found at %d\n",
7830 first, base);
7831 else if (third == 0)
7832 xmlGenericError(xmlGenericErrorContext,
7833 "PP: lookup '%c%c' found at %d\n",
7834 first, next, base);
7835 else
7836 xmlGenericError(xmlGenericErrorContext,
7837 "PP: lookup '%c%c%c' found at %d\n",
7838 first, next, third, base);
7839#endif
7840 return(base - (in->cur - in->base));
7841 }
7842 }
7843 ctxt->checkIndex = base;
7844#ifdef DEBUG_PUSH
7845 if (next == 0)
7846 xmlGenericError(xmlGenericErrorContext,
7847 "PP: lookup '%c' failed\n", first);
7848 else if (third == 0)
7849 xmlGenericError(xmlGenericErrorContext,
7850 "PP: lookup '%c%c' failed\n", first, next);
7851 else
7852 xmlGenericError(xmlGenericErrorContext,
7853 "PP: lookup '%c%c%c' failed\n", first, next, third);
7854#endif
7855 return(-1);
7856}
7857
7858/**
7859 * xmlParseTryOrFinish:
7860 * @ctxt: an XML parser context
7861 * @terminate: last chunk indicator
7862 *
7863 * Try to progress on parsing
7864 *
7865 * Returns zero if no parsing was possible
7866 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007867static int
Owen Taylor3473f882001-02-23 17:55:21 +00007868xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7869 int ret = 0;
7870 int avail;
7871 xmlChar cur, next;
7872
7873#ifdef DEBUG_PUSH
7874 switch (ctxt->instate) {
7875 case XML_PARSER_EOF:
7876 xmlGenericError(xmlGenericErrorContext,
7877 "PP: try EOF\n"); break;
7878 case XML_PARSER_START:
7879 xmlGenericError(xmlGenericErrorContext,
7880 "PP: try START\n"); break;
7881 case XML_PARSER_MISC:
7882 xmlGenericError(xmlGenericErrorContext,
7883 "PP: try MISC\n");break;
7884 case XML_PARSER_COMMENT:
7885 xmlGenericError(xmlGenericErrorContext,
7886 "PP: try COMMENT\n");break;
7887 case XML_PARSER_PROLOG:
7888 xmlGenericError(xmlGenericErrorContext,
7889 "PP: try PROLOG\n");break;
7890 case XML_PARSER_START_TAG:
7891 xmlGenericError(xmlGenericErrorContext,
7892 "PP: try START_TAG\n");break;
7893 case XML_PARSER_CONTENT:
7894 xmlGenericError(xmlGenericErrorContext,
7895 "PP: try CONTENT\n");break;
7896 case XML_PARSER_CDATA_SECTION:
7897 xmlGenericError(xmlGenericErrorContext,
7898 "PP: try CDATA_SECTION\n");break;
7899 case XML_PARSER_END_TAG:
7900 xmlGenericError(xmlGenericErrorContext,
7901 "PP: try END_TAG\n");break;
7902 case XML_PARSER_ENTITY_DECL:
7903 xmlGenericError(xmlGenericErrorContext,
7904 "PP: try ENTITY_DECL\n");break;
7905 case XML_PARSER_ENTITY_VALUE:
7906 xmlGenericError(xmlGenericErrorContext,
7907 "PP: try ENTITY_VALUE\n");break;
7908 case XML_PARSER_ATTRIBUTE_VALUE:
7909 xmlGenericError(xmlGenericErrorContext,
7910 "PP: try ATTRIBUTE_VALUE\n");break;
7911 case XML_PARSER_DTD:
7912 xmlGenericError(xmlGenericErrorContext,
7913 "PP: try DTD\n");break;
7914 case XML_PARSER_EPILOG:
7915 xmlGenericError(xmlGenericErrorContext,
7916 "PP: try EPILOG\n");break;
7917 case XML_PARSER_PI:
7918 xmlGenericError(xmlGenericErrorContext,
7919 "PP: try PI\n");break;
7920 case XML_PARSER_IGNORE:
7921 xmlGenericError(xmlGenericErrorContext,
7922 "PP: try IGNORE\n");break;
7923 }
7924#endif
7925
7926 while (1) {
7927 /*
7928 * Pop-up of finished entities.
7929 */
7930 while ((RAW == 0) && (ctxt->inputNr > 1))
7931 xmlPopInput(ctxt);
7932
7933 if (ctxt->input ==NULL) break;
7934 if (ctxt->input->buf == NULL)
7935 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7936 else
7937 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7938 if (avail < 1)
7939 goto done;
7940 switch (ctxt->instate) {
7941 case XML_PARSER_EOF:
7942 /*
7943 * Document parsing is done !
7944 */
7945 goto done;
7946 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007947 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7948 xmlChar start[4];
7949 xmlCharEncoding enc;
7950
7951 /*
7952 * Very first chars read from the document flow.
7953 */
7954 if (avail < 4)
7955 goto done;
7956
7957 /*
7958 * Get the 4 first bytes and decode the charset
7959 * if enc != XML_CHAR_ENCODING_NONE
7960 * plug some encoding conversion routines.
7961 */
7962 start[0] = RAW;
7963 start[1] = NXT(1);
7964 start[2] = NXT(2);
7965 start[3] = NXT(3);
7966 enc = xmlDetectCharEncoding(start, 4);
7967 if (enc != XML_CHAR_ENCODING_NONE) {
7968 xmlSwitchEncoding(ctxt, enc);
7969 }
7970 break;
7971 }
Owen Taylor3473f882001-02-23 17:55:21 +00007972
7973 cur = ctxt->input->cur[0];
7974 next = ctxt->input->cur[1];
7975 if (cur == 0) {
7976 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7977 ctxt->sax->setDocumentLocator(ctxt->userData,
7978 &xmlDefaultSAXLocator);
7979 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7981 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7982 ctxt->wellFormed = 0;
7983 ctxt->disableSAX = 1;
7984 ctxt->instate = XML_PARSER_EOF;
7985#ifdef DEBUG_PUSH
7986 xmlGenericError(xmlGenericErrorContext,
7987 "PP: entering EOF\n");
7988#endif
7989 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7990 ctxt->sax->endDocument(ctxt->userData);
7991 goto done;
7992 }
7993 if ((cur == '<') && (next == '?')) {
7994 /* PI or XML decl */
7995 if (avail < 5) return(ret);
7996 if ((!terminate) &&
7997 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7998 return(ret);
7999 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8000 ctxt->sax->setDocumentLocator(ctxt->userData,
8001 &xmlDefaultSAXLocator);
8002 if ((ctxt->input->cur[2] == 'x') &&
8003 (ctxt->input->cur[3] == 'm') &&
8004 (ctxt->input->cur[4] == 'l') &&
8005 (IS_BLANK(ctxt->input->cur[5]))) {
8006 ret += 5;
8007#ifdef DEBUG_PUSH
8008 xmlGenericError(xmlGenericErrorContext,
8009 "PP: Parsing XML Decl\n");
8010#endif
8011 xmlParseXMLDecl(ctxt);
8012 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8013 /*
8014 * The XML REC instructs us to stop parsing right
8015 * here
8016 */
8017 ctxt->instate = XML_PARSER_EOF;
8018 return(0);
8019 }
8020 ctxt->standalone = ctxt->input->standalone;
8021 if ((ctxt->encoding == NULL) &&
8022 (ctxt->input->encoding != NULL))
8023 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8024 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8025 (!ctxt->disableSAX))
8026 ctxt->sax->startDocument(ctxt->userData);
8027 ctxt->instate = XML_PARSER_MISC;
8028#ifdef DEBUG_PUSH
8029 xmlGenericError(xmlGenericErrorContext,
8030 "PP: entering MISC\n");
8031#endif
8032 } else {
8033 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8034 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8035 (!ctxt->disableSAX))
8036 ctxt->sax->startDocument(ctxt->userData);
8037 ctxt->instate = XML_PARSER_MISC;
8038#ifdef DEBUG_PUSH
8039 xmlGenericError(xmlGenericErrorContext,
8040 "PP: entering MISC\n");
8041#endif
8042 }
8043 } else {
8044 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8045 ctxt->sax->setDocumentLocator(ctxt->userData,
8046 &xmlDefaultSAXLocator);
8047 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8048 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8049 (!ctxt->disableSAX))
8050 ctxt->sax->startDocument(ctxt->userData);
8051 ctxt->instate = XML_PARSER_MISC;
8052#ifdef DEBUG_PUSH
8053 xmlGenericError(xmlGenericErrorContext,
8054 "PP: entering MISC\n");
8055#endif
8056 }
8057 break;
8058 case XML_PARSER_MISC:
8059 SKIP_BLANKS;
8060 if (ctxt->input->buf == NULL)
8061 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8062 else
8063 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8064 if (avail < 2)
8065 goto done;
8066 cur = ctxt->input->cur[0];
8067 next = ctxt->input->cur[1];
8068 if ((cur == '<') && (next == '?')) {
8069 if ((!terminate) &&
8070 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8071 goto done;
8072#ifdef DEBUG_PUSH
8073 xmlGenericError(xmlGenericErrorContext,
8074 "PP: Parsing PI\n");
8075#endif
8076 xmlParsePI(ctxt);
8077 } else if ((cur == '<') && (next == '!') &&
8078 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8079 if ((!terminate) &&
8080 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8081 goto done;
8082#ifdef DEBUG_PUSH
8083 xmlGenericError(xmlGenericErrorContext,
8084 "PP: Parsing Comment\n");
8085#endif
8086 xmlParseComment(ctxt);
8087 ctxt->instate = XML_PARSER_MISC;
8088 } else if ((cur == '<') && (next == '!') &&
8089 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8090 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8091 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8092 (ctxt->input->cur[8] == 'E')) {
8093 if ((!terminate) &&
8094 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8095 goto done;
8096#ifdef DEBUG_PUSH
8097 xmlGenericError(xmlGenericErrorContext,
8098 "PP: Parsing internal subset\n");
8099#endif
8100 ctxt->inSubset = 1;
8101 xmlParseDocTypeDecl(ctxt);
8102 if (RAW == '[') {
8103 ctxt->instate = XML_PARSER_DTD;
8104#ifdef DEBUG_PUSH
8105 xmlGenericError(xmlGenericErrorContext,
8106 "PP: entering DTD\n");
8107#endif
8108 } else {
8109 /*
8110 * Create and update the external subset.
8111 */
8112 ctxt->inSubset = 2;
8113 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8114 (ctxt->sax->externalSubset != NULL))
8115 ctxt->sax->externalSubset(ctxt->userData,
8116 ctxt->intSubName, ctxt->extSubSystem,
8117 ctxt->extSubURI);
8118 ctxt->inSubset = 0;
8119 ctxt->instate = XML_PARSER_PROLOG;
8120#ifdef DEBUG_PUSH
8121 xmlGenericError(xmlGenericErrorContext,
8122 "PP: entering PROLOG\n");
8123#endif
8124 }
8125 } else if ((cur == '<') && (next == '!') &&
8126 (avail < 9)) {
8127 goto done;
8128 } else {
8129 ctxt->instate = XML_PARSER_START_TAG;
8130#ifdef DEBUG_PUSH
8131 xmlGenericError(xmlGenericErrorContext,
8132 "PP: entering START_TAG\n");
8133#endif
8134 }
8135 break;
8136 case XML_PARSER_IGNORE:
8137 xmlGenericError(xmlGenericErrorContext,
8138 "PP: internal error, state == IGNORE");
8139 ctxt->instate = XML_PARSER_DTD;
8140#ifdef DEBUG_PUSH
8141 xmlGenericError(xmlGenericErrorContext,
8142 "PP: entering DTD\n");
8143#endif
8144 break;
8145 case XML_PARSER_PROLOG:
8146 SKIP_BLANKS;
8147 if (ctxt->input->buf == NULL)
8148 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8149 else
8150 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8151 if (avail < 2)
8152 goto done;
8153 cur = ctxt->input->cur[0];
8154 next = ctxt->input->cur[1];
8155 if ((cur == '<') && (next == '?')) {
8156 if ((!terminate) &&
8157 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8158 goto done;
8159#ifdef DEBUG_PUSH
8160 xmlGenericError(xmlGenericErrorContext,
8161 "PP: Parsing PI\n");
8162#endif
8163 xmlParsePI(ctxt);
8164 } else if ((cur == '<') && (next == '!') &&
8165 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8166 if ((!terminate) &&
8167 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8168 goto done;
8169#ifdef DEBUG_PUSH
8170 xmlGenericError(xmlGenericErrorContext,
8171 "PP: Parsing Comment\n");
8172#endif
8173 xmlParseComment(ctxt);
8174 ctxt->instate = XML_PARSER_PROLOG;
8175 } else if ((cur == '<') && (next == '!') &&
8176 (avail < 4)) {
8177 goto done;
8178 } else {
8179 ctxt->instate = XML_PARSER_START_TAG;
8180#ifdef DEBUG_PUSH
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: entering START_TAG\n");
8183#endif
8184 }
8185 break;
8186 case XML_PARSER_EPILOG:
8187 SKIP_BLANKS;
8188 if (ctxt->input->buf == NULL)
8189 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8190 else
8191 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8192 if (avail < 2)
8193 goto done;
8194 cur = ctxt->input->cur[0];
8195 next = ctxt->input->cur[1];
8196 if ((cur == '<') && (next == '?')) {
8197 if ((!terminate) &&
8198 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8199 goto done;
8200#ifdef DEBUG_PUSH
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: Parsing PI\n");
8203#endif
8204 xmlParsePI(ctxt);
8205 ctxt->instate = XML_PARSER_EPILOG;
8206 } else if ((cur == '<') && (next == '!') &&
8207 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8208 if ((!terminate) &&
8209 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8210 goto done;
8211#ifdef DEBUG_PUSH
8212 xmlGenericError(xmlGenericErrorContext,
8213 "PP: Parsing Comment\n");
8214#endif
8215 xmlParseComment(ctxt);
8216 ctxt->instate = XML_PARSER_EPILOG;
8217 } else if ((cur == '<') && (next == '!') &&
8218 (avail < 4)) {
8219 goto done;
8220 } else {
8221 ctxt->errNo = XML_ERR_DOCUMENT_END;
8222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8223 ctxt->sax->error(ctxt->userData,
8224 "Extra content at the end of the document\n");
8225 ctxt->wellFormed = 0;
8226 ctxt->disableSAX = 1;
8227 ctxt->instate = XML_PARSER_EOF;
8228#ifdef DEBUG_PUSH
8229 xmlGenericError(xmlGenericErrorContext,
8230 "PP: entering EOF\n");
8231#endif
8232 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8233 (!ctxt->disableSAX))
8234 ctxt->sax->endDocument(ctxt->userData);
8235 goto done;
8236 }
8237 break;
8238 case XML_PARSER_START_TAG: {
8239 xmlChar *name, *oldname;
8240
8241 if ((avail < 2) && (ctxt->inputNr == 1))
8242 goto done;
8243 cur = ctxt->input->cur[0];
8244 if (cur != '<') {
8245 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8247 ctxt->sax->error(ctxt->userData,
8248 "Start tag expect, '<' not found\n");
8249 ctxt->wellFormed = 0;
8250 ctxt->disableSAX = 1;
8251 ctxt->instate = XML_PARSER_EOF;
8252#ifdef DEBUG_PUSH
8253 xmlGenericError(xmlGenericErrorContext,
8254 "PP: entering EOF\n");
8255#endif
8256 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8257 (!ctxt->disableSAX))
8258 ctxt->sax->endDocument(ctxt->userData);
8259 goto done;
8260 }
8261 if ((!terminate) &&
8262 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8263 goto done;
8264 if (ctxt->spaceNr == 0)
8265 spacePush(ctxt, -1);
8266 else
8267 spacePush(ctxt, *ctxt->space);
8268 name = xmlParseStartTag(ctxt);
8269 if (name == NULL) {
8270 spacePop(ctxt);
8271 ctxt->instate = XML_PARSER_EOF;
8272#ifdef DEBUG_PUSH
8273 xmlGenericError(xmlGenericErrorContext,
8274 "PP: entering EOF\n");
8275#endif
8276 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8277 (!ctxt->disableSAX))
8278 ctxt->sax->endDocument(ctxt->userData);
8279 goto done;
8280 }
8281 namePush(ctxt, xmlStrdup(name));
8282
8283 /*
8284 * [ VC: Root Element Type ]
8285 * The Name in the document type declaration must match
8286 * the element type of the root element.
8287 */
8288 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8289 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8290 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8291
8292 /*
8293 * Check for an Empty Element.
8294 */
8295 if ((RAW == '/') && (NXT(1) == '>')) {
8296 SKIP(2);
8297 if ((ctxt->sax != NULL) &&
8298 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8299 ctxt->sax->endElement(ctxt->userData, name);
8300 xmlFree(name);
8301 oldname = namePop(ctxt);
8302 spacePop(ctxt);
8303 if (oldname != NULL) {
8304#ifdef DEBUG_STACK
8305 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8306#endif
8307 xmlFree(oldname);
8308 }
8309 if (ctxt->name == NULL) {
8310 ctxt->instate = XML_PARSER_EPILOG;
8311#ifdef DEBUG_PUSH
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: entering EPILOG\n");
8314#endif
8315 } else {
8316 ctxt->instate = XML_PARSER_CONTENT;
8317#ifdef DEBUG_PUSH
8318 xmlGenericError(xmlGenericErrorContext,
8319 "PP: entering CONTENT\n");
8320#endif
8321 }
8322 break;
8323 }
8324 if (RAW == '>') {
8325 NEXT;
8326 } else {
8327 ctxt->errNo = XML_ERR_GT_REQUIRED;
8328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8329 ctxt->sax->error(ctxt->userData,
8330 "Couldn't find end of Start Tag %s\n",
8331 name);
8332 ctxt->wellFormed = 0;
8333 ctxt->disableSAX = 1;
8334
8335 /*
8336 * end of parsing of this node.
8337 */
8338 nodePop(ctxt);
8339 oldname = namePop(ctxt);
8340 spacePop(ctxt);
8341 if (oldname != NULL) {
8342#ifdef DEBUG_STACK
8343 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8344#endif
8345 xmlFree(oldname);
8346 }
8347 }
8348 xmlFree(name);
8349 ctxt->instate = XML_PARSER_CONTENT;
8350#ifdef DEBUG_PUSH
8351 xmlGenericError(xmlGenericErrorContext,
8352 "PP: entering CONTENT\n");
8353#endif
8354 break;
8355 }
8356 case XML_PARSER_CONTENT: {
8357 const xmlChar *test;
8358 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008359 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008360
8361 /*
8362 * Handle preparsed entities and charRef
8363 */
8364 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008365 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008366
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008367 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008368 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8369 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008370 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008371 ctxt->token = 0;
8372 }
8373 if ((avail < 2) && (ctxt->inputNr == 1))
8374 goto done;
8375 cur = ctxt->input->cur[0];
8376 next = ctxt->input->cur[1];
8377
8378 test = CUR_PTR;
8379 cons = ctxt->input->consumed;
8380 tok = ctxt->token;
8381 if ((cur == '<') && (next == '?')) {
8382 if ((!terminate) &&
8383 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8384 goto done;
8385#ifdef DEBUG_PUSH
8386 xmlGenericError(xmlGenericErrorContext,
8387 "PP: Parsing PI\n");
8388#endif
8389 xmlParsePI(ctxt);
8390 } else if ((cur == '<') && (next == '!') &&
8391 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8392 if ((!terminate) &&
8393 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8394 goto done;
8395#ifdef DEBUG_PUSH
8396 xmlGenericError(xmlGenericErrorContext,
8397 "PP: Parsing Comment\n");
8398#endif
8399 xmlParseComment(ctxt);
8400 ctxt->instate = XML_PARSER_CONTENT;
8401 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8402 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8403 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8404 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8405 (ctxt->input->cur[8] == '[')) {
8406 SKIP(9);
8407 ctxt->instate = XML_PARSER_CDATA_SECTION;
8408#ifdef DEBUG_PUSH
8409 xmlGenericError(xmlGenericErrorContext,
8410 "PP: entering CDATA_SECTION\n");
8411#endif
8412 break;
8413 } else if ((cur == '<') && (next == '!') &&
8414 (avail < 9)) {
8415 goto done;
8416 } else if ((cur == '<') && (next == '/')) {
8417 ctxt->instate = XML_PARSER_END_TAG;
8418#ifdef DEBUG_PUSH
8419 xmlGenericError(xmlGenericErrorContext,
8420 "PP: entering END_TAG\n");
8421#endif
8422 break;
8423 } else if (cur == '<') {
8424 ctxt->instate = XML_PARSER_START_TAG;
8425#ifdef DEBUG_PUSH
8426 xmlGenericError(xmlGenericErrorContext,
8427 "PP: entering START_TAG\n");
8428#endif
8429 break;
8430 } else if (cur == '&') {
8431 if ((!terminate) &&
8432 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8433 goto done;
8434#ifdef DEBUG_PUSH
8435 xmlGenericError(xmlGenericErrorContext,
8436 "PP: Parsing Reference\n");
8437#endif
8438 xmlParseReference(ctxt);
8439 } else {
8440 /* TODO Avoid the extra copy, handle directly !!! */
8441 /*
8442 * Goal of the following test is:
8443 * - minimize calls to the SAX 'character' callback
8444 * when they are mergeable
8445 * - handle an problem for isBlank when we only parse
8446 * a sequence of blank chars and the next one is
8447 * not available to check against '<' presence.
8448 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008449 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008450 * of the parser.
8451 */
8452 if ((ctxt->inputNr == 1) &&
8453 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8454 if ((!terminate) &&
8455 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8456 goto done;
8457 }
8458 ctxt->checkIndex = 0;
8459#ifdef DEBUG_PUSH
8460 xmlGenericError(xmlGenericErrorContext,
8461 "PP: Parsing char data\n");
8462#endif
8463 xmlParseCharData(ctxt, 0);
8464 }
8465 /*
8466 * Pop-up of finished entities.
8467 */
8468 while ((RAW == 0) && (ctxt->inputNr > 1))
8469 xmlPopInput(ctxt);
8470 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8471 (tok == ctxt->token)) {
8472 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8474 ctxt->sax->error(ctxt->userData,
8475 "detected an error in element content\n");
8476 ctxt->wellFormed = 0;
8477 ctxt->disableSAX = 1;
8478 ctxt->instate = XML_PARSER_EOF;
8479 break;
8480 }
8481 break;
8482 }
8483 case XML_PARSER_CDATA_SECTION: {
8484 /*
8485 * The Push mode need to have the SAX callback for
8486 * cdataBlock merge back contiguous callbacks.
8487 */
8488 int base;
8489
8490 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8491 if (base < 0) {
8492 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8493 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8494 if (ctxt->sax->cdataBlock != NULL)
8495 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8496 XML_PARSER_BIG_BUFFER_SIZE);
8497 }
8498 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8499 ctxt->checkIndex = 0;
8500 }
8501 goto done;
8502 } else {
8503 if ((ctxt->sax != NULL) && (base > 0) &&
8504 (!ctxt->disableSAX)) {
8505 if (ctxt->sax->cdataBlock != NULL)
8506 ctxt->sax->cdataBlock(ctxt->userData,
8507 ctxt->input->cur, base);
8508 }
8509 SKIP(base + 3);
8510 ctxt->checkIndex = 0;
8511 ctxt->instate = XML_PARSER_CONTENT;
8512#ifdef DEBUG_PUSH
8513 xmlGenericError(xmlGenericErrorContext,
8514 "PP: entering CONTENT\n");
8515#endif
8516 }
8517 break;
8518 }
8519 case XML_PARSER_END_TAG:
8520 if (avail < 2)
8521 goto done;
8522 if ((!terminate) &&
8523 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8524 goto done;
8525 xmlParseEndTag(ctxt);
8526 if (ctxt->name == NULL) {
8527 ctxt->instate = XML_PARSER_EPILOG;
8528#ifdef DEBUG_PUSH
8529 xmlGenericError(xmlGenericErrorContext,
8530 "PP: entering EPILOG\n");
8531#endif
8532 } else {
8533 ctxt->instate = XML_PARSER_CONTENT;
8534#ifdef DEBUG_PUSH
8535 xmlGenericError(xmlGenericErrorContext,
8536 "PP: entering CONTENT\n");
8537#endif
8538 }
8539 break;
8540 case XML_PARSER_DTD: {
8541 /*
8542 * Sorry but progressive parsing of the internal subset
8543 * is not expected to be supported. We first check that
8544 * the full content of the internal subset is available and
8545 * the parsing is launched only at that point.
8546 * Internal subset ends up with "']' S? '>'" in an unescaped
8547 * section and not in a ']]>' sequence which are conditional
8548 * sections (whoever argued to keep that crap in XML deserve
8549 * a place in hell !).
8550 */
8551 int base, i;
8552 xmlChar *buf;
8553 xmlChar quote = 0;
8554
8555 base = ctxt->input->cur - ctxt->input->base;
8556 if (base < 0) return(0);
8557 if (ctxt->checkIndex > base)
8558 base = ctxt->checkIndex;
8559 buf = ctxt->input->buf->buffer->content;
8560 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8561 base++) {
8562 if (quote != 0) {
8563 if (buf[base] == quote)
8564 quote = 0;
8565 continue;
8566 }
8567 if (buf[base] == '"') {
8568 quote = '"';
8569 continue;
8570 }
8571 if (buf[base] == '\'') {
8572 quote = '\'';
8573 continue;
8574 }
8575 if (buf[base] == ']') {
8576 if ((unsigned int) base +1 >=
8577 ctxt->input->buf->buffer->use)
8578 break;
8579 if (buf[base + 1] == ']') {
8580 /* conditional crap, skip both ']' ! */
8581 base++;
8582 continue;
8583 }
8584 for (i = 0;
8585 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8586 i++) {
8587 if (buf[base + i] == '>')
8588 goto found_end_int_subset;
8589 }
8590 break;
8591 }
8592 }
8593 /*
8594 * We didn't found the end of the Internal subset
8595 */
8596 if (quote == 0)
8597 ctxt->checkIndex = base;
8598#ifdef DEBUG_PUSH
8599 if (next == 0)
8600 xmlGenericError(xmlGenericErrorContext,
8601 "PP: lookup of int subset end filed\n");
8602#endif
8603 goto done;
8604
8605found_end_int_subset:
8606 xmlParseInternalSubset(ctxt);
8607 ctxt->inSubset = 2;
8608 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8609 (ctxt->sax->externalSubset != NULL))
8610 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8611 ctxt->extSubSystem, ctxt->extSubURI);
8612 ctxt->inSubset = 0;
8613 ctxt->instate = XML_PARSER_PROLOG;
8614 ctxt->checkIndex = 0;
8615#ifdef DEBUG_PUSH
8616 xmlGenericError(xmlGenericErrorContext,
8617 "PP: entering PROLOG\n");
8618#endif
8619 break;
8620 }
8621 case XML_PARSER_COMMENT:
8622 xmlGenericError(xmlGenericErrorContext,
8623 "PP: internal error, state == COMMENT\n");
8624 ctxt->instate = XML_PARSER_CONTENT;
8625#ifdef DEBUG_PUSH
8626 xmlGenericError(xmlGenericErrorContext,
8627 "PP: entering CONTENT\n");
8628#endif
8629 break;
8630 case XML_PARSER_PI:
8631 xmlGenericError(xmlGenericErrorContext,
8632 "PP: internal error, state == PI\n");
8633 ctxt->instate = XML_PARSER_CONTENT;
8634#ifdef DEBUG_PUSH
8635 xmlGenericError(xmlGenericErrorContext,
8636 "PP: entering CONTENT\n");
8637#endif
8638 break;
8639 case XML_PARSER_ENTITY_DECL:
8640 xmlGenericError(xmlGenericErrorContext,
8641 "PP: internal error, state == ENTITY_DECL\n");
8642 ctxt->instate = XML_PARSER_DTD;
8643#ifdef DEBUG_PUSH
8644 xmlGenericError(xmlGenericErrorContext,
8645 "PP: entering DTD\n");
8646#endif
8647 break;
8648 case XML_PARSER_ENTITY_VALUE:
8649 xmlGenericError(xmlGenericErrorContext,
8650 "PP: internal error, state == ENTITY_VALUE\n");
8651 ctxt->instate = XML_PARSER_CONTENT;
8652#ifdef DEBUG_PUSH
8653 xmlGenericError(xmlGenericErrorContext,
8654 "PP: entering DTD\n");
8655#endif
8656 break;
8657 case XML_PARSER_ATTRIBUTE_VALUE:
8658 xmlGenericError(xmlGenericErrorContext,
8659 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8660 ctxt->instate = XML_PARSER_START_TAG;
8661#ifdef DEBUG_PUSH
8662 xmlGenericError(xmlGenericErrorContext,
8663 "PP: entering START_TAG\n");
8664#endif
8665 break;
8666 case XML_PARSER_SYSTEM_LITERAL:
8667 xmlGenericError(xmlGenericErrorContext,
8668 "PP: internal error, state == SYSTEM_LITERAL\n");
8669 ctxt->instate = XML_PARSER_START_TAG;
8670#ifdef DEBUG_PUSH
8671 xmlGenericError(xmlGenericErrorContext,
8672 "PP: entering START_TAG\n");
8673#endif
8674 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008675 case XML_PARSER_PUBLIC_LITERAL:
8676 xmlGenericError(xmlGenericErrorContext,
8677 "PP: internal error, state == PUBLIC_LITERAL\n");
8678 ctxt->instate = XML_PARSER_START_TAG;
8679#ifdef DEBUG_PUSH
8680 xmlGenericError(xmlGenericErrorContext,
8681 "PP: entering START_TAG\n");
8682#endif
8683 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008684 }
8685 }
8686done:
8687#ifdef DEBUG_PUSH
8688 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8689#endif
8690 return(ret);
8691}
8692
8693/**
Owen Taylor3473f882001-02-23 17:55:21 +00008694 * xmlParseChunk:
8695 * @ctxt: an XML parser context
8696 * @chunk: an char array
8697 * @size: the size in byte of the chunk
8698 * @terminate: last chunk indicator
8699 *
8700 * Parse a Chunk of memory
8701 *
8702 * Returns zero if no error, the xmlParserErrors otherwise.
8703 */
8704int
8705xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8706 int terminate) {
8707 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8708 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8709 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8710 int cur = ctxt->input->cur - ctxt->input->base;
8711
8712 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8713 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8714 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008715 ctxt->input->end =
8716 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008717#ifdef DEBUG_PUSH
8718 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8719#endif
8720
8721 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8722 xmlParseTryOrFinish(ctxt, terminate);
8723 } else if (ctxt->instate != XML_PARSER_EOF) {
8724 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8725 xmlParserInputBufferPtr in = ctxt->input->buf;
8726 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8727 (in->raw != NULL)) {
8728 int nbchars;
8729
8730 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8731 if (nbchars < 0) {
8732 xmlGenericError(xmlGenericErrorContext,
8733 "xmlParseChunk: encoder error\n");
8734 return(XML_ERR_INVALID_ENCODING);
8735 }
8736 }
8737 }
8738 }
8739 xmlParseTryOrFinish(ctxt, terminate);
8740 if (terminate) {
8741 /*
8742 * Check for termination
8743 */
8744 if ((ctxt->instate != XML_PARSER_EOF) &&
8745 (ctxt->instate != XML_PARSER_EPILOG)) {
8746 ctxt->errNo = XML_ERR_DOCUMENT_END;
8747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8748 ctxt->sax->error(ctxt->userData,
8749 "Extra content at the end of the document\n");
8750 ctxt->wellFormed = 0;
8751 ctxt->disableSAX = 1;
8752 }
8753 if (ctxt->instate != XML_PARSER_EOF) {
8754 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8755 (!ctxt->disableSAX))
8756 ctxt->sax->endDocument(ctxt->userData);
8757 }
8758 ctxt->instate = XML_PARSER_EOF;
8759 }
8760 return((xmlParserErrors) ctxt->errNo);
8761}
8762
8763/************************************************************************
8764 * *
8765 * I/O front end functions to the parser *
8766 * *
8767 ************************************************************************/
8768
8769/**
8770 * xmlStopParser:
8771 * @ctxt: an XML parser context
8772 *
8773 * Blocks further parser processing
8774 */
8775void
8776xmlStopParser(xmlParserCtxtPtr ctxt) {
8777 ctxt->instate = XML_PARSER_EOF;
8778 if (ctxt->input != NULL)
8779 ctxt->input->cur = BAD_CAST"";
8780}
8781
8782/**
8783 * xmlCreatePushParserCtxt:
8784 * @sax: a SAX handler
8785 * @user_data: The user data returned on SAX callbacks
8786 * @chunk: a pointer to an array of chars
8787 * @size: number of chars in the array
8788 * @filename: an optional file name or URI
8789 *
8790 * Create a parser context for using the XML parser in push mode
8791 * To allow content encoding detection, @size should be >= 4
8792 * The value of @filename is used for fetching external entities
8793 * and error/warning reports.
8794 *
8795 * Returns the new parser context or NULL
8796 */
8797xmlParserCtxtPtr
8798xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8799 const char *chunk, int size, const char *filename) {
8800 xmlParserCtxtPtr ctxt;
8801 xmlParserInputPtr inputStream;
8802 xmlParserInputBufferPtr buf;
8803 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8804
8805 /*
8806 * plug some encoding conversion routines
8807 */
8808 if ((chunk != NULL) && (size >= 4))
8809 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8810
8811 buf = xmlAllocParserInputBuffer(enc);
8812 if (buf == NULL) return(NULL);
8813
8814 ctxt = xmlNewParserCtxt();
8815 if (ctxt == NULL) {
8816 xmlFree(buf);
8817 return(NULL);
8818 }
8819 if (sax != NULL) {
8820 if (ctxt->sax != &xmlDefaultSAXHandler)
8821 xmlFree(ctxt->sax);
8822 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8823 if (ctxt->sax == NULL) {
8824 xmlFree(buf);
8825 xmlFree(ctxt);
8826 return(NULL);
8827 }
8828 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8829 if (user_data != NULL)
8830 ctxt->userData = user_data;
8831 }
8832 if (filename == NULL) {
8833 ctxt->directory = NULL;
8834 } else {
8835 ctxt->directory = xmlParserGetDirectory(filename);
8836 }
8837
8838 inputStream = xmlNewInputStream(ctxt);
8839 if (inputStream == NULL) {
8840 xmlFreeParserCtxt(ctxt);
8841 return(NULL);
8842 }
8843
8844 if (filename == NULL)
8845 inputStream->filename = NULL;
8846 else
8847 inputStream->filename = xmlMemStrdup(filename);
8848 inputStream->buf = buf;
8849 inputStream->base = inputStream->buf->buffer->content;
8850 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008851 inputStream->end =
8852 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008853
8854 inputPush(ctxt, inputStream);
8855
8856 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8857 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008858 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8859 int cur = ctxt->input->cur - ctxt->input->base;
8860
Owen Taylor3473f882001-02-23 17:55:21 +00008861 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008862
8863 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8864 ctxt->input->cur = ctxt->input->base + cur;
8865 ctxt->input->end =
8866 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008867#ifdef DEBUG_PUSH
8868 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8869#endif
8870 }
8871
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008872 if (enc != XML_CHAR_ENCODING_NONE) {
8873 xmlSwitchEncoding(ctxt, enc);
8874 }
8875
Owen Taylor3473f882001-02-23 17:55:21 +00008876 return(ctxt);
8877}
8878
8879/**
8880 * xmlCreateIOParserCtxt:
8881 * @sax: a SAX handler
8882 * @user_data: The user data returned on SAX callbacks
8883 * @ioread: an I/O read function
8884 * @ioclose: an I/O close function
8885 * @ioctx: an I/O handler
8886 * @enc: the charset encoding if known
8887 *
8888 * Create a parser context for using the XML parser with an existing
8889 * I/O stream
8890 *
8891 * Returns the new parser context or NULL
8892 */
8893xmlParserCtxtPtr
8894xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8895 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8896 void *ioctx, xmlCharEncoding enc) {
8897 xmlParserCtxtPtr ctxt;
8898 xmlParserInputPtr inputStream;
8899 xmlParserInputBufferPtr buf;
8900
8901 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8902 if (buf == NULL) return(NULL);
8903
8904 ctxt = xmlNewParserCtxt();
8905 if (ctxt == NULL) {
8906 xmlFree(buf);
8907 return(NULL);
8908 }
8909 if (sax != NULL) {
8910 if (ctxt->sax != &xmlDefaultSAXHandler)
8911 xmlFree(ctxt->sax);
8912 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8913 if (ctxt->sax == NULL) {
8914 xmlFree(buf);
8915 xmlFree(ctxt);
8916 return(NULL);
8917 }
8918 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8919 if (user_data != NULL)
8920 ctxt->userData = user_data;
8921 }
8922
8923 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8924 if (inputStream == NULL) {
8925 xmlFreeParserCtxt(ctxt);
8926 return(NULL);
8927 }
8928 inputPush(ctxt, inputStream);
8929
8930 return(ctxt);
8931}
8932
8933/************************************************************************
8934 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008935 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00008936 * *
8937 ************************************************************************/
8938
8939/**
8940 * xmlIOParseDTD:
8941 * @sax: the SAX handler block or NULL
8942 * @input: an Input Buffer
8943 * @enc: the charset encoding if known
8944 *
8945 * Load and parse a DTD
8946 *
8947 * Returns the resulting xmlDtdPtr or NULL in case of error.
8948 * @input will be freed at parsing end.
8949 */
8950
8951xmlDtdPtr
8952xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8953 xmlCharEncoding enc) {
8954 xmlDtdPtr ret = NULL;
8955 xmlParserCtxtPtr ctxt;
8956 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008957 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008958
8959 if (input == NULL)
8960 return(NULL);
8961
8962 ctxt = xmlNewParserCtxt();
8963 if (ctxt == NULL) {
8964 return(NULL);
8965 }
8966
8967 /*
8968 * Set-up the SAX context
8969 */
8970 if (sax != NULL) {
8971 if (ctxt->sax != NULL)
8972 xmlFree(ctxt->sax);
8973 ctxt->sax = sax;
8974 ctxt->userData = NULL;
8975 }
8976
8977 /*
8978 * generate a parser input from the I/O handler
8979 */
8980
8981 pinput = xmlNewIOInputStream(ctxt, input, enc);
8982 if (pinput == NULL) {
8983 if (sax != NULL) ctxt->sax = NULL;
8984 xmlFreeParserCtxt(ctxt);
8985 return(NULL);
8986 }
8987
8988 /*
8989 * plug some encoding conversion routines here.
8990 */
8991 xmlPushInput(ctxt, pinput);
8992
8993 pinput->filename = NULL;
8994 pinput->line = 1;
8995 pinput->col = 1;
8996 pinput->base = ctxt->input->cur;
8997 pinput->cur = ctxt->input->cur;
8998 pinput->free = NULL;
8999
9000 /*
9001 * let's parse that entity knowing it's an external subset.
9002 */
9003 ctxt->inSubset = 2;
9004 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9005 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9006 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009007
9008 if (enc == XML_CHAR_ENCODING_NONE) {
9009 /*
9010 * Get the 4 first bytes and decode the charset
9011 * if enc != XML_CHAR_ENCODING_NONE
9012 * plug some encoding conversion routines.
9013 */
9014 start[0] = RAW;
9015 start[1] = NXT(1);
9016 start[2] = NXT(2);
9017 start[3] = NXT(3);
9018 enc = xmlDetectCharEncoding(start, 4);
9019 if (enc != XML_CHAR_ENCODING_NONE) {
9020 xmlSwitchEncoding(ctxt, enc);
9021 }
9022 }
9023
Owen Taylor3473f882001-02-23 17:55:21 +00009024 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9025
9026 if (ctxt->myDoc != NULL) {
9027 if (ctxt->wellFormed) {
9028 ret = ctxt->myDoc->extSubset;
9029 ctxt->myDoc->extSubset = NULL;
9030 } else {
9031 ret = NULL;
9032 }
9033 xmlFreeDoc(ctxt->myDoc);
9034 ctxt->myDoc = NULL;
9035 }
9036 if (sax != NULL) ctxt->sax = NULL;
9037 xmlFreeParserCtxt(ctxt);
9038
9039 return(ret);
9040}
9041
9042/**
9043 * xmlSAXParseDTD:
9044 * @sax: the SAX handler block
9045 * @ExternalID: a NAME* containing the External ID of the DTD
9046 * @SystemID: a NAME* containing the URL to the DTD
9047 *
9048 * Load and parse an external subset.
9049 *
9050 * Returns the resulting xmlDtdPtr or NULL in case of error.
9051 */
9052
9053xmlDtdPtr
9054xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9055 const xmlChar *SystemID) {
9056 xmlDtdPtr ret = NULL;
9057 xmlParserCtxtPtr ctxt;
9058 xmlParserInputPtr input = NULL;
9059 xmlCharEncoding enc;
9060
9061 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9062
9063 ctxt = xmlNewParserCtxt();
9064 if (ctxt == NULL) {
9065 return(NULL);
9066 }
9067
9068 /*
9069 * Set-up the SAX context
9070 */
9071 if (sax != NULL) {
9072 if (ctxt->sax != NULL)
9073 xmlFree(ctxt->sax);
9074 ctxt->sax = sax;
9075 ctxt->userData = NULL;
9076 }
9077
9078 /*
9079 * Ask the Entity resolver to load the damn thing
9080 */
9081
9082 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9083 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9084 if (input == NULL) {
9085 if (sax != NULL) ctxt->sax = NULL;
9086 xmlFreeParserCtxt(ctxt);
9087 return(NULL);
9088 }
9089
9090 /*
9091 * plug some encoding conversion routines here.
9092 */
9093 xmlPushInput(ctxt, input);
9094 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9095 xmlSwitchEncoding(ctxt, enc);
9096
9097 if (input->filename == NULL)
9098 input->filename = (char *) xmlStrdup(SystemID);
9099 input->line = 1;
9100 input->col = 1;
9101 input->base = ctxt->input->cur;
9102 input->cur = ctxt->input->cur;
9103 input->free = NULL;
9104
9105 /*
9106 * let's parse that entity knowing it's an external subset.
9107 */
9108 ctxt->inSubset = 2;
9109 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9110 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9111 ExternalID, SystemID);
9112 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9113
9114 if (ctxt->myDoc != NULL) {
9115 if (ctxt->wellFormed) {
9116 ret = ctxt->myDoc->extSubset;
9117 ctxt->myDoc->extSubset = NULL;
9118 } else {
9119 ret = NULL;
9120 }
9121 xmlFreeDoc(ctxt->myDoc);
9122 ctxt->myDoc = NULL;
9123 }
9124 if (sax != NULL) ctxt->sax = NULL;
9125 xmlFreeParserCtxt(ctxt);
9126
9127 return(ret);
9128}
9129
9130/**
9131 * xmlParseDTD:
9132 * @ExternalID: a NAME* containing the External ID of the DTD
9133 * @SystemID: a NAME* containing the URL to the DTD
9134 *
9135 * Load and parse an external subset.
9136 *
9137 * Returns the resulting xmlDtdPtr or NULL in case of error.
9138 */
9139
9140xmlDtdPtr
9141xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9142 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9143}
9144
9145/************************************************************************
9146 * *
9147 * Front ends when parsing an Entity *
9148 * *
9149 ************************************************************************/
9150
9151/**
Owen Taylor3473f882001-02-23 17:55:21 +00009152 * xmlParseCtxtExternalEntity:
9153 * @ctx: the existing parsing context
9154 * @URL: the URL for the entity to load
9155 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009156 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009157 *
9158 * Parse an external general entity within an existing parsing context
9159 * An external general parsed entity is well-formed if it matches the
9160 * production labeled extParsedEnt.
9161 *
9162 * [78] extParsedEnt ::= TextDecl? content
9163 *
9164 * Returns 0 if the entity is well formed, -1 in case of args problem and
9165 * the parser error code otherwise
9166 */
9167
9168int
9169xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009170 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009171 xmlParserCtxtPtr ctxt;
9172 xmlDocPtr newDoc;
9173 xmlSAXHandlerPtr oldsax = NULL;
9174 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009175 xmlChar start[4];
9176 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009177
9178 if (ctx->depth > 40) {
9179 return(XML_ERR_ENTITY_LOOP);
9180 }
9181
Daniel Veillardcda96922001-08-21 10:56:31 +00009182 if (lst != NULL)
9183 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009184 if ((URL == NULL) && (ID == NULL))
9185 return(-1);
9186 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9187 return(-1);
9188
9189
9190 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9191 if (ctxt == NULL) return(-1);
9192 ctxt->userData = ctxt;
9193 oldsax = ctxt->sax;
9194 ctxt->sax = ctx->sax;
9195 newDoc = xmlNewDoc(BAD_CAST "1.0");
9196 if (newDoc == NULL) {
9197 xmlFreeParserCtxt(ctxt);
9198 return(-1);
9199 }
9200 if (ctx->myDoc != NULL) {
9201 newDoc->intSubset = ctx->myDoc->intSubset;
9202 newDoc->extSubset = ctx->myDoc->extSubset;
9203 }
9204 if (ctx->myDoc->URL != NULL) {
9205 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9206 }
9207 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9208 if (newDoc->children == NULL) {
9209 ctxt->sax = oldsax;
9210 xmlFreeParserCtxt(ctxt);
9211 newDoc->intSubset = NULL;
9212 newDoc->extSubset = NULL;
9213 xmlFreeDoc(newDoc);
9214 return(-1);
9215 }
9216 nodePush(ctxt, newDoc->children);
9217 if (ctx->myDoc == NULL) {
9218 ctxt->myDoc = newDoc;
9219 } else {
9220 ctxt->myDoc = ctx->myDoc;
9221 newDoc->children->doc = ctx->myDoc;
9222 }
9223
Daniel Veillard87a764e2001-06-20 17:41:10 +00009224 /*
9225 * Get the 4 first bytes and decode the charset
9226 * if enc != XML_CHAR_ENCODING_NONE
9227 * plug some encoding conversion routines.
9228 */
9229 GROW
9230 start[0] = RAW;
9231 start[1] = NXT(1);
9232 start[2] = NXT(2);
9233 start[3] = NXT(3);
9234 enc = xmlDetectCharEncoding(start, 4);
9235 if (enc != XML_CHAR_ENCODING_NONE) {
9236 xmlSwitchEncoding(ctxt, enc);
9237 }
9238
Owen Taylor3473f882001-02-23 17:55:21 +00009239 /*
9240 * Parse a possible text declaration first
9241 */
Owen Taylor3473f882001-02-23 17:55:21 +00009242 if ((RAW == '<') && (NXT(1) == '?') &&
9243 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9244 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9245 xmlParseTextDecl(ctxt);
9246 }
9247
9248 /*
9249 * Doing validity checking on chunk doesn't make sense
9250 */
9251 ctxt->instate = XML_PARSER_CONTENT;
9252 ctxt->validate = ctx->validate;
9253 ctxt->loadsubset = ctx->loadsubset;
9254 ctxt->depth = ctx->depth + 1;
9255 ctxt->replaceEntities = ctx->replaceEntities;
9256 if (ctxt->validate) {
9257 ctxt->vctxt.error = ctx->vctxt.error;
9258 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009259 } else {
9260 ctxt->vctxt.error = NULL;
9261 ctxt->vctxt.warning = NULL;
9262 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009263 ctxt->vctxt.nodeTab = NULL;
9264 ctxt->vctxt.nodeNr = 0;
9265 ctxt->vctxt.nodeMax = 0;
9266 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009267
9268 xmlParseContent(ctxt);
9269
9270 if ((RAW == '<') && (NXT(1) == '/')) {
9271 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9273 ctxt->sax->error(ctxt->userData,
9274 "chunk is not well balanced\n");
9275 ctxt->wellFormed = 0;
9276 ctxt->disableSAX = 1;
9277 } else if (RAW != 0) {
9278 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9280 ctxt->sax->error(ctxt->userData,
9281 "extra content at the end of well balanced chunk\n");
9282 ctxt->wellFormed = 0;
9283 ctxt->disableSAX = 1;
9284 }
9285 if (ctxt->node != newDoc->children) {
9286 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9288 ctxt->sax->error(ctxt->userData,
9289 "chunk is not well balanced\n");
9290 ctxt->wellFormed = 0;
9291 ctxt->disableSAX = 1;
9292 }
9293
9294 if (!ctxt->wellFormed) {
9295 if (ctxt->errNo == 0)
9296 ret = 1;
9297 else
9298 ret = ctxt->errNo;
9299 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009300 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009301 xmlNodePtr cur;
9302
9303 /*
9304 * Return the newly created nodeset after unlinking it from
9305 * they pseudo parent.
9306 */
9307 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009308 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009309 while (cur != NULL) {
9310 cur->parent = NULL;
9311 cur = cur->next;
9312 }
9313 newDoc->children->children = NULL;
9314 }
9315 ret = 0;
9316 }
9317 ctxt->sax = oldsax;
9318 xmlFreeParserCtxt(ctxt);
9319 newDoc->intSubset = NULL;
9320 newDoc->extSubset = NULL;
9321 xmlFreeDoc(newDoc);
9322
9323 return(ret);
9324}
9325
9326/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009327 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009328 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009329 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009330 * @sax: the SAX handler bloc (possibly NULL)
9331 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9332 * @depth: Used for loop detection, use 0
9333 * @URL: the URL for the entity to load
9334 * @ID: the System ID for the entity to load
9335 * @list: the return value for the set of parsed nodes
9336 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009337 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009338 *
9339 * Returns 0 if the entity is well formed, -1 in case of args problem and
9340 * the parser error code otherwise
9341 */
9342
Daniel Veillard257d9102001-05-08 10:41:44 +00009343static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009344xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9345 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009346 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009347 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009348 xmlParserCtxtPtr ctxt;
9349 xmlDocPtr newDoc;
9350 xmlSAXHandlerPtr oldsax = NULL;
9351 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009352 xmlChar start[4];
9353 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009354
9355 if (depth > 40) {
9356 return(XML_ERR_ENTITY_LOOP);
9357 }
9358
9359
9360
9361 if (list != NULL)
9362 *list = NULL;
9363 if ((URL == NULL) && (ID == NULL))
9364 return(-1);
9365 if (doc == NULL) /* @@ relax but check for dereferences */
9366 return(-1);
9367
9368
9369 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9370 if (ctxt == NULL) return(-1);
9371 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009372 if (oldctxt != NULL) {
9373 ctxt->_private = oldctxt->_private;
9374 ctxt->loadsubset = oldctxt->loadsubset;
9375 ctxt->validate = oldctxt->validate;
9376 ctxt->external = oldctxt->external;
9377 } else {
9378 /*
9379 * Doing validity checking on chunk without context
9380 * doesn't make sense
9381 */
9382 ctxt->_private = NULL;
9383 ctxt->validate = 0;
9384 ctxt->external = 2;
9385 ctxt->loadsubset = 0;
9386 }
Owen Taylor3473f882001-02-23 17:55:21 +00009387 if (sax != NULL) {
9388 oldsax = ctxt->sax;
9389 ctxt->sax = sax;
9390 if (user_data != NULL)
9391 ctxt->userData = user_data;
9392 }
9393 newDoc = xmlNewDoc(BAD_CAST "1.0");
9394 if (newDoc == NULL) {
9395 xmlFreeParserCtxt(ctxt);
9396 return(-1);
9397 }
9398 if (doc != NULL) {
9399 newDoc->intSubset = doc->intSubset;
9400 newDoc->extSubset = doc->extSubset;
9401 }
9402 if (doc->URL != NULL) {
9403 newDoc->URL = xmlStrdup(doc->URL);
9404 }
9405 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9406 if (newDoc->children == NULL) {
9407 if (sax != NULL)
9408 ctxt->sax = oldsax;
9409 xmlFreeParserCtxt(ctxt);
9410 newDoc->intSubset = NULL;
9411 newDoc->extSubset = NULL;
9412 xmlFreeDoc(newDoc);
9413 return(-1);
9414 }
9415 nodePush(ctxt, newDoc->children);
9416 if (doc == NULL) {
9417 ctxt->myDoc = newDoc;
9418 } else {
9419 ctxt->myDoc = doc;
9420 newDoc->children->doc = doc;
9421 }
9422
Daniel Veillard87a764e2001-06-20 17:41:10 +00009423 /*
9424 * Get the 4 first bytes and decode the charset
9425 * if enc != XML_CHAR_ENCODING_NONE
9426 * plug some encoding conversion routines.
9427 */
9428 GROW;
9429 start[0] = RAW;
9430 start[1] = NXT(1);
9431 start[2] = NXT(2);
9432 start[3] = NXT(3);
9433 enc = xmlDetectCharEncoding(start, 4);
9434 if (enc != XML_CHAR_ENCODING_NONE) {
9435 xmlSwitchEncoding(ctxt, enc);
9436 }
9437
Owen Taylor3473f882001-02-23 17:55:21 +00009438 /*
9439 * Parse a possible text declaration first
9440 */
Owen Taylor3473f882001-02-23 17:55:21 +00009441 if ((RAW == '<') && (NXT(1) == '?') &&
9442 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9443 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9444 xmlParseTextDecl(ctxt);
9445 }
9446
Owen Taylor3473f882001-02-23 17:55:21 +00009447 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009448 ctxt->depth = depth;
9449
9450 xmlParseContent(ctxt);
9451
9452 if ((RAW == '<') && (NXT(1) == '/')) {
9453 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9455 ctxt->sax->error(ctxt->userData,
9456 "chunk is not well balanced\n");
9457 ctxt->wellFormed = 0;
9458 ctxt->disableSAX = 1;
9459 } else if (RAW != 0) {
9460 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9462 ctxt->sax->error(ctxt->userData,
9463 "extra content at the end of well balanced chunk\n");
9464 ctxt->wellFormed = 0;
9465 ctxt->disableSAX = 1;
9466 }
9467 if (ctxt->node != newDoc->children) {
9468 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9470 ctxt->sax->error(ctxt->userData,
9471 "chunk is not well balanced\n");
9472 ctxt->wellFormed = 0;
9473 ctxt->disableSAX = 1;
9474 }
9475
9476 if (!ctxt->wellFormed) {
9477 if (ctxt->errNo == 0)
9478 ret = 1;
9479 else
9480 ret = ctxt->errNo;
9481 } else {
9482 if (list != NULL) {
9483 xmlNodePtr cur;
9484
9485 /*
9486 * Return the newly created nodeset after unlinking it from
9487 * they pseudo parent.
9488 */
9489 cur = newDoc->children->children;
9490 *list = cur;
9491 while (cur != NULL) {
9492 cur->parent = NULL;
9493 cur = cur->next;
9494 }
9495 newDoc->children->children = NULL;
9496 }
9497 ret = 0;
9498 }
9499 if (sax != NULL)
9500 ctxt->sax = oldsax;
9501 xmlFreeParserCtxt(ctxt);
9502 newDoc->intSubset = NULL;
9503 newDoc->extSubset = NULL;
9504 xmlFreeDoc(newDoc);
9505
9506 return(ret);
9507}
9508
9509/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009510 * xmlParseExternalEntity:
9511 * @doc: the document the chunk pertains to
9512 * @sax: the SAX handler bloc (possibly NULL)
9513 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9514 * @depth: Used for loop detection, use 0
9515 * @URL: the URL for the entity to load
9516 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009517 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009518 *
9519 * Parse an external general entity
9520 * An external general parsed entity is well-formed if it matches the
9521 * production labeled extParsedEnt.
9522 *
9523 * [78] extParsedEnt ::= TextDecl? content
9524 *
9525 * Returns 0 if the entity is well formed, -1 in case of args problem and
9526 * the parser error code otherwise
9527 */
9528
9529int
9530xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009531 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009532 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009533 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009534}
9535
9536/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009537 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009538 * @doc: the document the chunk pertains to
9539 * @sax: the SAX handler bloc (possibly NULL)
9540 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9541 * @depth: Used for loop detection, use 0
9542 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009543 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009544 *
9545 * Parse a well-balanced chunk of an XML document
9546 * called by the parser
9547 * The allowed sequence for the Well Balanced Chunk is the one defined by
9548 * the content production in the XML grammar:
9549 *
9550 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9551 *
9552 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9553 * the parser error code otherwise
9554 */
9555
9556int
9557xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009558 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009559 xmlParserCtxtPtr ctxt;
9560 xmlDocPtr newDoc;
9561 xmlSAXHandlerPtr oldsax = NULL;
9562 int size;
9563 int ret = 0;
9564
9565 if (depth > 40) {
9566 return(XML_ERR_ENTITY_LOOP);
9567 }
9568
9569
Daniel Veillardcda96922001-08-21 10:56:31 +00009570 if (lst != NULL)
9571 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009572 if (string == NULL)
9573 return(-1);
9574
9575 size = xmlStrlen(string);
9576
9577 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9578 if (ctxt == NULL) return(-1);
9579 ctxt->userData = ctxt;
9580 if (sax != NULL) {
9581 oldsax = ctxt->sax;
9582 ctxt->sax = sax;
9583 if (user_data != NULL)
9584 ctxt->userData = user_data;
9585 }
9586 newDoc = xmlNewDoc(BAD_CAST "1.0");
9587 if (newDoc == NULL) {
9588 xmlFreeParserCtxt(ctxt);
9589 return(-1);
9590 }
9591 if (doc != NULL) {
9592 newDoc->intSubset = doc->intSubset;
9593 newDoc->extSubset = doc->extSubset;
9594 }
9595 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9596 if (newDoc->children == NULL) {
9597 if (sax != NULL)
9598 ctxt->sax = oldsax;
9599 xmlFreeParserCtxt(ctxt);
9600 newDoc->intSubset = NULL;
9601 newDoc->extSubset = NULL;
9602 xmlFreeDoc(newDoc);
9603 return(-1);
9604 }
9605 nodePush(ctxt, newDoc->children);
9606 if (doc == NULL) {
9607 ctxt->myDoc = newDoc;
9608 } else {
9609 ctxt->myDoc = doc;
9610 newDoc->children->doc = doc;
9611 }
9612 ctxt->instate = XML_PARSER_CONTENT;
9613 ctxt->depth = depth;
9614
9615 /*
9616 * Doing validity checking on chunk doesn't make sense
9617 */
9618 ctxt->validate = 0;
9619 ctxt->loadsubset = 0;
9620
9621 xmlParseContent(ctxt);
9622
9623 if ((RAW == '<') && (NXT(1) == '/')) {
9624 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9626 ctxt->sax->error(ctxt->userData,
9627 "chunk is not well balanced\n");
9628 ctxt->wellFormed = 0;
9629 ctxt->disableSAX = 1;
9630 } else if (RAW != 0) {
9631 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9633 ctxt->sax->error(ctxt->userData,
9634 "extra content at the end of well balanced chunk\n");
9635 ctxt->wellFormed = 0;
9636 ctxt->disableSAX = 1;
9637 }
9638 if (ctxt->node != newDoc->children) {
9639 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9641 ctxt->sax->error(ctxt->userData,
9642 "chunk is not well balanced\n");
9643 ctxt->wellFormed = 0;
9644 ctxt->disableSAX = 1;
9645 }
9646
9647 if (!ctxt->wellFormed) {
9648 if (ctxt->errNo == 0)
9649 ret = 1;
9650 else
9651 ret = ctxt->errNo;
9652 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009653 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009654 xmlNodePtr cur;
9655
9656 /*
9657 * Return the newly created nodeset after unlinking it from
9658 * they pseudo parent.
9659 */
9660 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009661 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009662 while (cur != NULL) {
9663 cur->parent = NULL;
9664 cur = cur->next;
9665 }
9666 newDoc->children->children = NULL;
9667 }
9668 ret = 0;
9669 }
9670 if (sax != NULL)
9671 ctxt->sax = oldsax;
9672 xmlFreeParserCtxt(ctxt);
9673 newDoc->intSubset = NULL;
9674 newDoc->extSubset = NULL;
9675 xmlFreeDoc(newDoc);
9676
9677 return(ret);
9678}
9679
9680/**
9681 * xmlSAXParseEntity:
9682 * @sax: the SAX handler block
9683 * @filename: the filename
9684 *
9685 * parse an XML external entity out of context and build a tree.
9686 * It use the given SAX function block to handle the parsing callback.
9687 * If sax is NULL, fallback to the default DOM tree building routines.
9688 *
9689 * [78] extParsedEnt ::= TextDecl? content
9690 *
9691 * This correspond to a "Well Balanced" chunk
9692 *
9693 * Returns the resulting document tree
9694 */
9695
9696xmlDocPtr
9697xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9698 xmlDocPtr ret;
9699 xmlParserCtxtPtr ctxt;
9700 char *directory = NULL;
9701
9702 ctxt = xmlCreateFileParserCtxt(filename);
9703 if (ctxt == NULL) {
9704 return(NULL);
9705 }
9706 if (sax != NULL) {
9707 if (ctxt->sax != NULL)
9708 xmlFree(ctxt->sax);
9709 ctxt->sax = sax;
9710 ctxt->userData = NULL;
9711 }
9712
9713 if ((ctxt->directory == NULL) && (directory == NULL))
9714 directory = xmlParserGetDirectory(filename);
9715
9716 xmlParseExtParsedEnt(ctxt);
9717
9718 if (ctxt->wellFormed)
9719 ret = ctxt->myDoc;
9720 else {
9721 ret = NULL;
9722 xmlFreeDoc(ctxt->myDoc);
9723 ctxt->myDoc = NULL;
9724 }
9725 if (sax != NULL)
9726 ctxt->sax = NULL;
9727 xmlFreeParserCtxt(ctxt);
9728
9729 return(ret);
9730}
9731
9732/**
9733 * xmlParseEntity:
9734 * @filename: the filename
9735 *
9736 * parse an XML external entity out of context and build a tree.
9737 *
9738 * [78] extParsedEnt ::= TextDecl? content
9739 *
9740 * This correspond to a "Well Balanced" chunk
9741 *
9742 * Returns the resulting document tree
9743 */
9744
9745xmlDocPtr
9746xmlParseEntity(const char *filename) {
9747 return(xmlSAXParseEntity(NULL, filename));
9748}
9749
9750/**
9751 * xmlCreateEntityParserCtxt:
9752 * @URL: the entity URL
9753 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009754 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009755 *
9756 * Create a parser context for an external entity
9757 * Automatic support for ZLIB/Compress compressed document is provided
9758 * by default if found at compile-time.
9759 *
9760 * Returns the new parser context or NULL
9761 */
9762xmlParserCtxtPtr
9763xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9764 const xmlChar *base) {
9765 xmlParserCtxtPtr ctxt;
9766 xmlParserInputPtr inputStream;
9767 char *directory = NULL;
9768 xmlChar *uri;
9769
9770 ctxt = xmlNewParserCtxt();
9771 if (ctxt == NULL) {
9772 return(NULL);
9773 }
9774
9775 uri = xmlBuildURI(URL, base);
9776
9777 if (uri == NULL) {
9778 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9779 if (inputStream == NULL) {
9780 xmlFreeParserCtxt(ctxt);
9781 return(NULL);
9782 }
9783
9784 inputPush(ctxt, inputStream);
9785
9786 if ((ctxt->directory == NULL) && (directory == NULL))
9787 directory = xmlParserGetDirectory((char *)URL);
9788 if ((ctxt->directory == NULL) && (directory != NULL))
9789 ctxt->directory = directory;
9790 } else {
9791 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9792 if (inputStream == NULL) {
9793 xmlFree(uri);
9794 xmlFreeParserCtxt(ctxt);
9795 return(NULL);
9796 }
9797
9798 inputPush(ctxt, inputStream);
9799
9800 if ((ctxt->directory == NULL) && (directory == NULL))
9801 directory = xmlParserGetDirectory((char *)uri);
9802 if ((ctxt->directory == NULL) && (directory != NULL))
9803 ctxt->directory = directory;
9804 xmlFree(uri);
9805 }
9806
9807 return(ctxt);
9808}
9809
9810/************************************************************************
9811 * *
9812 * Front ends when parsing from a file *
9813 * *
9814 ************************************************************************/
9815
9816/**
9817 * xmlCreateFileParserCtxt:
9818 * @filename: the filename
9819 *
9820 * Create a parser context for a file content.
9821 * Automatic support for ZLIB/Compress compressed document is provided
9822 * by default if found at compile-time.
9823 *
9824 * Returns the new parser context or NULL
9825 */
9826xmlParserCtxtPtr
9827xmlCreateFileParserCtxt(const char *filename)
9828{
9829 xmlParserCtxtPtr ctxt;
9830 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009831 char *directory = NULL;
9832
Owen Taylor3473f882001-02-23 17:55:21 +00009833 ctxt = xmlNewParserCtxt();
9834 if (ctxt == NULL) {
9835 if (xmlDefaultSAXHandler.error != NULL) {
9836 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9837 }
9838 return(NULL);
9839 }
9840
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009841 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009842 if (inputStream == NULL) {
9843 xmlFreeParserCtxt(ctxt);
9844 return(NULL);
9845 }
9846
Owen Taylor3473f882001-02-23 17:55:21 +00009847 inputPush(ctxt, inputStream);
9848 if ((ctxt->directory == NULL) && (directory == NULL))
9849 directory = xmlParserGetDirectory(filename);
9850 if ((ctxt->directory == NULL) && (directory != NULL))
9851 ctxt->directory = directory;
9852
9853 return(ctxt);
9854}
9855
9856/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009857 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009858 * @sax: the SAX handler block
9859 * @filename: the filename
9860 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9861 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009862 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009863 *
9864 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9865 * compressed document is provided by default if found at compile-time.
9866 * It use the given SAX function block to handle the parsing callback.
9867 * If sax is NULL, fallback to the default DOM tree building routines.
9868 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009869 * User data (void *) is stored within the parser context, so it is
9870 * available nearly everywhere in libxml.
9871 *
Owen Taylor3473f882001-02-23 17:55:21 +00009872 * Returns the resulting document tree
9873 */
9874
9875xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009876xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9877 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009878 xmlDocPtr ret;
9879 xmlParserCtxtPtr ctxt;
9880 char *directory = NULL;
9881
Daniel Veillard635ef722001-10-29 11:48:19 +00009882 xmlInitParser();
9883
Owen Taylor3473f882001-02-23 17:55:21 +00009884 ctxt = xmlCreateFileParserCtxt(filename);
9885 if (ctxt == NULL) {
9886 return(NULL);
9887 }
9888 if (sax != NULL) {
9889 if (ctxt->sax != NULL)
9890 xmlFree(ctxt->sax);
9891 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009892 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009893 if (data!=NULL) {
9894 ctxt->_private=data;
9895 }
Owen Taylor3473f882001-02-23 17:55:21 +00009896
9897 if ((ctxt->directory == NULL) && (directory == NULL))
9898 directory = xmlParserGetDirectory(filename);
9899 if ((ctxt->directory == NULL) && (directory != NULL))
9900 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9901
9902 xmlParseDocument(ctxt);
9903
9904 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9905 else {
9906 ret = NULL;
9907 xmlFreeDoc(ctxt->myDoc);
9908 ctxt->myDoc = NULL;
9909 }
9910 if (sax != NULL)
9911 ctxt->sax = NULL;
9912 xmlFreeParserCtxt(ctxt);
9913
9914 return(ret);
9915}
9916
9917/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009918 * xmlSAXParseFile:
9919 * @sax: the SAX handler block
9920 * @filename: the filename
9921 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9922 * documents
9923 *
9924 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9925 * compressed document is provided by default if found at compile-time.
9926 * It use the given SAX function block to handle the parsing callback.
9927 * If sax is NULL, fallback to the default DOM tree building routines.
9928 *
9929 * Returns the resulting document tree
9930 */
9931
9932xmlDocPtr
9933xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9934 int recovery) {
9935 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9936}
9937
9938/**
Owen Taylor3473f882001-02-23 17:55:21 +00009939 * xmlRecoverDoc:
9940 * @cur: a pointer to an array of xmlChar
9941 *
9942 * parse an XML in-memory document and build a tree.
9943 * In the case the document is not Well Formed, a tree is built anyway
9944 *
9945 * Returns the resulting document tree
9946 */
9947
9948xmlDocPtr
9949xmlRecoverDoc(xmlChar *cur) {
9950 return(xmlSAXParseDoc(NULL, cur, 1));
9951}
9952
9953/**
9954 * xmlParseFile:
9955 * @filename: the filename
9956 *
9957 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9958 * compressed document is provided by default if found at compile-time.
9959 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009960 * Returns the resulting document tree if the file was wellformed,
9961 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009962 */
9963
9964xmlDocPtr
9965xmlParseFile(const char *filename) {
9966 return(xmlSAXParseFile(NULL, filename, 0));
9967}
9968
9969/**
9970 * xmlRecoverFile:
9971 * @filename: the filename
9972 *
9973 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9974 * compressed document is provided by default if found at compile-time.
9975 * In the case the document is not Well Formed, a tree is built anyway
9976 *
9977 * Returns the resulting document tree
9978 */
9979
9980xmlDocPtr
9981xmlRecoverFile(const char *filename) {
9982 return(xmlSAXParseFile(NULL, filename, 1));
9983}
9984
9985
9986/**
9987 * xmlSetupParserForBuffer:
9988 * @ctxt: an XML parser context
9989 * @buffer: a xmlChar * buffer
9990 * @filename: a file name
9991 *
9992 * Setup the parser context to parse a new buffer; Clears any prior
9993 * contents from the parser context. The buffer parameter must not be
9994 * NULL, but the filename parameter can be
9995 */
9996void
9997xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9998 const char* filename)
9999{
10000 xmlParserInputPtr input;
10001
10002 input = xmlNewInputStream(ctxt);
10003 if (input == NULL) {
10004 perror("malloc");
10005 xmlFree(ctxt);
10006 return;
10007 }
10008
10009 xmlClearParserCtxt(ctxt);
10010 if (filename != NULL)
10011 input->filename = xmlMemStrdup(filename);
10012 input->base = buffer;
10013 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010014 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010015 inputPush(ctxt, input);
10016}
10017
10018/**
10019 * xmlSAXUserParseFile:
10020 * @sax: a SAX handler
10021 * @user_data: The user data returned on SAX callbacks
10022 * @filename: a file name
10023 *
10024 * parse an XML file and call the given SAX handler routines.
10025 * Automatic support for ZLIB/Compress compressed document is provided
10026 *
10027 * Returns 0 in case of success or a error number otherwise
10028 */
10029int
10030xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10031 const char *filename) {
10032 int ret = 0;
10033 xmlParserCtxtPtr ctxt;
10034
10035 ctxt = xmlCreateFileParserCtxt(filename);
10036 if (ctxt == NULL) return -1;
10037 if (ctxt->sax != &xmlDefaultSAXHandler)
10038 xmlFree(ctxt->sax);
10039 ctxt->sax = sax;
10040 if (user_data != NULL)
10041 ctxt->userData = user_data;
10042
10043 xmlParseDocument(ctxt);
10044
10045 if (ctxt->wellFormed)
10046 ret = 0;
10047 else {
10048 if (ctxt->errNo != 0)
10049 ret = ctxt->errNo;
10050 else
10051 ret = -1;
10052 }
10053 if (sax != NULL)
10054 ctxt->sax = NULL;
10055 xmlFreeParserCtxt(ctxt);
10056
10057 return ret;
10058}
10059
10060/************************************************************************
10061 * *
10062 * Front ends when parsing from memory *
10063 * *
10064 ************************************************************************/
10065
10066/**
10067 * xmlCreateMemoryParserCtxt:
10068 * @buffer: a pointer to a char array
10069 * @size: the size of the array
10070 *
10071 * Create a parser context for an XML in-memory document.
10072 *
10073 * Returns the new parser context or NULL
10074 */
10075xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010076xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010077 xmlParserCtxtPtr ctxt;
10078 xmlParserInputPtr input;
10079 xmlParserInputBufferPtr buf;
10080
10081 if (buffer == NULL)
10082 return(NULL);
10083 if (size <= 0)
10084 return(NULL);
10085
10086 ctxt = xmlNewParserCtxt();
10087 if (ctxt == NULL)
10088 return(NULL);
10089
10090 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10091 if (buf == NULL) return(NULL);
10092
10093 input = xmlNewInputStream(ctxt);
10094 if (input == NULL) {
10095 xmlFreeParserCtxt(ctxt);
10096 return(NULL);
10097 }
10098
10099 input->filename = NULL;
10100 input->buf = buf;
10101 input->base = input->buf->buffer->content;
10102 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010103 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010104
10105 inputPush(ctxt, input);
10106 return(ctxt);
10107}
10108
10109/**
10110 * xmlSAXParseMemory:
10111 * @sax: the SAX handler block
10112 * @buffer: an pointer to a char array
10113 * @size: the size of the array
10114 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10115 * documents
10116 *
10117 * parse an XML in-memory block and use the given SAX function block
10118 * to handle the parsing callback. If sax is NULL, fallback to the default
10119 * DOM tree building routines.
10120 *
10121 * Returns the resulting document tree
10122 */
10123xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010124xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10125 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010126 xmlDocPtr ret;
10127 xmlParserCtxtPtr ctxt;
10128
10129 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10130 if (ctxt == NULL) return(NULL);
10131 if (sax != NULL) {
10132 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010133 }
10134
10135 xmlParseDocument(ctxt);
10136
10137 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10138 else {
10139 ret = NULL;
10140 xmlFreeDoc(ctxt->myDoc);
10141 ctxt->myDoc = NULL;
10142 }
10143 if (sax != NULL)
10144 ctxt->sax = NULL;
10145 xmlFreeParserCtxt(ctxt);
10146
10147 return(ret);
10148}
10149
10150/**
10151 * xmlParseMemory:
10152 * @buffer: an pointer to a char array
10153 * @size: the size of the array
10154 *
10155 * parse an XML in-memory block and build a tree.
10156 *
10157 * Returns the resulting document tree
10158 */
10159
Daniel Veillard50822cb2001-07-26 20:05:51 +000010160xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010161 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10162}
10163
10164/**
10165 * xmlRecoverMemory:
10166 * @buffer: an pointer to a char array
10167 * @size: the size of the array
10168 *
10169 * parse an XML in-memory block and build a tree.
10170 * In the case the document is not Well Formed, a tree is built anyway
10171 *
10172 * Returns the resulting document tree
10173 */
10174
Daniel Veillard50822cb2001-07-26 20:05:51 +000010175xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010176 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10177}
10178
10179/**
10180 * xmlSAXUserParseMemory:
10181 * @sax: a SAX handler
10182 * @user_data: The user data returned on SAX callbacks
10183 * @buffer: an in-memory XML document input
10184 * @size: the length of the XML document in bytes
10185 *
10186 * A better SAX parsing routine.
10187 * parse an XML in-memory buffer and call the given SAX handler routines.
10188 *
10189 * Returns 0 in case of success or a error number otherwise
10190 */
10191int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010192 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010193 int ret = 0;
10194 xmlParserCtxtPtr ctxt;
10195 xmlSAXHandlerPtr oldsax = NULL;
10196
10197 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10198 if (ctxt == NULL) return -1;
10199 if (sax != NULL) {
10200 oldsax = ctxt->sax;
10201 ctxt->sax = sax;
10202 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010203 if (user_data != NULL)
10204 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010205
10206 xmlParseDocument(ctxt);
10207
10208 if (ctxt->wellFormed)
10209 ret = 0;
10210 else {
10211 if (ctxt->errNo != 0)
10212 ret = ctxt->errNo;
10213 else
10214 ret = -1;
10215 }
10216 if (sax != NULL) {
10217 ctxt->sax = oldsax;
10218 }
10219 xmlFreeParserCtxt(ctxt);
10220
10221 return ret;
10222}
10223
10224/**
10225 * xmlCreateDocParserCtxt:
10226 * @cur: a pointer to an array of xmlChar
10227 *
10228 * Creates a parser context for an XML in-memory document.
10229 *
10230 * Returns the new parser context or NULL
10231 */
10232xmlParserCtxtPtr
10233xmlCreateDocParserCtxt(xmlChar *cur) {
10234 int len;
10235
10236 if (cur == NULL)
10237 return(NULL);
10238 len = xmlStrlen(cur);
10239 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10240}
10241
10242/**
10243 * xmlSAXParseDoc:
10244 * @sax: the SAX handler block
10245 * @cur: a pointer to an array of xmlChar
10246 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10247 * documents
10248 *
10249 * parse an XML in-memory document and build a tree.
10250 * It use the given SAX function block to handle the parsing callback.
10251 * If sax is NULL, fallback to the default DOM tree building routines.
10252 *
10253 * Returns the resulting document tree
10254 */
10255
10256xmlDocPtr
10257xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10258 xmlDocPtr ret;
10259 xmlParserCtxtPtr ctxt;
10260
10261 if (cur == NULL) return(NULL);
10262
10263
10264 ctxt = xmlCreateDocParserCtxt(cur);
10265 if (ctxt == NULL) return(NULL);
10266 if (sax != NULL) {
10267 ctxt->sax = sax;
10268 ctxt->userData = NULL;
10269 }
10270
10271 xmlParseDocument(ctxt);
10272 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10273 else {
10274 ret = NULL;
10275 xmlFreeDoc(ctxt->myDoc);
10276 ctxt->myDoc = NULL;
10277 }
10278 if (sax != NULL)
10279 ctxt->sax = NULL;
10280 xmlFreeParserCtxt(ctxt);
10281
10282 return(ret);
10283}
10284
10285/**
10286 * xmlParseDoc:
10287 * @cur: a pointer to an array of xmlChar
10288 *
10289 * parse an XML in-memory document and build a tree.
10290 *
10291 * Returns the resulting document tree
10292 */
10293
10294xmlDocPtr
10295xmlParseDoc(xmlChar *cur) {
10296 return(xmlSAXParseDoc(NULL, cur, 0));
10297}
10298
/************************************************************************
 *                                                                      *
 *  Functions to keep track of entity references,                       *
 *  used by the XSLT debugger                                           *
 *                                                                      *
 ************************************************************************/
10305
10306static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10307
10308/**
10309 * xmlAddEntityReference:
10310 * @ent : A valid entity
10311 * @firstNode : A valid first node for children of entity
10312 * @lastNode : A valid last node of children entity
10313 *
10314 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10315 */
10316static void
10317xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10318 xmlNodePtr lastNode)
10319{
10320 if (xmlEntityRefFunc != NULL) {
10321 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10322 }
10323}
10324
10325
10326/**
10327 * xmlSetEntityReferenceFunc:
10328 * @func : A valid function
10329 *
10330 * Set the function to call call back when a xml reference has been made
10331 */
10332void
10333xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10334{
10335 xmlEntityRefFunc = func;
10336}
Owen Taylor3473f882001-02-23 17:55:21 +000010337
/************************************************************************
 *                                                                      *
 *                            Miscellaneous                             *
 *                                                                      *
 ************************************************************************/
10343
10344#ifdef LIBXML_XPATH_ENABLED
10345#include <libxml/xpath.h>
10346#endif
10347
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010348extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010349static int xmlParserInitialized = 0;
10350
10351/**
10352 * xmlInitParser:
10353 *
10354 * Initialization function for the XML parser.
10355 * This is not reentrant. Call once before processing in case of
10356 * use in multithreaded programs.
10357 */
10358
10359void
10360xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010361 if (xmlParserInitialized != 0)
10362 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010363
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010364 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10365 (xmlGenericError == NULL))
10366 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010367 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010368 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010369 xmlInitCharEncodingHandlers();
10370 xmlInitializePredefinedEntities();
10371 xmlDefaultSAXHandlerInit();
10372 xmlRegisterDefaultInputCallbacks();
10373 xmlRegisterDefaultOutputCallbacks();
10374#ifdef LIBXML_HTML_ENABLED
10375 htmlInitAutoClose();
10376 htmlDefaultSAXHandlerInit();
10377#endif
10378#ifdef LIBXML_XPATH_ENABLED
10379 xmlXPathInit();
10380#endif
10381 xmlParserInitialized = 1;
10382}
10383
10384/**
10385 * xmlCleanupParser:
10386 *
10387 * Cleanup function for the XML parser. It tries to reclaim all
10388 * parsing related global memory allocated for the parser processing.
10389 * It doesn't deallocate any document related memory. Calling this
10390 * function should not prevent reusing the parser.
10391 */
10392
10393void
10394xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010395 xmlCleanupCharEncodingHandlers();
10396 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010397#ifdef LIBXML_CATALOG_ENABLED
10398 xmlCatalogCleanup();
10399#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010400 xmlCleanupThreads();
10401 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010402}