blob: 8a783efeac265983baf79429ccdd4d21fc30be05 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
/*
 * Directory separator character: a backslash on native WIN32 builds,
 * a forward slash everywhere else (Cygwin included).
 */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
/*
 * Buffer sizing constants, counted in xmlChar units.
 * XML_PARSER_BUFFER_SIZE is also used as the free-space margin that
 * triggers growBuffer(); XML_PARSER_BIG_BUFFER_SIZE is the initial size
 * of the translation buffer in xmlStringDecodeEntities().
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300
81
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000092xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
93 const xmlChar **str);
94
Daniel Veillard257d9102001-05-08 10:41:44 +000095static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000096xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
97 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000098 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000100
Daniel Veillard8107a222002-01-13 14:10:10 +0000101static void
102xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
103 xmlNodePtr lastNode);
104
Owen Taylor3473f882001-02-23 17:55:21 +0000105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
112 const xmlChar ** str);
113
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) generates two functions operating on the
 * ctxt->name##Tab / ctxt->name##Nr / ctxt->name##Max stack and on the
 * ctxt->name "top of stack" shortcut:
 *
 *   int  name##Push(ctxt, value)  stores value on top of the stack, doubling
 *                                 the table when it is full. Returns the
 *                                 index used, or 0 if the table could not
 *                                 be enlarged.
 *   type name##Pop(ctxt)          removes and returns the top value, or 0
 *                                 when the stack is empty.
 *
 * The result of xmlRealloc() goes through a temporary so that, when the
 * allocation fails, the existing table and its recorded capacity are left
 * untouched instead of being leaked and replaced by NULL.
 */

#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        int grownMax = ctxt->name##Max * 2;                             \
        type *grownTab = (type *) xmlRealloc(ctxt->name##Tab,           \
                     grownMax * sizeof(ctxt->name##Tab[0]));            \
        if (grownTab == NULL) {                                         \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = grownTab;                                     \
        ctxt->name##Max = grownMax;                                     \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
146
/**
 * inputPop:
 * @ctxt: an XML parser context
 *
 * Pops the top parser input from the input stack
 *
 * Returns the input just removed
 */
/**
 * inputPush:
 * @ctxt:  an XML parser context
 * @value:  the parser input
 *
 * Pushes a new parser input on top of the input stack
 *
 * Returns 0 in case of error, the index in the stack otherwise
 */
/**
 * namePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack
 *
 * Returns the name just removed
 */
/**
 * namePush:
 * @ctxt:  an XML parser context
 * @value:  the element name
 *
 * Pushes a new element name on top of the name stack
 *
 * Returns 0 in case of error, the index in the stack otherwise
 */
/**
 * nodePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element node from the node stack
 *
 * Returns the node just removed
 */
/**
 * nodePush:
 * @ctxt:  an XML parser context
 * @value:  the element node
 *
 * Pushes a new element node on top of the node stack
 *
 * Returns 0 in case of error, the index in the stack otherwise
 */
Owen Taylor3473f882001-02-23 17:55:21 +0000198/*
199 * Those macros actually generate the functions
200 */
201PUSH_AND_POP(extern, xmlParserInputPtr, input)
202PUSH_AND_POP(extern, xmlNodePtr, node)
203PUSH_AND_POP(extern, xmlChar*, name)
204
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000205static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000206 if (ctxt->spaceNr >= ctxt->spaceMax) {
207 ctxt->spaceMax *= 2;
208 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
209 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
210 if (ctxt->spaceTab == NULL) {
211 xmlGenericError(xmlGenericErrorContext,
212 "realloc failed !\n");
213 return(0);
214 }
215 }
216 ctxt->spaceTab[ctxt->spaceNr] = val;
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
218 return(ctxt->spaceNr++);
219}
220
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000221static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000222 int ret;
223 if (ctxt->spaceNr <= 0) return(0);
224 ctxt->spaceNr--;
225 if (ctxt->spaceNr > 0)
226 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
227 else
228 ctxt->space = NULL;
229 ret = ctxt->spaceTab[ctxt->spaceNr];
230 ctxt->spaceTab[ctxt->spaceNr] = -1;
231 return(ret);
232}
233
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending token if there is one, otherwise the
 *           current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done; evaluates to -1 while
 *           a token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXT1   Advance by exactly one xmlChar, growing the input when the
 *           end of the buffer is reached.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW, NEXT1 and COPY_BUF deliberately keep their historical
 * "bare statement" shape instead of do { } while (0): call sites are not
 * guaranteed to follow them with a semicolon. Do not use them as the
 * unbraced body of an if that has an else branch.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000320
321/**
322 * xmlSkipBlankChars:
323 * @ctxt: the XML parser context
324 *
325 * skip all blanks character found at that point in the input streams.
326 * It pops up finished entities in the process if allowable at that point.
327 *
328 * Returns the number of space chars skipped
329 */
330
331int
332xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000333 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000334
Daniel Veillard02141ea2001-04-30 11:46:40 +0000335 if (ctxt->token != 0) {
336 if (!IS_BLANK(ctxt->token))
337 return(0);
338 ctxt->token = 0;
339 res++;
340 }
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
342 * It's Okay to use CUR/NEXT here since all the blanks are on
343 * the ASCII range.
344 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000345 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
346 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000347 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000349 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000350 cur = ctxt->input->cur;
351 while (IS_BLANK(*cur)) {
352 if (*cur == '\n') {
353 ctxt->input->line++; ctxt->input->col = 1;
354 }
355 cur++;
356 res++;
357 if (*cur == 0) {
358 ctxt->input->cur = cur;
359 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
360 cur = ctxt->input->cur;
361 }
362 }
363 ctxt->input->cur = cur;
364 } else {
365 int cur;
366 do {
367 cur = CUR;
368 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
369 NEXT;
370 cur = CUR;
371 res++;
372 }
373 while ((cur == 0) && (ctxt->inputNr > 1) &&
374 (ctxt->instate != XML_PARSER_COMMENT)) {
375 xmlPopInput(ctxt);
376 cur = CUR;
377 }
378 /*
379 * Need to handle support of entities branching here
380 */
381 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
382 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
383 }
Owen Taylor3473f882001-02-23 17:55:21 +0000384 return(res);
385}
386
/************************************************************************
 *                                                                      *
 *              Commodity functions to handle entities                  *
 *                                                                      *
 ************************************************************************/
392
393/**
394 * xmlPopInput:
395 * @ctxt: an XML parser context
396 *
397 * xmlPopInput: the current input pointed by ctxt->input came to an end
398 * pop it and return the next char.
399 *
400 * Returns the current xmlChar in the parser context
401 */
402xmlChar
403xmlPopInput(xmlParserCtxtPtr ctxt) {
404 if (ctxt->inputNr == 1) return(0); /* End of main Input */
405 if (xmlParserDebugEntities)
406 xmlGenericError(xmlGenericErrorContext,
407 "Popping input %d\n", ctxt->inputNr);
408 xmlFreeInputStream(inputPop(ctxt));
409 if ((*ctxt->input->cur == 0) &&
410 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
411 return(xmlPopInput(ctxt));
412 return(CUR);
413}
414
415/**
416 * xmlPushInput:
417 * @ctxt: an XML parser context
418 * @input: an XML parser input fragment (entity, XML fragment ...).
419 *
420 * xmlPushInput: switch to a new input stream which is stacked on top
421 * of the previous one(s).
422 */
423void
424xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
425 if (input == NULL) return;
426
427 if (xmlParserDebugEntities) {
428 if ((ctxt->input != NULL) && (ctxt->input->filename))
429 xmlGenericError(xmlGenericErrorContext,
430 "%s(%d): ", ctxt->input->filename,
431 ctxt->input->line);
432 xmlGenericError(xmlGenericErrorContext,
433 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
434 }
435 inputPush(ctxt, input);
436 GROW;
437}
438
439/**
440 * xmlParseCharRef:
441 * @ctxt: an XML parser context
442 *
443 * parse Reference declarations
444 *
445 * [66] CharRef ::= '&#' [0-9]+ ';' |
446 * '&#x' [0-9a-fA-F]+ ';'
447 *
448 * [ WFC: Legal Character ]
449 * Characters referred to using character references must match the
450 * production for Char.
451 *
452 * Returns the value parsed (as an int), 0 in case of error
453 */
454int
455xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000456 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000457 int count = 0;
458
459 if (ctxt->token != 0) {
460 val = ctxt->token;
461 ctxt->token = 0;
462 return(val);
463 }
464 /*
465 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
466 */
467 if ((RAW == '&') && (NXT(1) == '#') &&
468 (NXT(2) == 'x')) {
469 SKIP(3);
470 GROW;
471 while (RAW != ';') { /* loop blocked by count */
472 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
473 val = val * 16 + (CUR - '0');
474 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
475 val = val * 16 + (CUR - 'a') + 10;
476 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
477 val = val * 16 + (CUR - 'A') + 10;
478 else {
479 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
481 ctxt->sax->error(ctxt->userData,
482 "xmlParseCharRef: invalid hexadecimal value\n");
483 ctxt->wellFormed = 0;
484 ctxt->disableSAX = 1;
485 val = 0;
486 break;
487 }
488 NEXT;
489 count++;
490 }
491 if (RAW == ';') {
492 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
493 ctxt->nbChars ++;
494 ctxt->input->cur++;
495 }
496 } else if ((RAW == '&') && (NXT(1) == '#')) {
497 SKIP(2);
498 GROW;
499 while (RAW != ';') { /* loop blocked by count */
500 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
501 val = val * 10 + (CUR - '0');
502 else {
503 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
505 ctxt->sax->error(ctxt->userData,
506 "xmlParseCharRef: invalid decimal value\n");
507 ctxt->wellFormed = 0;
508 ctxt->disableSAX = 1;
509 val = 0;
510 break;
511 }
512 NEXT;
513 count++;
514 }
515 if (RAW == ';') {
516 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
517 ctxt->nbChars ++;
518 ctxt->input->cur++;
519 }
520 } else {
521 ctxt->errNo = XML_ERR_INVALID_CHARREF;
522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
523 ctxt->sax->error(ctxt->userData,
524 "xmlParseCharRef: invalid value\n");
525 ctxt->wellFormed = 0;
526 ctxt->disableSAX = 1;
527 }
528
529 /*
530 * [ WFC: Legal Character ]
531 * Characters referred to using character references must match the
532 * production for Char.
533 */
534 if (IS_CHAR(val)) {
535 return(val);
536 } else {
537 ctxt->errNo = XML_ERR_INVALID_CHAR;
538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000539 ctxt->sax->error(ctxt->userData,
540 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000541 val);
542 ctxt->wellFormed = 0;
543 ctxt->disableSAX = 1;
544 }
545 return(0);
546}
547
548/**
549 * xmlParseStringCharRef:
550 * @ctxt: an XML parser context
551 * @str: a pointer to an index in the string
552 *
553 * parse Reference declarations, variant parsing from a string rather
554 * than an an input flow.
555 *
556 * [66] CharRef ::= '&#' [0-9]+ ';' |
557 * '&#x' [0-9a-fA-F]+ ';'
558 *
559 * [ WFC: Legal Character ]
560 * Characters referred to using character references must match the
561 * production for Char.
562 *
563 * Returns the value parsed (as an int), 0 in case of error, str will be
564 * updated to the current value of the index
565 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000566static int
Owen Taylor3473f882001-02-23 17:55:21 +0000567xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
568 const xmlChar *ptr;
569 xmlChar cur;
570 int val = 0;
571
572 if ((str == NULL) || (*str == NULL)) return(0);
573 ptr = *str;
574 cur = *ptr;
575 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
576 ptr += 3;
577 cur = *ptr;
578 while (cur != ';') { /* Non input consuming loop */
579 if ((cur >= '0') && (cur <= '9'))
580 val = val * 16 + (cur - '0');
581 else if ((cur >= 'a') && (cur <= 'f'))
582 val = val * 16 + (cur - 'a') + 10;
583 else if ((cur >= 'A') && (cur <= 'F'))
584 val = val * 16 + (cur - 'A') + 10;
585 else {
586 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
588 ctxt->sax->error(ctxt->userData,
589 "xmlParseStringCharRef: invalid hexadecimal value\n");
590 ctxt->wellFormed = 0;
591 ctxt->disableSAX = 1;
592 val = 0;
593 break;
594 }
595 ptr++;
596 cur = *ptr;
597 }
598 if (cur == ';')
599 ptr++;
600 } else if ((cur == '&') && (ptr[1] == '#')){
601 ptr += 2;
602 cur = *ptr;
603 while (cur != ';') { /* Non input consuming loops */
604 if ((cur >= '0') && (cur <= '9'))
605 val = val * 10 + (cur - '0');
606 else {
607 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
609 ctxt->sax->error(ctxt->userData,
610 "xmlParseStringCharRef: invalid decimal value\n");
611 ctxt->wellFormed = 0;
612 ctxt->disableSAX = 1;
613 val = 0;
614 break;
615 }
616 ptr++;
617 cur = *ptr;
618 }
619 if (cur == ';')
620 ptr++;
621 } else {
622 ctxt->errNo = XML_ERR_INVALID_CHARREF;
623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
624 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000625 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000626 ctxt->wellFormed = 0;
627 ctxt->disableSAX = 1;
628 return(0);
629 }
630 *str = ptr;
631
632 /*
633 * [ WFC: Legal Character ]
634 * Characters referred to using character references must match the
635 * production for Char.
636 */
637 if (IS_CHAR(val)) {
638 return(val);
639 } else {
640 ctxt->errNo = XML_ERR_INVALID_CHAR;
641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
642 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000643 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000644 ctxt->wellFormed = 0;
645 ctxt->disableSAX = 1;
646 }
647 return(0);
648}
649
650/**
651 * xmlParserHandlePEReference:
652 * @ctxt: the parser context
653 *
654 * [69] PEReference ::= '%' Name ';'
655 *
656 * [ WFC: No Recursion ]
657 * A parsed entity must not contain a recursive
658 * reference to itself, either directly or indirectly.
659 *
660 * [ WFC: Entity Declared ]
661 * In a document without any DTD, a document with only an internal DTD
662 * subset which contains no parameter entity references, or a document
663 * with "standalone='yes'", ... ... The declaration of a parameter
664 * entity must precede any reference to it...
665 *
666 * [ VC: Entity Declared ]
667 * In a document with an external subset or external parameter entities
668 * with "standalone='no'", ... ... The declaration of a parameter entity
669 * must precede any reference to it...
670 *
671 * [ WFC: In DTD ]
672 * Parameter-entity references may only appear in the DTD.
673 * NOTE: misleading but this is handled.
674 *
675 * A PEReference may have been detected in the current input stream
676 * the handling is done accordingly to
677 * http://www.w3.org/TR/REC-xml#entproc
678 * i.e.
679 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000680 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000681 */
682void
683xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
684 xmlChar *name;
685 xmlEntityPtr entity = NULL;
686 xmlParserInputPtr input;
687
688 if (ctxt->token != 0) {
689 return;
690 }
691 if (RAW != '%') return;
692 switch(ctxt->instate) {
693 case XML_PARSER_CDATA_SECTION:
694 return;
695 case XML_PARSER_COMMENT:
696 return;
697 case XML_PARSER_START_TAG:
698 return;
699 case XML_PARSER_END_TAG:
700 return;
701 case XML_PARSER_EOF:
702 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
704 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
705 ctxt->wellFormed = 0;
706 ctxt->disableSAX = 1;
707 return;
708 case XML_PARSER_PROLOG:
709 case XML_PARSER_START:
710 case XML_PARSER_MISC:
711 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
713 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
714 ctxt->wellFormed = 0;
715 ctxt->disableSAX = 1;
716 return;
717 case XML_PARSER_ENTITY_DECL:
718 case XML_PARSER_CONTENT:
719 case XML_PARSER_ATTRIBUTE_VALUE:
720 case XML_PARSER_PI:
721 case XML_PARSER_SYSTEM_LITERAL:
722 /* we just ignore it there */
723 return;
724 case XML_PARSER_EPILOG:
725 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
727 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
728 ctxt->wellFormed = 0;
729 ctxt->disableSAX = 1;
730 return;
731 case XML_PARSER_ENTITY_VALUE:
732 /*
733 * NOTE: in the case of entity values, we don't do the
734 * substitution here since we need the literal
735 * entity value to be able to save the internal
736 * subset of the document.
737 * This will be handled by xmlStringDecodeEntities
738 */
739 return;
740 case XML_PARSER_DTD:
741 /*
742 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
743 * In the internal DTD subset, parameter-entity references
744 * can occur only where markup declarations can occur, not
745 * within markup declarations.
746 * In that case this is handled in xmlParseMarkupDecl
747 */
748 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
749 return;
750 break;
751 case XML_PARSER_IGNORE:
752 return;
753 }
754
755 NEXT;
756 name = xmlParseName(ctxt);
757 if (xmlParserDebugEntities)
758 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000759 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000760 if (name == NULL) {
761 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000763 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000764 ctxt->wellFormed = 0;
765 ctxt->disableSAX = 1;
766 } else {
767 if (RAW == ';') {
768 NEXT;
769 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
770 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
771 if (entity == NULL) {
772
773 /*
774 * [ WFC: Entity Declared ]
775 * In a document without any DTD, a document with only an
776 * internal DTD subset which contains no parameter entity
777 * references, or a document with "standalone='yes'", ...
778 * ... The declaration of a parameter entity must precede
779 * any reference to it...
780 */
781 if ((ctxt->standalone == 1) ||
782 ((ctxt->hasExternalSubset == 0) &&
783 (ctxt->hasPErefs == 0))) {
784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
785 ctxt->sax->error(ctxt->userData,
786 "PEReference: %%%s; not found\n", name);
787 ctxt->wellFormed = 0;
788 ctxt->disableSAX = 1;
789 } else {
790 /*
791 * [ VC: Entity Declared ]
792 * In a document with an external subset or external
793 * parameter entities with "standalone='no'", ...
794 * ... The declaration of a parameter entity must precede
795 * any reference to it...
796 */
797 if ((!ctxt->disableSAX) &&
798 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
799 ctxt->vctxt.error(ctxt->vctxt.userData,
800 "PEReference: %%%s; not found\n", name);
801 } else if ((!ctxt->disableSAX) &&
802 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
803 ctxt->sax->warning(ctxt->userData,
804 "PEReference: %%%s; not found\n", name);
805 ctxt->valid = 0;
806 }
807 } else {
808 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
809 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000810 xmlChar start[4];
811 xmlCharEncoding enc;
812
Owen Taylor3473f882001-02-23 17:55:21 +0000813 /*
814 * handle the extra spaces added before and after
815 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000816 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000817 */
818 input = xmlNewEntityInputStream(ctxt, entity);
819 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000820
821 /*
822 * Get the 4 first bytes and decode the charset
823 * if enc != XML_CHAR_ENCODING_NONE
824 * plug some encoding conversion routines.
825 */
826 GROW
827 start[0] = RAW;
828 start[1] = NXT(1);
829 start[2] = NXT(2);
830 start[3] = NXT(3);
831 enc = xmlDetectCharEncoding(start, 4);
832 if (enc != XML_CHAR_ENCODING_NONE) {
833 xmlSwitchEncoding(ctxt, enc);
834 }
835
Owen Taylor3473f882001-02-23 17:55:21 +0000836 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
837 (RAW == '<') && (NXT(1) == '?') &&
838 (NXT(2) == 'x') && (NXT(3) == 'm') &&
839 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
840 xmlParseTextDecl(ctxt);
841 }
842 if (ctxt->token == 0)
843 ctxt->token = ' ';
844 } else {
845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
846 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000847 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000848 name);
849 ctxt->wellFormed = 0;
850 ctxt->disableSAX = 1;
851 }
852 }
853 } else {
854 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
856 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000857 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000858 ctxt->wellFormed = 0;
859 ctxt->disableSAX = 1;
860 }
861 xmlFree(name);
862 }
863}
864
/*
 * Macro used to grow the current buffer.
 *
 * @buffer must be the name of an xmlChar * variable accompanied by an
 * integer variable called <buffer>_size; the size is doubled and the
 * buffer reallocated accordingly.
 * On allocation failure the old buffer is released (it would otherwise
 * be leaked, since the enclosing function is left immediately) and the
 * ENCLOSING FUNCTION returns NULL: only use this macro in functions
 * returning a pointer and owning @buffer.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));        \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
877
878/**
879 * xmlStringDecodeEntities:
880 * @ctxt: the parser context
881 * @str: the input string
882 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
883 * @end: an end marker xmlChar, 0 if none
884 * @end2: an end marker xmlChar, 0 if none
885 * @end3: an end marker xmlChar, 0 if none
886 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000887 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000888 *
889 * [67] Reference ::= EntityRef | CharRef
890 *
891 * [69] PEReference ::= '%' Name ';'
892 *
893 * Returns A newly allocated string with the substitution done. The caller
894 * must deallocate it !
895 */
896xmlChar *
897xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
898 xmlChar end, xmlChar end2, xmlChar end3) {
899 xmlChar *buffer = NULL;
900 int buffer_size = 0;
901
902 xmlChar *current = NULL;
903 xmlEntityPtr ent;
904 int c,l;
905 int nbchars = 0;
906
907 if (str == NULL)
908 return(NULL);
909
910 if (ctxt->depth > 40) {
911 ctxt->errNo = XML_ERR_ENTITY_LOOP;
912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
913 ctxt->sax->error(ctxt->userData,
914 "Detected entity reference loop\n");
915 ctxt->wellFormed = 0;
916 ctxt->disableSAX = 1;
917 return(NULL);
918 }
919
920 /*
921 * allocate a translation buffer.
922 */
923 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
924 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
925 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000926 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000927 return(NULL);
928 }
929
930 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000931 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000932 * we are operating on already parsed values.
933 */
934 c = CUR_SCHAR(str, l);
935 while ((c != 0) && (c != end) && /* non input consuming loop */
936 (c != end2) && (c != end3)) {
937
938 if (c == 0) break;
939 if ((c == '&') && (str[1] == '#')) {
940 int val = xmlParseStringCharRef(ctxt, &str);
941 if (val != 0) {
942 COPY_BUF(0,buffer,nbchars,val);
943 }
944 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
945 if (xmlParserDebugEntities)
946 xmlGenericError(xmlGenericErrorContext,
947 "String decoding Entity Reference: %.30s\n",
948 str);
949 ent = xmlParseStringEntityRef(ctxt, &str);
950 if ((ent != NULL) &&
951 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
952 if (ent->content != NULL) {
953 COPY_BUF(0,buffer,nbchars,ent->content[0]);
954 } else {
955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
956 ctxt->sax->error(ctxt->userData,
957 "internal error entity has no content\n");
958 }
959 } else if ((ent != NULL) && (ent->content != NULL)) {
960 xmlChar *rep;
961
962 ctxt->depth++;
963 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
964 0, 0, 0);
965 ctxt->depth--;
966 if (rep != NULL) {
967 current = rep;
968 while (*current != 0) { /* non input consuming loop */
969 buffer[nbchars++] = *current++;
970 if (nbchars >
971 buffer_size - XML_PARSER_BUFFER_SIZE) {
972 growBuffer(buffer);
973 }
974 }
975 xmlFree(rep);
976 }
977 } else if (ent != NULL) {
978 int i = xmlStrlen(ent->name);
979 const xmlChar *cur = ent->name;
980
981 buffer[nbchars++] = '&';
982 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
983 growBuffer(buffer);
984 }
985 for (;i > 0;i--)
986 buffer[nbchars++] = *cur++;
987 buffer[nbchars++] = ';';
988 }
989 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
990 if (xmlParserDebugEntities)
991 xmlGenericError(xmlGenericErrorContext,
992 "String decoding PE Reference: %.30s\n", str);
993 ent = xmlParseStringPEReference(ctxt, &str);
994 if (ent != NULL) {
995 xmlChar *rep;
996
997 ctxt->depth++;
998 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
999 0, 0, 0);
1000 ctxt->depth--;
1001 if (rep != NULL) {
1002 current = rep;
1003 while (*current != 0) { /* non input consuming loop */
1004 buffer[nbchars++] = *current++;
1005 if (nbchars >
1006 buffer_size - XML_PARSER_BUFFER_SIZE) {
1007 growBuffer(buffer);
1008 }
1009 }
1010 xmlFree(rep);
1011 }
1012 }
1013 } else {
1014 COPY_BUF(l,buffer,nbchars,c);
1015 str += l;
1016 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1017 growBuffer(buffer);
1018 }
1019 }
1020 c = CUR_SCHAR(str, l);
1021 }
1022 buffer[nbchars++] = 0;
1023 return(buffer);
1024}
1025
1026
1027/************************************************************************
1028 * *
1029 * Commodity functions to handle xmlChars *
1030 * *
1031 ************************************************************************/
1032
1033/**
1034 * xmlStrndup:
1035 * @cur: the input xmlChar *
1036 * @len: the len of @cur
1037 *
1038 * a strndup for array of xmlChar's
1039 *
1040 * Returns a new xmlChar * or NULL
1041 */
1042xmlChar *
1043xmlStrndup(const xmlChar *cur, int len) {
1044 xmlChar *ret;
1045
1046 if ((cur == NULL) || (len < 0)) return(NULL);
1047 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1048 if (ret == NULL) {
1049 xmlGenericError(xmlGenericErrorContext,
1050 "malloc of %ld byte failed\n",
1051 (len + 1) * (long)sizeof(xmlChar));
1052 return(NULL);
1053 }
1054 memcpy(ret, cur, len * sizeof(xmlChar));
1055 ret[len] = 0;
1056 return(ret);
1057}
1058
1059/**
1060 * xmlStrdup:
1061 * @cur: the input xmlChar *
1062 *
1063 * a strdup for array of xmlChar's. Since they are supposed to be
1064 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1065 * a termination mark of '0'.
1066 *
1067 * Returns a new xmlChar * or NULL
1068 */
1069xmlChar *
1070xmlStrdup(const xmlChar *cur) {
1071 const xmlChar *p = cur;
1072
1073 if (cur == NULL) return(NULL);
1074 while (*p != 0) p++; /* non input consuming */
1075 return(xmlStrndup(cur, p - cur));
1076}
1077
1078/**
1079 * xmlCharStrndup:
1080 * @cur: the input char *
1081 * @len: the len of @cur
1082 *
1083 * a strndup for char's to xmlChar's
1084 *
1085 * Returns a new xmlChar * or NULL
1086 */
1087
1088xmlChar *
1089xmlCharStrndup(const char *cur, int len) {
1090 int i;
1091 xmlChar *ret;
1092
1093 if ((cur == NULL) || (len < 0)) return(NULL);
1094 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1095 if (ret == NULL) {
1096 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1097 (len + 1) * (long)sizeof(xmlChar));
1098 return(NULL);
1099 }
1100 for (i = 0;i < len;i++)
1101 ret[i] = (xmlChar) cur[i];
1102 ret[len] = 0;
1103 return(ret);
1104}
1105
1106/**
1107 * xmlCharStrdup:
1108 * @cur: the input char *
1109 * @len: the len of @cur
1110 *
1111 * a strdup for char's to xmlChar's
1112 *
1113 * Returns a new xmlChar * or NULL
1114 */
1115
1116xmlChar *
1117xmlCharStrdup(const char *cur) {
1118 const char *p = cur;
1119
1120 if (cur == NULL) return(NULL);
1121 while (*p != '\0') p++; /* non input consuming */
1122 return(xmlCharStrndup(cur, p - cur));
1123}
1124
1125/**
1126 * xmlStrcmp:
1127 * @str1: the first xmlChar *
1128 * @str2: the second xmlChar *
1129 *
1130 * a strcmp for xmlChar's
1131 *
1132 * Returns the integer result of the comparison
1133 */
1134
1135int
1136xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1137 register int tmp;
1138
1139 if (str1 == str2) return(0);
1140 if (str1 == NULL) return(-1);
1141 if (str2 == NULL) return(1);
1142 do {
1143 tmp = *str1++ - *str2;
1144 if (tmp != 0) return(tmp);
1145 } while (*str2++ != 0);
1146 return 0;
1147}
1148
1149/**
1150 * xmlStrEqual:
1151 * @str1: the first xmlChar *
1152 * @str2: the second xmlChar *
1153 *
1154 * Check if both string are equal of have same content
1155 * Should be a bit more readable and faster than xmlStrEqual()
1156 *
1157 * Returns 1 if they are equal, 0 if they are different
1158 */
1159
1160int
1161xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1162 if (str1 == str2) return(1);
1163 if (str1 == NULL) return(0);
1164 if (str2 == NULL) return(0);
1165 do {
1166 if (*str1++ != *str2) return(0);
1167 } while (*str2++);
1168 return(1);
1169}
1170
1171/**
1172 * xmlStrncmp:
1173 * @str1: the first xmlChar *
1174 * @str2: the second xmlChar *
1175 * @len: the max comparison length
1176 *
1177 * a strncmp for xmlChar's
1178 *
1179 * Returns the integer result of the comparison
1180 */
1181
1182int
1183xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1184 register int tmp;
1185
1186 if (len <= 0) return(0);
1187 if (str1 == str2) return(0);
1188 if (str1 == NULL) return(-1);
1189 if (str2 == NULL) return(1);
1190 do {
1191 tmp = *str1++ - *str2;
1192 if (tmp != 0 || --len == 0) return(tmp);
1193 } while (*str2++ != 0);
1194 return 0;
1195}
1196
Daniel Veillardb44025c2001-10-11 22:55:55 +00001197static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001198 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1199 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1200 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1201 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1202 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1203 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1204 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1205 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1206 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1207 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1208 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1209 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1210 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1211 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1212 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1213 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1214 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1215 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1216 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1217 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1218 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1219 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1220 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1221 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1222 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1223 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1224 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1225 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1226 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1227 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1228 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1229 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1230};
1231
1232/**
1233 * xmlStrcasecmp:
1234 * @str1: the first xmlChar *
1235 * @str2: the second xmlChar *
1236 *
1237 * a strcasecmp for xmlChar's
1238 *
1239 * Returns the integer result of the comparison
1240 */
1241
1242int
1243xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1244 register int tmp;
1245
1246 if (str1 == str2) return(0);
1247 if (str1 == NULL) return(-1);
1248 if (str2 == NULL) return(1);
1249 do {
1250 tmp = casemap[*str1++] - casemap[*str2];
1251 if (tmp != 0) return(tmp);
1252 } while (*str2++ != 0);
1253 return 0;
1254}
1255
1256/**
1257 * xmlStrncasecmp:
1258 * @str1: the first xmlChar *
1259 * @str2: the second xmlChar *
1260 * @len: the max comparison length
1261 *
1262 * a strncasecmp for xmlChar's
1263 *
1264 * Returns the integer result of the comparison
1265 */
1266
1267int
1268xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1269 register int tmp;
1270
1271 if (len <= 0) return(0);
1272 if (str1 == str2) return(0);
1273 if (str1 == NULL) return(-1);
1274 if (str2 == NULL) return(1);
1275 do {
1276 tmp = casemap[*str1++] - casemap[*str2];
1277 if (tmp != 0 || --len == 0) return(tmp);
1278 } while (*str2++ != 0);
1279 return 0;
1280}
1281
1282/**
1283 * xmlStrchr:
1284 * @str: the xmlChar * array
1285 * @val: the xmlChar to search
1286 *
1287 * a strchr for xmlChar's
1288 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001289 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001290 */
1291
1292const xmlChar *
1293xmlStrchr(const xmlChar *str, xmlChar val) {
1294 if (str == NULL) return(NULL);
1295 while (*str != 0) { /* non input consuming */
1296 if (*str == val) return((xmlChar *) str);
1297 str++;
1298 }
1299 return(NULL);
1300}
1301
1302/**
1303 * xmlStrstr:
1304 * @str: the xmlChar * array (haystack)
1305 * @val: the xmlChar to search (needle)
1306 *
1307 * a strstr for xmlChar's
1308 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001309 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001310 */
1311
1312const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001313xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001314 int n;
1315
1316 if (str == NULL) return(NULL);
1317 if (val == NULL) return(NULL);
1318 n = xmlStrlen(val);
1319
1320 if (n == 0) return(str);
1321 while (*str != 0) { /* non input consuming */
1322 if (*str == *val) {
1323 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1324 }
1325 str++;
1326 }
1327 return(NULL);
1328}
1329
1330/**
1331 * xmlStrcasestr:
1332 * @str: the xmlChar * array (haystack)
1333 * @val: the xmlChar to search (needle)
1334 *
1335 * a case-ignoring strstr for xmlChar's
1336 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001337 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001338 */
1339
1340const xmlChar *
1341xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1342 int n;
1343
1344 if (str == NULL) return(NULL);
1345 if (val == NULL) return(NULL);
1346 n = xmlStrlen(val);
1347
1348 if (n == 0) return(str);
1349 while (*str != 0) { /* non input consuming */
1350 if (casemap[*str] == casemap[*val])
1351 if (!xmlStrncasecmp(str, val, n)) return(str);
1352 str++;
1353 }
1354 return(NULL);
1355}
1356
1357/**
1358 * xmlStrsub:
1359 * @str: the xmlChar * array (haystack)
1360 * @start: the index of the first char (zero based)
1361 * @len: the length of the substring
1362 *
1363 * Extract a substring of a given string
1364 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001365 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001366 */
1367
1368xmlChar *
1369xmlStrsub(const xmlChar *str, int start, int len) {
1370 int i;
1371
1372 if (str == NULL) return(NULL);
1373 if (start < 0) return(NULL);
1374 if (len < 0) return(NULL);
1375
1376 for (i = 0;i < start;i++) {
1377 if (*str == 0) return(NULL);
1378 str++;
1379 }
1380 if (*str == 0) return(NULL);
1381 return(xmlStrndup(str, len));
1382}
1383
1384/**
1385 * xmlStrlen:
1386 * @str: the xmlChar * array
1387 *
1388 * length of a xmlChar's string
1389 *
1390 * Returns the number of xmlChar contained in the ARRAY.
1391 */
1392
1393int
1394xmlStrlen(const xmlChar *str) {
1395 int len = 0;
1396
1397 if (str == NULL) return(0);
1398 while (*str != 0) { /* non input consuming */
1399 str++;
1400 len++;
1401 }
1402 return(len);
1403}
1404
1405/**
1406 * xmlStrncat:
1407 * @cur: the original xmlChar * array
1408 * @add: the xmlChar * array added
1409 * @len: the length of @add
1410 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001411 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001412 * first bytes of @add.
1413 *
1414 * Returns a new xmlChar *, the original @cur is reallocated if needed
1415 * and should not be freed
1416 */
1417
1418xmlChar *
1419xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1420 int size;
1421 xmlChar *ret;
1422
1423 if ((add == NULL) || (len == 0))
1424 return(cur);
1425 if (cur == NULL)
1426 return(xmlStrndup(add, len));
1427
1428 size = xmlStrlen(cur);
1429 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1430 if (ret == NULL) {
1431 xmlGenericError(xmlGenericErrorContext,
1432 "xmlStrncat: realloc of %ld byte failed\n",
1433 (size + len + 1) * (long)sizeof(xmlChar));
1434 return(cur);
1435 }
1436 memcpy(&ret[size], add, len * sizeof(xmlChar));
1437 ret[size + len] = 0;
1438 return(ret);
1439}
1440
1441/**
1442 * xmlStrcat:
1443 * @cur: the original xmlChar * array
1444 * @add: the xmlChar * array added
1445 *
1446 * a strcat for array of xmlChar's. Since they are supposed to be
1447 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1448 * a termination mark of '0'.
1449 *
1450 * Returns a new xmlChar * containing the concatenated string.
1451 */
1452xmlChar *
1453xmlStrcat(xmlChar *cur, const xmlChar *add) {
1454 const xmlChar *p = add;
1455
1456 if (add == NULL) return(cur);
1457 if (cur == NULL)
1458 return(xmlStrdup(add));
1459
1460 while (*p != 0) p++; /* non input consuming */
1461 return(xmlStrncat(cur, add, p - add));
1462}
1463
1464/************************************************************************
1465 * *
1466 * Commodity functions, cleanup needed ? *
1467 * *
1468 ************************************************************************/
1469
1470/**
1471 * areBlanks:
1472 * @ctxt: an XML parser context
1473 * @str: a xmlChar *
1474 * @len: the size of @str
1475 *
1476 * Is this a sequence of blank chars that one can ignore ?
1477 *
1478 * Returns 1 if ignorable 0 otherwise.
1479 */
1480
1481static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1482 int i, ret;
1483 xmlNodePtr lastChild;
1484
Daniel Veillard05c13a22001-09-09 08:38:09 +00001485 /*
1486 * Don't spend time trying to differentiate them, the same callback is
1487 * used !
1488 */
1489 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001490 return(0);
1491
Owen Taylor3473f882001-02-23 17:55:21 +00001492 /*
1493 * Check for xml:space value.
1494 */
1495 if (*(ctxt->space) == 1)
1496 return(0);
1497
1498 /*
1499 * Check that the string is made of blanks
1500 */
1501 for (i = 0;i < len;i++)
1502 if (!(IS_BLANK(str[i]))) return(0);
1503
1504 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001505 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001506 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001507 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001508 if (ctxt->myDoc != NULL) {
1509 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1510 if (ret == 0) return(1);
1511 if (ret == 1) return(0);
1512 }
1513
1514 /*
1515 * Otherwise, heuristic :-\
1516 */
Owen Taylor3473f882001-02-23 17:55:21 +00001517 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001518 if ((ctxt->node->children == NULL) &&
1519 (RAW == '<') && (NXT(1) == '/')) return(0);
1520
1521 lastChild = xmlGetLastChild(ctxt->node);
1522 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001523 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1524 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001525 } else if (xmlNodeIsText(lastChild))
1526 return(0);
1527 else if ((ctxt->node->children != NULL) &&
1528 (xmlNodeIsText(ctxt->node->children)))
1529 return(0);
1530 return(1);
1531}
1532
Owen Taylor3473f882001-02-23 17:55:21 +00001533/************************************************************************
1534 * *
1535 * Extra stuff for namespace support *
1536 * Relates to http://www.w3.org/TR/WD-xml-names *
1537 * *
1538 ************************************************************************/
1539
1540/**
1541 * xmlSplitQName:
1542 * @ctxt: an XML parser context
1543 * @name: an XML parser context
1544 * @prefix: a xmlChar **
1545 *
1546 * parse an UTF8 encoded XML qualified name string
1547 *
1548 * [NS 5] QName ::= (Prefix ':')? LocalPart
1549 *
1550 * [NS 6] Prefix ::= NCName
1551 *
1552 * [NS 7] LocalPart ::= NCName
1553 *
1554 * Returns the local part, and prefix is updated
1555 * to get the Prefix if any.
1556 */
1557
1558xmlChar *
1559xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1560 xmlChar buf[XML_MAX_NAMELEN + 5];
1561 xmlChar *buffer = NULL;
1562 int len = 0;
1563 int max = XML_MAX_NAMELEN;
1564 xmlChar *ret = NULL;
1565 const xmlChar *cur = name;
1566 int c;
1567
1568 *prefix = NULL;
1569
1570#ifndef XML_XML_NAMESPACE
1571 /* xml: prefix is not really a namespace */
1572 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1573 (cur[2] == 'l') && (cur[3] == ':'))
1574 return(xmlStrdup(name));
1575#endif
1576
1577 /* nasty but valid */
1578 if (cur[0] == ':')
1579 return(xmlStrdup(name));
1580
1581 c = *cur++;
1582 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1583 buf[len++] = c;
1584 c = *cur++;
1585 }
1586 if (len >= max) {
1587 /*
1588 * Okay someone managed to make a huge name, so he's ready to pay
1589 * for the processing speed.
1590 */
1591 max = len * 2;
1592
1593 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1594 if (buffer == NULL) {
1595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1596 ctxt->sax->error(ctxt->userData,
1597 "xmlSplitQName: out of memory\n");
1598 return(NULL);
1599 }
1600 memcpy(buffer, buf, len);
1601 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1602 if (len + 10 > max) {
1603 max *= 2;
1604 buffer = (xmlChar *) xmlRealloc(buffer,
1605 max * sizeof(xmlChar));
1606 if (buffer == NULL) {
1607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608 ctxt->sax->error(ctxt->userData,
1609 "xmlSplitQName: out of memory\n");
1610 return(NULL);
1611 }
1612 }
1613 buffer[len++] = c;
1614 c = *cur++;
1615 }
1616 buffer[len] = 0;
1617 }
1618
1619 if (buffer == NULL)
1620 ret = xmlStrndup(buf, len);
1621 else {
1622 ret = buffer;
1623 buffer = NULL;
1624 max = XML_MAX_NAMELEN;
1625 }
1626
1627
1628 if (c == ':') {
1629 c = *cur++;
1630 if (c == 0) return(ret);
1631 *prefix = ret;
1632 len = 0;
1633
1634 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1635 buf[len++] = c;
1636 c = *cur++;
1637 }
1638 if (len >= max) {
1639 /*
1640 * Okay someone managed to make a huge name, so he's ready to pay
1641 * for the processing speed.
1642 */
1643 max = len * 2;
1644
1645 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1646 if (buffer == NULL) {
1647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1648 ctxt->sax->error(ctxt->userData,
1649 "xmlSplitQName: out of memory\n");
1650 return(NULL);
1651 }
1652 memcpy(buffer, buf, len);
1653 while (c != 0) { /* tested bigname2.xml */
1654 if (len + 10 > max) {
1655 max *= 2;
1656 buffer = (xmlChar *) xmlRealloc(buffer,
1657 max * sizeof(xmlChar));
1658 if (buffer == NULL) {
1659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660 ctxt->sax->error(ctxt->userData,
1661 "xmlSplitQName: out of memory\n");
1662 return(NULL);
1663 }
1664 }
1665 buffer[len++] = c;
1666 c = *cur++;
1667 }
1668 buffer[len] = 0;
1669 }
1670
1671 if (buffer == NULL)
1672 ret = xmlStrndup(buf, len);
1673 else {
1674 ret = buffer;
1675 }
1676 }
1677
1678 return(ret);
1679}
1680
1681/************************************************************************
1682 * *
1683 * The parser itself *
1684 * Relates to http://www.w3.org/TR/REC-xml *
1685 * *
1686 ************************************************************************/
1687
Daniel Veillard76d66f42001-05-16 21:05:17 +00001688static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001689/**
1690 * xmlParseName:
1691 * @ctxt: an XML parser context
1692 *
1693 * parse an XML name.
1694 *
1695 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1696 * CombiningChar | Extender
1697 *
1698 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1699 *
1700 * [6] Names ::= Name (S Name)*
1701 *
1702 * Returns the Name parsed or NULL
1703 */
1704
1705xmlChar *
1706xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001707 const xmlChar *in;
1708 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001709 int count = 0;
1710
1711 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001712
1713 /*
1714 * Accelerator for simple ASCII names
1715 */
1716 in = ctxt->input->cur;
1717 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1718 ((*in >= 0x41) && (*in <= 0x5A)) ||
1719 (*in == '_') || (*in == ':')) {
1720 in++;
1721 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1722 ((*in >= 0x41) && (*in <= 0x5A)) ||
1723 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001724 (*in == '_') || (*in == '-') ||
1725 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001726 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001727 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001728 count = in - ctxt->input->cur;
1729 ret = xmlStrndup(ctxt->input->cur, count);
1730 ctxt->input->cur = in;
1731 return(ret);
1732 }
1733 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001734 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001735}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001736
Daniel Veillard76d66f42001-05-16 21:05:17 +00001737static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001738xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1739 xmlChar buf[XML_MAX_NAMELEN + 5];
1740 int len = 0, l;
1741 int c;
1742 int count = 0;
1743
1744 /*
1745 * Handler for more complex cases
1746 */
1747 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001748 c = CUR_CHAR(l);
1749 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1750 (!IS_LETTER(c) && (c != '_') &&
1751 (c != ':'))) {
1752 return(NULL);
1753 }
1754
1755 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1756 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1757 (c == '.') || (c == '-') ||
1758 (c == '_') || (c == ':') ||
1759 (IS_COMBINING(c)) ||
1760 (IS_EXTENDER(c)))) {
1761 if (count++ > 100) {
1762 count = 0;
1763 GROW;
1764 }
1765 COPY_BUF(l,buf,len,c);
1766 NEXTL(l);
1767 c = CUR_CHAR(l);
1768 if (len >= XML_MAX_NAMELEN) {
1769 /*
1770 * Okay someone managed to make a huge name, so he's ready to pay
1771 * for the processing speed.
1772 */
1773 xmlChar *buffer;
1774 int max = len * 2;
1775
1776 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1777 if (buffer == NULL) {
1778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1779 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001780 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001781 return(NULL);
1782 }
1783 memcpy(buffer, buf, len);
1784 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1785 (c == '.') || (c == '-') ||
1786 (c == '_') || (c == ':') ||
1787 (IS_COMBINING(c)) ||
1788 (IS_EXTENDER(c))) {
1789 if (count++ > 100) {
1790 count = 0;
1791 GROW;
1792 }
1793 if (len + 10 > max) {
1794 max *= 2;
1795 buffer = (xmlChar *) xmlRealloc(buffer,
1796 max * sizeof(xmlChar));
1797 if (buffer == NULL) {
1798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1799 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001800 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001801 return(NULL);
1802 }
1803 }
1804 COPY_BUF(l,buffer,len,c);
1805 NEXTL(l);
1806 c = CUR_CHAR(l);
1807 }
1808 buffer[len] = 0;
1809 return(buffer);
1810 }
1811 }
1812 return(xmlStrndup(buf, len));
1813}
1814
1815/**
1816 * xmlParseStringName:
1817 * @ctxt: an XML parser context
1818 * @str: a pointer to the string pointer (IN/OUT)
1819 *
1820 * parse an XML name.
1821 *
1822 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1823 * CombiningChar | Extender
1824 *
1825 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1826 *
1827 * [6] Names ::= Name (S Name)*
1828 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001829 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001830 * is updated to the current location in the string.
1831 */
1832
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001833static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001834xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1835 xmlChar buf[XML_MAX_NAMELEN + 5];
1836 const xmlChar *cur = *str;
1837 int len = 0, l;
1838 int c;
1839
1840 c = CUR_SCHAR(cur, l);
1841 if (!IS_LETTER(c) && (c != '_') &&
1842 (c != ':')) {
1843 return(NULL);
1844 }
1845
1846 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1847 (c == '.') || (c == '-') ||
1848 (c == '_') || (c == ':') ||
1849 (IS_COMBINING(c)) ||
1850 (IS_EXTENDER(c))) {
1851 COPY_BUF(l,buf,len,c);
1852 cur += l;
1853 c = CUR_SCHAR(cur, l);
1854 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1855 /*
1856 * Okay someone managed to make a huge name, so he's ready to pay
1857 * for the processing speed.
1858 */
1859 xmlChar *buffer;
1860 int max = len * 2;
1861
1862 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1863 if (buffer == NULL) {
1864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1865 ctxt->sax->error(ctxt->userData,
1866 "xmlParseStringName: out of memory\n");
1867 return(NULL);
1868 }
1869 memcpy(buffer, buf, len);
1870 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1871 (c == '.') || (c == '-') ||
1872 (c == '_') || (c == ':') ||
1873 (IS_COMBINING(c)) ||
1874 (IS_EXTENDER(c))) {
1875 if (len + 10 > max) {
1876 max *= 2;
1877 buffer = (xmlChar *) xmlRealloc(buffer,
1878 max * sizeof(xmlChar));
1879 if (buffer == NULL) {
1880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1881 ctxt->sax->error(ctxt->userData,
1882 "xmlParseStringName: out of memory\n");
1883 return(NULL);
1884 }
1885 }
1886 COPY_BUF(l,buffer,len,c);
1887 cur += l;
1888 c = CUR_SCHAR(cur, l);
1889 }
1890 buffer[len] = 0;
1891 *str = cur;
1892 return(buffer);
1893 }
1894 }
1895 *str = cur;
1896 return(xmlStrndup(buf, len));
1897}
1898
1899/**
1900 * xmlParseNmtoken:
1901 * @ctxt: an XML parser context
1902 *
1903 * parse an XML Nmtoken.
1904 *
1905 * [7] Nmtoken ::= (NameChar)+
1906 *
1907 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1908 *
1909 * Returns the Nmtoken parsed or NULL
1910 */
1911
1912xmlChar *
1913xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1914 xmlChar buf[XML_MAX_NAMELEN + 5];
1915 int len = 0, l;
1916 int c;
1917 int count = 0;
1918
1919 GROW;
1920 c = CUR_CHAR(l);
1921
1922 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1923 (c == '.') || (c == '-') ||
1924 (c == '_') || (c == ':') ||
1925 (IS_COMBINING(c)) ||
1926 (IS_EXTENDER(c))) {
1927 if (count++ > 100) {
1928 count = 0;
1929 GROW;
1930 }
1931 COPY_BUF(l,buf,len,c);
1932 NEXTL(l);
1933 c = CUR_CHAR(l);
1934 if (len >= XML_MAX_NAMELEN) {
1935 /*
1936 * Okay someone managed to make a huge token, so he's ready to pay
1937 * for the processing speed.
1938 */
1939 xmlChar *buffer;
1940 int max = len * 2;
1941
1942 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1943 if (buffer == NULL) {
1944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1945 ctxt->sax->error(ctxt->userData,
1946 "xmlParseNmtoken: out of memory\n");
1947 return(NULL);
1948 }
1949 memcpy(buffer, buf, len);
1950 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1951 (c == '.') || (c == '-') ||
1952 (c == '_') || (c == ':') ||
1953 (IS_COMBINING(c)) ||
1954 (IS_EXTENDER(c))) {
1955 if (count++ > 100) {
1956 count = 0;
1957 GROW;
1958 }
1959 if (len + 10 > max) {
1960 max *= 2;
1961 buffer = (xmlChar *) xmlRealloc(buffer,
1962 max * sizeof(xmlChar));
1963 if (buffer == NULL) {
1964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1965 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001966 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001967 return(NULL);
1968 }
1969 }
1970 COPY_BUF(l,buffer,len,c);
1971 NEXTL(l);
1972 c = CUR_CHAR(l);
1973 }
1974 buffer[len] = 0;
1975 return(buffer);
1976 }
1977 }
1978 if (len == 0)
1979 return(NULL);
1980 return(xmlStrndup(buf, len));
1981}
1982
1983/**
1984 * xmlParseEntityValue:
1985 * @ctxt: an XML parser context
1986 * @orig: if non-NULL store a copy of the original entity value
1987 *
1988 * parse a value for ENTITY declarations
1989 *
1990 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1991 * "'" ([^%&'] | PEReference | Reference)* "'"
1992 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001993 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00001994 */
1995
1996xmlChar *
1997xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1998 xmlChar *buf = NULL;
1999 int len = 0;
2000 int size = XML_PARSER_BUFFER_SIZE;
2001 int c, l;
2002 xmlChar stop;
2003 xmlChar *ret = NULL;
2004 const xmlChar *cur = NULL;
2005 xmlParserInputPtr input;
2006
2007 if (RAW == '"') stop = '"';
2008 else if (RAW == '\'') stop = '\'';
2009 else {
2010 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2012 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2013 ctxt->wellFormed = 0;
2014 ctxt->disableSAX = 1;
2015 return(NULL);
2016 }
2017 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2018 if (buf == NULL) {
2019 xmlGenericError(xmlGenericErrorContext,
2020 "malloc of %d byte failed\n", size);
2021 return(NULL);
2022 }
2023
2024 /*
2025 * The content of the entity definition is copied in a buffer.
2026 */
2027
2028 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2029 input = ctxt->input;
2030 GROW;
2031 NEXT;
2032 c = CUR_CHAR(l);
2033 /*
2034 * NOTE: 4.4.5 Included in Literal
2035 * When a parameter entity reference appears in a literal entity
2036 * value, ... a single or double quote character in the replacement
2037 * text is always treated as a normal data character and will not
2038 * terminate the literal.
2039 * In practice it means we stop the loop only when back at parsing
2040 * the initial entity and the quote is found
2041 */
2042 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2043 (ctxt->input != input))) {
2044 if (len + 5 >= size) {
2045 size *= 2;
2046 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2047 if (buf == NULL) {
2048 xmlGenericError(xmlGenericErrorContext,
2049 "realloc of %d byte failed\n", size);
2050 return(NULL);
2051 }
2052 }
2053 COPY_BUF(l,buf,len,c);
2054 NEXTL(l);
2055 /*
2056 * Pop-up of finished entities.
2057 */
2058 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2059 xmlPopInput(ctxt);
2060
2061 GROW;
2062 c = CUR_CHAR(l);
2063 if (c == 0) {
2064 GROW;
2065 c = CUR_CHAR(l);
2066 }
2067 }
2068 buf[len] = 0;
2069
2070 /*
2071 * Raise problem w.r.t. '&' and '%' being used in non-entities
2072 * reference constructs. Note Charref will be handled in
2073 * xmlStringDecodeEntities()
2074 */
2075 cur = buf;
2076 while (*cur != 0) { /* non input consuming */
2077 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2078 xmlChar *name;
2079 xmlChar tmp = *cur;
2080
2081 cur++;
2082 name = xmlParseStringName(ctxt, &cur);
2083 if ((name == NULL) || (*cur != ';')) {
2084 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2086 ctxt->sax->error(ctxt->userData,
2087 "EntityValue: '%c' forbidden except for entities references\n",
2088 tmp);
2089 ctxt->wellFormed = 0;
2090 ctxt->disableSAX = 1;
2091 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002092 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2093 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002094 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2096 ctxt->sax->error(ctxt->userData,
2097 "EntityValue: PEReferences forbidden in internal subset\n",
2098 tmp);
2099 ctxt->wellFormed = 0;
2100 ctxt->disableSAX = 1;
2101 }
2102 if (name != NULL)
2103 xmlFree(name);
2104 }
2105 cur++;
2106 }
2107
2108 /*
2109 * Then PEReference entities are substituted.
2110 */
2111 if (c != stop) {
2112 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2114 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2115 ctxt->wellFormed = 0;
2116 ctxt->disableSAX = 1;
2117 xmlFree(buf);
2118 } else {
2119 NEXT;
2120 /*
2121 * NOTE: 4.4.7 Bypassed
2122 * When a general entity reference appears in the EntityValue in
2123 * an entity declaration, it is bypassed and left as is.
2124 * so XML_SUBSTITUTE_REF is not set here.
2125 */
2126 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2127 0, 0, 0);
2128 if (orig != NULL)
2129 *orig = buf;
2130 else
2131 xmlFree(buf);
2132 }
2133
2134 return(ret);
2135}
2136
2137/**
2138 * xmlParseAttValue:
2139 * @ctxt: an XML parser context
2140 *
2141 * parse a value for an attribute
2142 * Note: the parser won't do substitution of entities here, this
2143 * will be handled later in xmlStringGetNodeList
2144 *
2145 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2146 * "'" ([^<&'] | Reference)* "'"
2147 *
2148 * 3.3.3 Attribute-Value Normalization:
2149 * Before the value of an attribute is passed to the application or
2150 * checked for validity, the XML processor must normalize it as follows:
2151 * - a character reference is processed by appending the referenced
2152 * character to the attribute value
2153 * - an entity reference is processed by recursively processing the
2154 * replacement text of the entity
2155 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2156 * appending #x20 to the normalized value, except that only a single
2157 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2158 * parsed entity or the literal entity value of an internal parsed entity
2159 * - other characters are processed by appending them to the normalized value
2160 * If the declared value is not CDATA, then the XML processor must further
2161 * process the normalized attribute value by discarding any leading and
2162 * trailing space (#x20) characters, and by replacing sequences of space
2163 * (#x20) characters by a single space (#x20) character.
2164 * All attributes for which no declaration has been read should be treated
2165 * by a non-validating parser as if declared CDATA.
2166 *
2167 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2168 */
2169
2170xmlChar *
2171xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2172 xmlChar limit = 0;
2173 xmlChar *buf = NULL;
2174 int len = 0;
2175 int buf_size = 0;
2176 int c, l;
2177 xmlChar *current = NULL;
2178 xmlEntityPtr ent;
2179
2180
2181 SHRINK;
2182 if (NXT(0) == '"') {
2183 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2184 limit = '"';
2185 NEXT;
2186 } else if (NXT(0) == '\'') {
2187 limit = '\'';
2188 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2189 NEXT;
2190 } else {
2191 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2193 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2194 ctxt->wellFormed = 0;
2195 ctxt->disableSAX = 1;
2196 return(NULL);
2197 }
2198
2199 /*
2200 * allocate a translation buffer.
2201 */
2202 buf_size = XML_PARSER_BUFFER_SIZE;
2203 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2204 if (buf == NULL) {
2205 perror("xmlParseAttValue: malloc failed");
2206 return(NULL);
2207 }
2208
2209 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002210 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002211 */
2212 c = CUR_CHAR(l);
2213 while (((NXT(0) != limit) && /* checked */
2214 (c != '<')) || (ctxt->token != 0)) {
2215 if (c == 0) break;
2216 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002217 if (ctxt->replaceEntities) {
2218 if (len > buf_size - 10) {
2219 growBuffer(buf);
2220 }
2221 buf[len++] = '&';
2222 } else {
2223 /*
2224 * The reparsing will be done in xmlStringGetNodeList()
2225 * called by the attribute() function in SAX.c
2226 */
2227 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002228
Daniel Veillard319a7422001-09-11 09:27:09 +00002229 if (len > buf_size - 10) {
2230 growBuffer(buf);
2231 }
2232 current = &buffer[0];
2233 while (*current != 0) { /* non input consuming */
2234 buf[len++] = *current++;
2235 }
2236 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002237 }
Owen Taylor3473f882001-02-23 17:55:21 +00002238 } else if (c == '&') {
2239 if (NXT(1) == '#') {
2240 int val = xmlParseCharRef(ctxt);
2241 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002242 if (ctxt->replaceEntities) {
2243 if (len > buf_size - 10) {
2244 growBuffer(buf);
2245 }
2246 buf[len++] = '&';
2247 } else {
2248 /*
2249 * The reparsing will be done in xmlStringGetNodeList()
2250 * called by the attribute() function in SAX.c
2251 */
2252 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002253
Daniel Veillard319a7422001-09-11 09:27:09 +00002254 if (len > buf_size - 10) {
2255 growBuffer(buf);
2256 }
2257 current = &buffer[0];
2258 while (*current != 0) { /* non input consuming */
2259 buf[len++] = *current++;
2260 }
Owen Taylor3473f882001-02-23 17:55:21 +00002261 }
2262 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002263 if (len > buf_size - 10) {
2264 growBuffer(buf);
2265 }
Owen Taylor3473f882001-02-23 17:55:21 +00002266 len += xmlCopyChar(0, &buf[len], val);
2267 }
2268 } else {
2269 ent = xmlParseEntityRef(ctxt);
2270 if ((ent != NULL) &&
2271 (ctxt->replaceEntities != 0)) {
2272 xmlChar *rep;
2273
2274 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2275 rep = xmlStringDecodeEntities(ctxt, ent->content,
2276 XML_SUBSTITUTE_REF, 0, 0, 0);
2277 if (rep != NULL) {
2278 current = rep;
2279 while (*current != 0) { /* non input consuming */
2280 buf[len++] = *current++;
2281 if (len > buf_size - 10) {
2282 growBuffer(buf);
2283 }
2284 }
2285 xmlFree(rep);
2286 }
2287 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002288 if (len > buf_size - 10) {
2289 growBuffer(buf);
2290 }
Owen Taylor3473f882001-02-23 17:55:21 +00002291 if (ent->content != NULL)
2292 buf[len++] = ent->content[0];
2293 }
2294 } else if (ent != NULL) {
2295 int i = xmlStrlen(ent->name);
2296 const xmlChar *cur = ent->name;
2297
2298 /*
2299 * This may look absurd but is needed to detect
2300 * entities problems
2301 */
2302 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2303 (ent->content != NULL)) {
2304 xmlChar *rep;
2305 rep = xmlStringDecodeEntities(ctxt, ent->content,
2306 XML_SUBSTITUTE_REF, 0, 0, 0);
2307 if (rep != NULL)
2308 xmlFree(rep);
2309 }
2310
2311 /*
2312 * Just output the reference
2313 */
2314 buf[len++] = '&';
2315 if (len > buf_size - i - 10) {
2316 growBuffer(buf);
2317 }
2318 for (;i > 0;i--)
2319 buf[len++] = *cur++;
2320 buf[len++] = ';';
2321 }
2322 }
2323 } else {
2324 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2325 COPY_BUF(l,buf,len,0x20);
2326 if (len > buf_size - 10) {
2327 growBuffer(buf);
2328 }
2329 } else {
2330 COPY_BUF(l,buf,len,c);
2331 if (len > buf_size - 10) {
2332 growBuffer(buf);
2333 }
2334 }
2335 NEXTL(l);
2336 }
2337 GROW;
2338 c = CUR_CHAR(l);
2339 }
2340 buf[len++] = 0;
2341 if (RAW == '<') {
2342 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2344 ctxt->sax->error(ctxt->userData,
2345 "Unescaped '<' not allowed in attributes values\n");
2346 ctxt->wellFormed = 0;
2347 ctxt->disableSAX = 1;
2348 } else if (RAW != limit) {
2349 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2351 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2352 ctxt->wellFormed = 0;
2353 ctxt->disableSAX = 1;
2354 } else
2355 NEXT;
2356 return(buf);
2357}
2358
2359/**
2360 * xmlParseSystemLiteral:
2361 * @ctxt: an XML parser context
2362 *
2363 * parse an XML Literal
2364 *
2365 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2366 *
2367 * Returns the SystemLiteral parsed or NULL
2368 */
2369
2370xmlChar *
2371xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2372 xmlChar *buf = NULL;
2373 int len = 0;
2374 int size = XML_PARSER_BUFFER_SIZE;
2375 int cur, l;
2376 xmlChar stop;
2377 int state = ctxt->instate;
2378 int count = 0;
2379
2380 SHRINK;
2381 if (RAW == '"') {
2382 NEXT;
2383 stop = '"';
2384 } else if (RAW == '\'') {
2385 NEXT;
2386 stop = '\'';
2387 } else {
2388 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2390 ctxt->sax->error(ctxt->userData,
2391 "SystemLiteral \" or ' expected\n");
2392 ctxt->wellFormed = 0;
2393 ctxt->disableSAX = 1;
2394 return(NULL);
2395 }
2396
2397 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2398 if (buf == NULL) {
2399 xmlGenericError(xmlGenericErrorContext,
2400 "malloc of %d byte failed\n", size);
2401 return(NULL);
2402 }
2403 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2404 cur = CUR_CHAR(l);
2405 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2406 if (len + 5 >= size) {
2407 size *= 2;
2408 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2409 if (buf == NULL) {
2410 xmlGenericError(xmlGenericErrorContext,
2411 "realloc of %d byte failed\n", size);
2412 ctxt->instate = (xmlParserInputState) state;
2413 return(NULL);
2414 }
2415 }
2416 count++;
2417 if (count > 50) {
2418 GROW;
2419 count = 0;
2420 }
2421 COPY_BUF(l,buf,len,cur);
2422 NEXTL(l);
2423 cur = CUR_CHAR(l);
2424 if (cur == 0) {
2425 GROW;
2426 SHRINK;
2427 cur = CUR_CHAR(l);
2428 }
2429 }
2430 buf[len] = 0;
2431 ctxt->instate = (xmlParserInputState) state;
2432 if (!IS_CHAR(cur)) {
2433 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2435 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2436 ctxt->wellFormed = 0;
2437 ctxt->disableSAX = 1;
2438 } else {
2439 NEXT;
2440 }
2441 return(buf);
2442}
2443
2444/**
2445 * xmlParsePubidLiteral:
2446 * @ctxt: an XML parser context
2447 *
2448 * parse an XML public literal
2449 *
2450 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2451 *
2452 * Returns the PubidLiteral parsed or NULL.
2453 */
2454
2455xmlChar *
2456xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2457 xmlChar *buf = NULL;
2458 int len = 0;
2459 int size = XML_PARSER_BUFFER_SIZE;
2460 xmlChar cur;
2461 xmlChar stop;
2462 int count = 0;
2463
2464 SHRINK;
2465 if (RAW == '"') {
2466 NEXT;
2467 stop = '"';
2468 } else if (RAW == '\'') {
2469 NEXT;
2470 stop = '\'';
2471 } else {
2472 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2474 ctxt->sax->error(ctxt->userData,
2475 "SystemLiteral \" or ' expected\n");
2476 ctxt->wellFormed = 0;
2477 ctxt->disableSAX = 1;
2478 return(NULL);
2479 }
2480 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2481 if (buf == NULL) {
2482 xmlGenericError(xmlGenericErrorContext,
2483 "malloc of %d byte failed\n", size);
2484 return(NULL);
2485 }
2486 cur = CUR;
2487 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2488 if (len + 1 >= size) {
2489 size *= 2;
2490 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2491 if (buf == NULL) {
2492 xmlGenericError(xmlGenericErrorContext,
2493 "realloc of %d byte failed\n", size);
2494 return(NULL);
2495 }
2496 }
2497 buf[len++] = cur;
2498 count++;
2499 if (count > 50) {
2500 GROW;
2501 count = 0;
2502 }
2503 NEXT;
2504 cur = CUR;
2505 if (cur == 0) {
2506 GROW;
2507 SHRINK;
2508 cur = CUR;
2509 }
2510 }
2511 buf[len] = 0;
2512 if (cur != stop) {
2513 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2515 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2516 ctxt->wellFormed = 0;
2517 ctxt->disableSAX = 1;
2518 } else {
2519 NEXT;
2520 }
2521 return(buf);
2522}
2523
Daniel Veillard48b2f892001-02-25 16:11:03 +00002524void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002525/**
2526 * xmlParseCharData:
2527 * @ctxt: an XML parser context
2528 * @cdata: int indicating whether we are within a CDATA section
2529 *
2530 * parse a CharData section.
2531 * if we are within a CDATA section ']]>' marks an end of section.
2532 *
2533 * The right angle bracket (>) may be represented using the string "&gt;",
2534 * and must, for compatibility, be escaped using "&gt;" or a character
2535 * reference when it appears in the string "]]>" in content, when that
2536 * string is not marking the end of a CDATA section.
2537 *
2538 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2539 */
2540
2541void
2542xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002543 const xmlChar *in;
2544 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002545 int line = ctxt->input->line;
2546 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002547
2548 SHRINK;
2549 GROW;
2550 /*
2551 * Accelerated common case where input don't need to be
2552 * modified before passing it to the handler.
2553 */
2554 if ((ctxt->token == 0) && (!cdata)) {
2555 in = ctxt->input->cur;
2556 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002557get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002558 while (((*in >= 0x20) && (*in != '<') &&
2559 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2560 in++;
2561 if (*in == 0xA) {
2562 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002563 in++;
2564 while (*in == 0xA) {
2565 ctxt->input->line++;
2566 in++;
2567 }
2568 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002569 }
2570 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002571 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002572 if (IS_BLANK(*ctxt->input->cur)) {
2573 const xmlChar *tmp = ctxt->input->cur;
2574 ctxt->input->cur = in;
2575 if (areBlanks(ctxt, tmp, nbchar)) {
2576 if (ctxt->sax->ignorableWhitespace != NULL)
2577 ctxt->sax->ignorableWhitespace(ctxt->userData,
2578 tmp, nbchar);
2579 } else {
2580 if (ctxt->sax->characters != NULL)
2581 ctxt->sax->characters(ctxt->userData,
2582 tmp, nbchar);
2583 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002584 line = ctxt->input->line;
2585 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002586 } else {
2587 if (ctxt->sax->characters != NULL)
2588 ctxt->sax->characters(ctxt->userData,
2589 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002590 line = ctxt->input->line;
2591 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002592 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002593 }
2594 ctxt->input->cur = in;
2595 if (*in == 0xD) {
2596 in++;
2597 if (*in == 0xA) {
2598 ctxt->input->cur = in;
2599 in++;
2600 ctxt->input->line++;
2601 continue; /* while */
2602 }
2603 in--;
2604 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002605 if (*in == '<') {
2606 return;
2607 }
2608 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002609 return;
2610 }
2611 SHRINK;
2612 GROW;
2613 in = ctxt->input->cur;
2614 } while ((*in >= 0x20) && (*in <= 0x7F));
2615 nbchar = 0;
2616 }
Daniel Veillard50582112001-03-26 22:52:16 +00002617 ctxt->input->line = line;
2618 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002619 xmlParseCharDataComplex(ctxt, cdata);
2620}
2621
2622void
2623xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002624 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2625 int nbchar = 0;
2626 int cur, l;
2627 int count = 0;
2628
2629 SHRINK;
2630 GROW;
2631 cur = CUR_CHAR(l);
2632 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2633 ((cur != '&') || (ctxt->token == '&')) &&
2634 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2635 if ((cur == ']') && (NXT(1) == ']') &&
2636 (NXT(2) == '>')) {
2637 if (cdata) break;
2638 else {
2639 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2641 ctxt->sax->error(ctxt->userData,
2642 "Sequence ']]>' not allowed in content\n");
2643 /* Should this be relaxed ??? I see a "must here */
2644 ctxt->wellFormed = 0;
2645 ctxt->disableSAX = 1;
2646 }
2647 }
2648 COPY_BUF(l,buf,nbchar,cur);
2649 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2650 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002651 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002652 */
2653 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2654 if (areBlanks(ctxt, buf, nbchar)) {
2655 if (ctxt->sax->ignorableWhitespace != NULL)
2656 ctxt->sax->ignorableWhitespace(ctxt->userData,
2657 buf, nbchar);
2658 } else {
2659 if (ctxt->sax->characters != NULL)
2660 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2661 }
2662 }
2663 nbchar = 0;
2664 }
2665 count++;
2666 if (count > 50) {
2667 GROW;
2668 count = 0;
2669 }
2670 NEXTL(l);
2671 cur = CUR_CHAR(l);
2672 }
2673 if (nbchar != 0) {
2674 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002675 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002676 */
2677 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2678 if (areBlanks(ctxt, buf, nbchar)) {
2679 if (ctxt->sax->ignorableWhitespace != NULL)
2680 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2681 } else {
2682 if (ctxt->sax->characters != NULL)
2683 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2684 }
2685 }
2686 }
2687}
2688
2689/**
2690 * xmlParseExternalID:
2691 * @ctxt: an XML parser context
2692 * @publicID: a xmlChar** receiving PubidLiteral
2693 * @strict: indicate whether we should restrict parsing to only
2694 * production [75], see NOTE below
2695 *
2696 * Parse an External ID or a Public ID
2697 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002698 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002699 * 'PUBLIC' S PubidLiteral S SystemLiteral
2700 *
2701 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2702 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2703 *
2704 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2705 *
2706 * Returns the function returns SystemLiteral and in the second
2707 * case publicID receives PubidLiteral, is strict is off
2708 * it is possible to return NULL and have publicID set.
2709 */
2710
2711xmlChar *
2712xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2713 xmlChar *URI = NULL;
2714
2715 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002716
2717 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002718 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2719 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2720 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2721 SKIP(6);
2722 if (!IS_BLANK(CUR)) {
2723 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2725 ctxt->sax->error(ctxt->userData,
2726 "Space required after 'SYSTEM'\n");
2727 ctxt->wellFormed = 0;
2728 ctxt->disableSAX = 1;
2729 }
2730 SKIP_BLANKS;
2731 URI = xmlParseSystemLiteral(ctxt);
2732 if (URI == NULL) {
2733 ctxt->errNo = XML_ERR_URI_REQUIRED;
2734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2735 ctxt->sax->error(ctxt->userData,
2736 "xmlParseExternalID: SYSTEM, no URI\n");
2737 ctxt->wellFormed = 0;
2738 ctxt->disableSAX = 1;
2739 }
2740 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2741 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2742 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2743 SKIP(6);
2744 if (!IS_BLANK(CUR)) {
2745 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2747 ctxt->sax->error(ctxt->userData,
2748 "Space required after 'PUBLIC'\n");
2749 ctxt->wellFormed = 0;
2750 ctxt->disableSAX = 1;
2751 }
2752 SKIP_BLANKS;
2753 *publicID = xmlParsePubidLiteral(ctxt);
2754 if (*publicID == NULL) {
2755 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2757 ctxt->sax->error(ctxt->userData,
2758 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2759 ctxt->wellFormed = 0;
2760 ctxt->disableSAX = 1;
2761 }
2762 if (strict) {
2763 /*
2764 * We don't handle [83] so "S SystemLiteral" is required.
2765 */
2766 if (!IS_BLANK(CUR)) {
2767 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2769 ctxt->sax->error(ctxt->userData,
2770 "Space required after the Public Identifier\n");
2771 ctxt->wellFormed = 0;
2772 ctxt->disableSAX = 1;
2773 }
2774 } else {
2775 /*
2776 * We handle [83] so we return immediately, if
2777 * "S SystemLiteral" is not detected. From a purely parsing
2778 * point of view that's a nice mess.
2779 */
2780 const xmlChar *ptr;
2781 GROW;
2782
2783 ptr = CUR_PTR;
2784 if (!IS_BLANK(*ptr)) return(NULL);
2785
2786 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2787 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2788 }
2789 SKIP_BLANKS;
2790 URI = xmlParseSystemLiteral(ctxt);
2791 if (URI == NULL) {
2792 ctxt->errNo = XML_ERR_URI_REQUIRED;
2793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2794 ctxt->sax->error(ctxt->userData,
2795 "xmlParseExternalID: PUBLIC, no URI\n");
2796 ctxt->wellFormed = 0;
2797 ctxt->disableSAX = 1;
2798 }
2799 }
2800 return(URI);
2801}
2802
2803/**
2804 * xmlParseComment:
2805 * @ctxt: an XML parser context
2806 *
2807 * Skip an XML (SGML) comment <!-- .... -->
2808 * The spec says that "For compatibility, the string "--" (double-hyphen)
2809 * must not occur within comments. "
2810 *
2811 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2812 */
2813void
2814xmlParseComment(xmlParserCtxtPtr ctxt) {
2815 xmlChar *buf = NULL;
2816 int len;
2817 int size = XML_PARSER_BUFFER_SIZE;
2818 int q, ql;
2819 int r, rl;
2820 int cur, l;
2821 xmlParserInputState state;
2822 xmlParserInputPtr input = ctxt->input;
2823 int count = 0;
2824
2825 /*
2826 * Check that there is a comment right here.
2827 */
2828 if ((RAW != '<') || (NXT(1) != '!') ||
2829 (NXT(2) != '-') || (NXT(3) != '-')) return;
2830
2831 state = ctxt->instate;
2832 ctxt->instate = XML_PARSER_COMMENT;
2833 SHRINK;
2834 SKIP(4);
2835 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2836 if (buf == NULL) {
2837 xmlGenericError(xmlGenericErrorContext,
2838 "malloc of %d byte failed\n", size);
2839 ctxt->instate = state;
2840 return;
2841 }
2842 q = CUR_CHAR(ql);
2843 NEXTL(ql);
2844 r = CUR_CHAR(rl);
2845 NEXTL(rl);
2846 cur = CUR_CHAR(l);
2847 len = 0;
2848 while (IS_CHAR(cur) && /* checked */
2849 ((cur != '>') ||
2850 (r != '-') || (q != '-'))) {
2851 if ((r == '-') && (q == '-') && (len > 1)) {
2852 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2854 ctxt->sax->error(ctxt->userData,
2855 "Comment must not contain '--' (double-hyphen)`\n");
2856 ctxt->wellFormed = 0;
2857 ctxt->disableSAX = 1;
2858 }
2859 if (len + 5 >= size) {
2860 size *= 2;
2861 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2862 if (buf == NULL) {
2863 xmlGenericError(xmlGenericErrorContext,
2864 "realloc of %d byte failed\n", size);
2865 ctxt->instate = state;
2866 return;
2867 }
2868 }
2869 COPY_BUF(ql,buf,len,q);
2870 q = r;
2871 ql = rl;
2872 r = cur;
2873 rl = l;
2874
2875 count++;
2876 if (count > 50) {
2877 GROW;
2878 count = 0;
2879 }
2880 NEXTL(l);
2881 cur = CUR_CHAR(l);
2882 if (cur == 0) {
2883 SHRINK;
2884 GROW;
2885 cur = CUR_CHAR(l);
2886 }
2887 }
2888 buf[len] = 0;
2889 if (!IS_CHAR(cur)) {
2890 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2892 ctxt->sax->error(ctxt->userData,
2893 "Comment not terminated \n<!--%.50s\n", buf);
2894 ctxt->wellFormed = 0;
2895 ctxt->disableSAX = 1;
2896 xmlFree(buf);
2897 } else {
2898 if (input != ctxt->input) {
2899 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2901 ctxt->sax->error(ctxt->userData,
2902"Comment doesn't start and stop in the same entity\n");
2903 ctxt->wellFormed = 0;
2904 ctxt->disableSAX = 1;
2905 }
2906 NEXT;
2907 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2908 (!ctxt->disableSAX))
2909 ctxt->sax->comment(ctxt->userData, buf);
2910 xmlFree(buf);
2911 }
2912 ctxt->instate = state;
2913}
2914
2915/**
2916 * xmlParsePITarget:
2917 * @ctxt: an XML parser context
2918 *
2919 * parse the name of a PI
2920 *
2921 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2922 *
2923 * Returns the PITarget name or NULL
2924 */
2925
2926xmlChar *
2927xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2928 xmlChar *name;
2929
2930 name = xmlParseName(ctxt);
2931 if ((name != NULL) &&
2932 ((name[0] == 'x') || (name[0] == 'X')) &&
2933 ((name[1] == 'm') || (name[1] == 'M')) &&
2934 ((name[2] == 'l') || (name[2] == 'L'))) {
2935 int i;
2936 if ((name[0] == 'x') && (name[1] == 'm') &&
2937 (name[2] == 'l') && (name[3] == 0)) {
2938 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2940 ctxt->sax->error(ctxt->userData,
2941 "XML declaration allowed only at the start of the document\n");
2942 ctxt->wellFormed = 0;
2943 ctxt->disableSAX = 1;
2944 return(name);
2945 } else if (name[3] == 0) {
2946 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2948 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2949 ctxt->wellFormed = 0;
2950 ctxt->disableSAX = 1;
2951 return(name);
2952 }
2953 for (i = 0;;i++) {
2954 if (xmlW3CPIs[i] == NULL) break;
2955 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2956 return(name);
2957 }
2958 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2959 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2960 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002961 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002962 }
2963 }
2964 return(name);
2965}
2966
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must have the form  catalog = "URL"  (either quote character,
 * optional blanks around each part, nothing after the closing quote).
 * A syntax error raises a SAX warning, except a missing '=' which makes
 * the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* Pseudo-attribute name. */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto syntax_error;
    p += 7;

    /* Equal sign. */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* Quoted value. */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3029
Owen Taylor3473f882001-02-23 17:55:21 +00003030/**
3031 * xmlParsePI:
3032 * @ctxt: an XML parser context
3033 *
3034 * parse an XML Processing Instruction.
3035 *
3036 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3037 *
 * The processing is transferred to SAX once parsed.
3039 */
3040
3041void
3042xmlParsePI(xmlParserCtxtPtr ctxt) {
3043 xmlChar *buf = NULL;
3044 int len = 0;
3045 int size = XML_PARSER_BUFFER_SIZE;
3046 int cur, l;
3047 xmlChar *target;
3048 xmlParserInputState state;
3049 int count = 0;
3050
3051 if ((RAW == '<') && (NXT(1) == '?')) {
3052 xmlParserInputPtr input = ctxt->input;
3053 state = ctxt->instate;
3054 ctxt->instate = XML_PARSER_PI;
3055 /*
3056 * this is a Processing Instruction.
3057 */
3058 SKIP(2);
3059 SHRINK;
3060
3061 /*
3062 * Parse the target name and check for special support like
3063 * namespace.
3064 */
3065 target = xmlParsePITarget(ctxt);
3066 if (target != NULL) {
3067 if ((RAW == '?') && (NXT(1) == '>')) {
3068 if (input != ctxt->input) {
3069 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3071 ctxt->sax->error(ctxt->userData,
3072 "PI declaration doesn't start and stop in the same entity\n");
3073 ctxt->wellFormed = 0;
3074 ctxt->disableSAX = 1;
3075 }
3076 SKIP(2);
3077
3078 /*
3079 * SAX: PI detected.
3080 */
3081 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3082 (ctxt->sax->processingInstruction != NULL))
3083 ctxt->sax->processingInstruction(ctxt->userData,
3084 target, NULL);
3085 ctxt->instate = state;
3086 xmlFree(target);
3087 return;
3088 }
3089 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3090 if (buf == NULL) {
3091 xmlGenericError(xmlGenericErrorContext,
3092 "malloc of %d byte failed\n", size);
3093 ctxt->instate = state;
3094 return;
3095 }
3096 cur = CUR;
3097 if (!IS_BLANK(cur)) {
3098 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3100 ctxt->sax->error(ctxt->userData,
3101 "xmlParsePI: PI %s space expected\n", target);
3102 ctxt->wellFormed = 0;
3103 ctxt->disableSAX = 1;
3104 }
3105 SKIP_BLANKS;
3106 cur = CUR_CHAR(l);
3107 while (IS_CHAR(cur) && /* checked */
3108 ((cur != '?') || (NXT(1) != '>'))) {
3109 if (len + 5 >= size) {
3110 size *= 2;
3111 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3112 if (buf == NULL) {
3113 xmlGenericError(xmlGenericErrorContext,
3114 "realloc of %d byte failed\n", size);
3115 ctxt->instate = state;
3116 return;
3117 }
3118 }
3119 count++;
3120 if (count > 50) {
3121 GROW;
3122 count = 0;
3123 }
3124 COPY_BUF(l,buf,len,cur);
3125 NEXTL(l);
3126 cur = CUR_CHAR(l);
3127 if (cur == 0) {
3128 SHRINK;
3129 GROW;
3130 cur = CUR_CHAR(l);
3131 }
3132 }
3133 buf[len] = 0;
3134 if (cur != '?') {
3135 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3137 ctxt->sax->error(ctxt->userData,
3138 "xmlParsePI: PI %s never end ...\n", target);
3139 ctxt->wellFormed = 0;
3140 ctxt->disableSAX = 1;
3141 } else {
3142 if (input != ctxt->input) {
3143 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3145 ctxt->sax->error(ctxt->userData,
3146 "PI declaration doesn't start and stop in the same entity\n");
3147 ctxt->wellFormed = 0;
3148 ctxt->disableSAX = 1;
3149 }
3150 SKIP(2);
3151
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003152#ifdef LIBXML_CATALOG_ENABLED
3153 if (((state == XML_PARSER_MISC) ||
3154 (state == XML_PARSER_START)) &&
3155 (xmlStrEqual(target, XML_CATALOG_PI))) {
3156 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3157 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3158 (allow == XML_CATA_ALLOW_ALL))
3159 xmlParseCatalogPI(ctxt, buf);
3160 }
3161#endif
3162
3163
Owen Taylor3473f882001-02-23 17:55:21 +00003164 /*
3165 * SAX: PI detected.
3166 */
3167 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3168 (ctxt->sax->processingInstruction != NULL))
3169 ctxt->sax->processingInstruction(ctxt->userData,
3170 target, buf);
3171 }
3172 xmlFree(buf);
3173 xmlFree(target);
3174 } else {
3175 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3177 ctxt->sax->error(ctxt->userData,
3178 "xmlParsePI : no target name\n");
3179 ctxt->wellFormed = 0;
3180 ctxt->disableSAX = 1;
3181 }
3182 ctxt->instate = state;
3183 }
3184}
3185
3186/**
3187 * xmlParseNotationDecl:
3188 * @ctxt: an XML parser context
3189 *
3190 * parse a notation declaration
3191 *
3192 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3193 *
3194 * Hence there is actually 3 choices:
3195 * 'PUBLIC' S PubidLiteral
3196 * 'PUBLIC' S PubidLiteral S SystemLiteral
3197 * and 'SYSTEM' S SystemLiteral
3198 *
3199 * See the NOTE on xmlParseExternalID().
3200 */
3201
3202void
3203xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3204 xmlChar *name;
3205 xmlChar *Pubid;
3206 xmlChar *Systemid;
3207
3208 if ((RAW == '<') && (NXT(1) == '!') &&
3209 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3210 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3211 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3212 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3213 xmlParserInputPtr input = ctxt->input;
3214 SHRINK;
3215 SKIP(10);
3216 if (!IS_BLANK(CUR)) {
3217 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3219 ctxt->sax->error(ctxt->userData,
3220 "Space required after '<!NOTATION'\n");
3221 ctxt->wellFormed = 0;
3222 ctxt->disableSAX = 1;
3223 return;
3224 }
3225 SKIP_BLANKS;
3226
Daniel Veillard76d66f42001-05-16 21:05:17 +00003227 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003228 if (name == NULL) {
3229 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3231 ctxt->sax->error(ctxt->userData,
3232 "NOTATION: Name expected here\n");
3233 ctxt->wellFormed = 0;
3234 ctxt->disableSAX = 1;
3235 return;
3236 }
3237 if (!IS_BLANK(CUR)) {
3238 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3240 ctxt->sax->error(ctxt->userData,
3241 "Space required after the NOTATION name'\n");
3242 ctxt->wellFormed = 0;
3243 ctxt->disableSAX = 1;
3244 return;
3245 }
3246 SKIP_BLANKS;
3247
3248 /*
3249 * Parse the IDs.
3250 */
3251 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3252 SKIP_BLANKS;
3253
3254 if (RAW == '>') {
3255 if (input != ctxt->input) {
3256 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3258 ctxt->sax->error(ctxt->userData,
3259"Notation declaration doesn't start and stop in the same entity\n");
3260 ctxt->wellFormed = 0;
3261 ctxt->disableSAX = 1;
3262 }
3263 NEXT;
3264 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3265 (ctxt->sax->notationDecl != NULL))
3266 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3267 } else {
3268 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3270 ctxt->sax->error(ctxt->userData,
3271 "'>' required to close NOTATION declaration\n");
3272 ctxt->wellFormed = 0;
3273 ctxt->disableSAX = 1;
3274 }
3275 xmlFree(name);
3276 if (Systemid != NULL) xmlFree(Systemid);
3277 if (Pubid != NULL) xmlFree(Pubid);
3278 }
3279}
3280
3281/**
3282 * xmlParseEntityDecl:
3283 * @ctxt: an XML parser context
3284 *
3285 * parse <!ENTITY declarations
3286 *
3287 * [70] EntityDecl ::= GEDecl | PEDecl
3288 *
3289 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3290 *
3291 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3292 *
3293 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3294 *
3295 * [74] PEDef ::= EntityValue | ExternalID
3296 *
3297 * [76] NDataDecl ::= S 'NDATA' S Name
3298 *
3299 * [ VC: Notation Declared ]
3300 * The Name must match the declared name of a notation.
3301 */
3302
3303void
3304xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3305 xmlChar *name = NULL;
3306 xmlChar *value = NULL;
3307 xmlChar *URI = NULL, *literal = NULL;
3308 xmlChar *ndata = NULL;
3309 int isParameter = 0;
3310 xmlChar *orig = NULL;
3311
3312 GROW;
3313 if ((RAW == '<') && (NXT(1) == '!') &&
3314 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3315 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3316 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3317 xmlParserInputPtr input = ctxt->input;
3318 ctxt->instate = XML_PARSER_ENTITY_DECL;
3319 SHRINK;
3320 SKIP(8);
3321 if (!IS_BLANK(CUR)) {
3322 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3324 ctxt->sax->error(ctxt->userData,
3325 "Space required after '<!ENTITY'\n");
3326 ctxt->wellFormed = 0;
3327 ctxt->disableSAX = 1;
3328 }
3329 SKIP_BLANKS;
3330
3331 if (RAW == '%') {
3332 NEXT;
3333 if (!IS_BLANK(CUR)) {
3334 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3336 ctxt->sax->error(ctxt->userData,
3337 "Space required after '%'\n");
3338 ctxt->wellFormed = 0;
3339 ctxt->disableSAX = 1;
3340 }
3341 SKIP_BLANKS;
3342 isParameter = 1;
3343 }
3344
Daniel Veillard76d66f42001-05-16 21:05:17 +00003345 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003346 if (name == NULL) {
3347 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3349 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3350 ctxt->wellFormed = 0;
3351 ctxt->disableSAX = 1;
3352 return;
3353 }
3354 if (!IS_BLANK(CUR)) {
3355 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3357 ctxt->sax->error(ctxt->userData,
3358 "Space required after the entity name\n");
3359 ctxt->wellFormed = 0;
3360 ctxt->disableSAX = 1;
3361 }
3362 SKIP_BLANKS;
3363
3364 /*
3365 * handle the various case of definitions...
3366 */
3367 if (isParameter) {
3368 if ((RAW == '"') || (RAW == '\'')) {
3369 value = xmlParseEntityValue(ctxt, &orig);
3370 if (value) {
3371 if ((ctxt->sax != NULL) &&
3372 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3373 ctxt->sax->entityDecl(ctxt->userData, name,
3374 XML_INTERNAL_PARAMETER_ENTITY,
3375 NULL, NULL, value);
3376 }
3377 } else {
3378 URI = xmlParseExternalID(ctxt, &literal, 1);
3379 if ((URI == NULL) && (literal == NULL)) {
3380 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3382 ctxt->sax->error(ctxt->userData,
3383 "Entity value required\n");
3384 ctxt->wellFormed = 0;
3385 ctxt->disableSAX = 1;
3386 }
3387 if (URI) {
3388 xmlURIPtr uri;
3389
3390 uri = xmlParseURI((const char *) URI);
3391 if (uri == NULL) {
3392 ctxt->errNo = XML_ERR_INVALID_URI;
3393 if ((ctxt->sax != NULL) &&
3394 (!ctxt->disableSAX) &&
3395 (ctxt->sax->error != NULL))
3396 ctxt->sax->error(ctxt->userData,
3397 "Invalid URI: %s\n", URI);
3398 ctxt->wellFormed = 0;
3399 } else {
3400 if (uri->fragment != NULL) {
3401 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3402 if ((ctxt->sax != NULL) &&
3403 (!ctxt->disableSAX) &&
3404 (ctxt->sax->error != NULL))
3405 ctxt->sax->error(ctxt->userData,
3406 "Fragment not allowed: %s\n", URI);
3407 ctxt->wellFormed = 0;
3408 } else {
3409 if ((ctxt->sax != NULL) &&
3410 (!ctxt->disableSAX) &&
3411 (ctxt->sax->entityDecl != NULL))
3412 ctxt->sax->entityDecl(ctxt->userData, name,
3413 XML_EXTERNAL_PARAMETER_ENTITY,
3414 literal, URI, NULL);
3415 }
3416 xmlFreeURI(uri);
3417 }
3418 }
3419 }
3420 } else {
3421 if ((RAW == '"') || (RAW == '\'')) {
3422 value = xmlParseEntityValue(ctxt, &orig);
3423 if ((ctxt->sax != NULL) &&
3424 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3425 ctxt->sax->entityDecl(ctxt->userData, name,
3426 XML_INTERNAL_GENERAL_ENTITY,
3427 NULL, NULL, value);
3428 } else {
3429 URI = xmlParseExternalID(ctxt, &literal, 1);
3430 if ((URI == NULL) && (literal == NULL)) {
3431 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3433 ctxt->sax->error(ctxt->userData,
3434 "Entity value required\n");
3435 ctxt->wellFormed = 0;
3436 ctxt->disableSAX = 1;
3437 }
3438 if (URI) {
3439 xmlURIPtr uri;
3440
3441 uri = xmlParseURI((const char *)URI);
3442 if (uri == NULL) {
3443 ctxt->errNo = XML_ERR_INVALID_URI;
3444 if ((ctxt->sax != NULL) &&
3445 (!ctxt->disableSAX) &&
3446 (ctxt->sax->error != NULL))
3447 ctxt->sax->error(ctxt->userData,
3448 "Invalid URI: %s\n", URI);
3449 ctxt->wellFormed = 0;
3450 } else {
3451 if (uri->fragment != NULL) {
3452 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3453 if ((ctxt->sax != NULL) &&
3454 (!ctxt->disableSAX) &&
3455 (ctxt->sax->error != NULL))
3456 ctxt->sax->error(ctxt->userData,
3457 "Fragment not allowed: %s\n", URI);
3458 ctxt->wellFormed = 0;
3459 }
3460 xmlFreeURI(uri);
3461 }
3462 }
3463 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3464 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3466 ctxt->sax->error(ctxt->userData,
3467 "Space required before 'NDATA'\n");
3468 ctxt->wellFormed = 0;
3469 ctxt->disableSAX = 1;
3470 }
3471 SKIP_BLANKS;
3472 if ((RAW == 'N') && (NXT(1) == 'D') &&
3473 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3474 (NXT(4) == 'A')) {
3475 SKIP(5);
3476 if (!IS_BLANK(CUR)) {
3477 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3479 ctxt->sax->error(ctxt->userData,
3480 "Space required after 'NDATA'\n");
3481 ctxt->wellFormed = 0;
3482 ctxt->disableSAX = 1;
3483 }
3484 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003485 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3487 (ctxt->sax->unparsedEntityDecl != NULL))
3488 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3489 literal, URI, ndata);
3490 } else {
3491 if ((ctxt->sax != NULL) &&
3492 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3493 ctxt->sax->entityDecl(ctxt->userData, name,
3494 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3495 literal, URI, NULL);
3496 }
3497 }
3498 }
3499 SKIP_BLANKS;
3500 if (RAW != '>') {
3501 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3503 ctxt->sax->error(ctxt->userData,
3504 "xmlParseEntityDecl: entity %s not terminated\n", name);
3505 ctxt->wellFormed = 0;
3506 ctxt->disableSAX = 1;
3507 } else {
3508 if (input != ctxt->input) {
3509 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3511 ctxt->sax->error(ctxt->userData,
3512"Entity declaration doesn't start and stop in the same entity\n");
3513 ctxt->wellFormed = 0;
3514 ctxt->disableSAX = 1;
3515 }
3516 NEXT;
3517 }
3518 if (orig != NULL) {
3519 /*
3520 * Ugly mechanism to save the raw entity value.
3521 */
3522 xmlEntityPtr cur = NULL;
3523
3524 if (isParameter) {
3525 if ((ctxt->sax != NULL) &&
3526 (ctxt->sax->getParameterEntity != NULL))
3527 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3528 } else {
3529 if ((ctxt->sax != NULL) &&
3530 (ctxt->sax->getEntity != NULL))
3531 cur = ctxt->sax->getEntity(ctxt->userData, name);
3532 }
3533 if (cur != NULL) {
3534 if (cur->orig != NULL)
3535 xmlFree(orig);
3536 else
3537 cur->orig = orig;
3538 } else
3539 xmlFree(orig);
3540 }
3541 if (name != NULL) xmlFree(name);
3542 if (value != NULL) xmlFree(value);
3543 if (URI != NULL) xmlFree(URI);
3544 if (literal != NULL) xmlFree(literal);
3545 if (ndata != NULL) xmlFree(ndata);
3546 }
3547}
3548
3549/**
3550 * xmlParseDefaultDecl:
3551 * @ctxt: an XML parser context
3552 * @value: Receive a possible fixed default value for the attribute
3553 *
3554 * Parse an attribute default declaration
3555 *
3556 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3557 *
3558 * [ VC: Required Attribute ]
3559 * if the default declaration is the keyword #REQUIRED, then the
3560 * attribute must be specified for all elements of the type in the
3561 * attribute-list declaration.
3562 *
3563 * [ VC: Attribute Default Legal ]
3564 * The declared default value must meet the lexical constraints of
3565 * the declared attribute type c.f. xmlValidateAttributeDecl()
3566 *
3567 * [ VC: Fixed Attribute Default ]
3568 * if an attribute has a default value declared with the #FIXED
3569 * keyword, instances of that attribute must match the default value.
3570 *
3571 * [ WFC: No < in Attribute Values ]
3572 * handled in xmlParseAttValue()
3573 *
3574 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3575 * or XML_ATTRIBUTE_FIXED.
3576 */
3577
3578int
3579xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3580 int val;
3581 xmlChar *ret;
3582
3583 *value = NULL;
3584 if ((RAW == '#') && (NXT(1) == 'R') &&
3585 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3586 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3587 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3588 (NXT(8) == 'D')) {
3589 SKIP(9);
3590 return(XML_ATTRIBUTE_REQUIRED);
3591 }
3592 if ((RAW == '#') && (NXT(1) == 'I') &&
3593 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3594 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3595 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3596 SKIP(8);
3597 return(XML_ATTRIBUTE_IMPLIED);
3598 }
3599 val = XML_ATTRIBUTE_NONE;
3600 if ((RAW == '#') && (NXT(1) == 'F') &&
3601 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3602 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3603 SKIP(6);
3604 val = XML_ATTRIBUTE_FIXED;
3605 if (!IS_BLANK(CUR)) {
3606 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3608 ctxt->sax->error(ctxt->userData,
3609 "Space required after '#FIXED'\n");
3610 ctxt->wellFormed = 0;
3611 ctxt->disableSAX = 1;
3612 }
3613 SKIP_BLANKS;
3614 }
3615 ret = xmlParseAttValue(ctxt);
3616 ctxt->instate = XML_PARSER_DTD;
3617 if (ret == NULL) {
3618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3619 ctxt->sax->error(ctxt->userData,
3620 "Attribute default value declaration error\n");
3621 ctxt->wellFormed = 0;
3622 ctxt->disableSAX = 1;
3623 } else
3624 *value = ret;
3625 return(val);
3626}
3627
3628/**
3629 * xmlParseNotationType:
3630 * @ctxt: an XML parser context
3631 *
3632 * parse an Notation attribute type.
3633 *
3634 * Note: the leading 'NOTATION' S part has already being parsed...
3635 *
3636 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3637 *
3638 * [ VC: Notation Attributes ]
3639 * Values of this type must match one of the notation names included
3640 * in the declaration; all notation names in the declaration must be declared.
3641 *
3642 * Returns: the notation attribute tree built while parsing
3643 */
3644
3645xmlEnumerationPtr
3646xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3647 xmlChar *name;
3648 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3649
3650 if (RAW != '(') {
3651 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3653 ctxt->sax->error(ctxt->userData,
3654 "'(' required to start 'NOTATION'\n");
3655 ctxt->wellFormed = 0;
3656 ctxt->disableSAX = 1;
3657 return(NULL);
3658 }
3659 SHRINK;
3660 do {
3661 NEXT;
3662 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003663 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003664 if (name == NULL) {
3665 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3667 ctxt->sax->error(ctxt->userData,
3668 "Name expected in NOTATION declaration\n");
3669 ctxt->wellFormed = 0;
3670 ctxt->disableSAX = 1;
3671 return(ret);
3672 }
3673 cur = xmlCreateEnumeration(name);
3674 xmlFree(name);
3675 if (cur == NULL) return(ret);
3676 if (last == NULL) ret = last = cur;
3677 else {
3678 last->next = cur;
3679 last = cur;
3680 }
3681 SKIP_BLANKS;
3682 } while (RAW == '|');
3683 if (RAW != ')') {
3684 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3686 ctxt->sax->error(ctxt->userData,
3687 "')' required to finish NOTATION declaration\n");
3688 ctxt->wellFormed = 0;
3689 ctxt->disableSAX = 1;
3690 if ((last != NULL) && (last != ret))
3691 xmlFreeEnumeration(last);
3692 return(ret);
3693 }
3694 NEXT;
3695 return(ret);
3696}
3697
3698/**
3699 * xmlParseEnumerationType:
3700 * @ctxt: an XML parser context
3701 *
3702 * parse an Enumeration attribute type.
3703 *
3704 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3705 *
3706 * [ VC: Enumeration ]
3707 * Values of this type must match one of the Nmtoken tokens in
3708 * the declaration
3709 *
3710 * Returns: the enumeration attribute tree built while parsing
3711 */
3712
3713xmlEnumerationPtr
3714xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3715 xmlChar *name;
3716 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3717
3718 if (RAW != '(') {
3719 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3721 ctxt->sax->error(ctxt->userData,
3722 "'(' required to start ATTLIST enumeration\n");
3723 ctxt->wellFormed = 0;
3724 ctxt->disableSAX = 1;
3725 return(NULL);
3726 }
3727 SHRINK;
3728 do {
3729 NEXT;
3730 SKIP_BLANKS;
3731 name = xmlParseNmtoken(ctxt);
3732 if (name == NULL) {
3733 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3735 ctxt->sax->error(ctxt->userData,
3736 "NmToken expected in ATTLIST enumeration\n");
3737 ctxt->wellFormed = 0;
3738 ctxt->disableSAX = 1;
3739 return(ret);
3740 }
3741 cur = xmlCreateEnumeration(name);
3742 xmlFree(name);
3743 if (cur == NULL) return(ret);
3744 if (last == NULL) ret = last = cur;
3745 else {
3746 last->next = cur;
3747 last = cur;
3748 }
3749 SKIP_BLANKS;
3750 } while (RAW == '|');
3751 if (RAW != ')') {
3752 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3754 ctxt->sax->error(ctxt->userData,
3755 "')' required to finish ATTLIST enumeration\n");
3756 ctxt->wellFormed = 0;
3757 ctxt->disableSAX = 1;
3758 return(ret);
3759 }
3760 NEXT;
3761 return(ret);
3762}
3763
3764/**
3765 * xmlParseEnumeratedType:
3766 * @ctxt: an XML parser context
3767 * @tree: the enumeration tree built while parsing
3768 *
3769 * parse an Enumerated attribute type.
3770 *
3771 * [57] EnumeratedType ::= NotationType | Enumeration
3772 *
3773 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3774 *
3775 *
3776 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3777 */
3778
3779int
3780xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3781 if ((RAW == 'N') && (NXT(1) == 'O') &&
3782 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3783 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3784 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3785 SKIP(8);
3786 if (!IS_BLANK(CUR)) {
3787 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3789 ctxt->sax->error(ctxt->userData,
3790 "Space required after 'NOTATION'\n");
3791 ctxt->wellFormed = 0;
3792 ctxt->disableSAX = 1;
3793 return(0);
3794 }
3795 SKIP_BLANKS;
3796 *tree = xmlParseNotationType(ctxt);
3797 if (*tree == NULL) return(0);
3798 return(XML_ATTRIBUTE_NOTATION);
3799 }
3800 *tree = xmlParseEnumerationType(ctxt);
3801 if (*tree == NULL) return(0);
3802 return(XML_ATTRIBUTE_ENUMERATION);
3803}
3804
3805/**
3806 * xmlParseAttributeType:
3807 * @ctxt: an XML parser context
3808 * @tree: the enumeration tree built while parsing
3809 *
3810 * parse the Attribute list def for an element
3811 *
3812 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3813 *
3814 * [55] StringType ::= 'CDATA'
3815 *
3816 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3817 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3818 *
3819 * Validity constraints for attribute values syntax are checked in
3820 * xmlValidateAttributeValue()
3821 *
3822 * [ VC: ID ]
3823 * Values of type ID must match the Name production. A name must not
3824 * appear more than once in an XML document as a value of this type;
3825 * i.e., ID values must uniquely identify the elements which bear them.
3826 *
3827 * [ VC: One ID per Element Type ]
3828 * No element type may have more than one ID attribute specified.
3829 *
3830 * [ VC: ID Attribute Default ]
3831 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3832 *
3833 * [ VC: IDREF ]
3834 * Values of type IDREF must match the Name production, and values
3835 * of type IDREFS must match Names; each IDREF Name must match the value
3836 * of an ID attribute on some element in the XML document; i.e. IDREF
3837 * values must match the value of some ID attribute.
3838 *
3839 * [ VC: Entity Name ]
3840 * Values of type ENTITY must match the Name production, values
3841 * of type ENTITIES must match Names; each Entity Name must match the
3842 * name of an unparsed entity declared in the DTD.
3843 *
3844 * [ VC: Name Token ]
3845 * Values of type NMTOKEN must match the Nmtoken production; values
3846 * of type NMTOKENS must match Nmtokens.
3847 *
3848 * Returns the attribute type
3849 */
3850int
3851xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3852 SHRINK;
3853 if ((RAW == 'C') && (NXT(1) == 'D') &&
3854 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3855 (NXT(4) == 'A')) {
3856 SKIP(5);
3857 return(XML_ATTRIBUTE_CDATA);
3858 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3859 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3860 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3861 SKIP(6);
3862 return(XML_ATTRIBUTE_IDREFS);
3863 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3864 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3865 (NXT(4) == 'F')) {
3866 SKIP(5);
3867 return(XML_ATTRIBUTE_IDREF);
3868 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3869 SKIP(2);
3870 return(XML_ATTRIBUTE_ID);
3871 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3872 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3873 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3874 SKIP(6);
3875 return(XML_ATTRIBUTE_ENTITY);
3876 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3877 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3878 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3879 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3880 SKIP(8);
3881 return(XML_ATTRIBUTE_ENTITIES);
3882 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3883 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3884 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3885 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3886 SKIP(8);
3887 return(XML_ATTRIBUTE_NMTOKENS);
3888 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3889 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3890 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3891 (NXT(6) == 'N')) {
3892 SKIP(7);
3893 return(XML_ATTRIBUTE_NMTOKEN);
3894 }
3895 return(xmlParseEnumeratedType(ctxt, tree));
3896}
3897
3898/**
3899 * xmlParseAttributeListDecl:
3900 * @ctxt: an XML parser context
3901 *
3902 * : parse the Attribute list def for an element
3903 *
3904 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3905 *
3906 * [53] AttDef ::= S Name S AttType S DefaultDecl
3907 *
3908 */
3909void
3910xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3911 xmlChar *elemName;
3912 xmlChar *attrName;
3913 xmlEnumerationPtr tree;
3914
3915 if ((RAW == '<') && (NXT(1) == '!') &&
3916 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3917 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3918 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3919 (NXT(8) == 'T')) {
3920 xmlParserInputPtr input = ctxt->input;
3921
3922 SKIP(9);
3923 if (!IS_BLANK(CUR)) {
3924 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3926 ctxt->sax->error(ctxt->userData,
3927 "Space required after '<!ATTLIST'\n");
3928 ctxt->wellFormed = 0;
3929 ctxt->disableSAX = 1;
3930 }
3931 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003932 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003933 if (elemName == NULL) {
3934 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3936 ctxt->sax->error(ctxt->userData,
3937 "ATTLIST: no name for Element\n");
3938 ctxt->wellFormed = 0;
3939 ctxt->disableSAX = 1;
3940 return;
3941 }
3942 SKIP_BLANKS;
3943 GROW;
3944 while (RAW != '>') {
3945 const xmlChar *check = CUR_PTR;
3946 int type;
3947 int def;
3948 xmlChar *defaultValue = NULL;
3949
3950 GROW;
3951 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003952 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003953 if (attrName == NULL) {
3954 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3956 ctxt->sax->error(ctxt->userData,
3957 "ATTLIST: no name for Attribute\n");
3958 ctxt->wellFormed = 0;
3959 ctxt->disableSAX = 1;
3960 break;
3961 }
3962 GROW;
3963 if (!IS_BLANK(CUR)) {
3964 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3966 ctxt->sax->error(ctxt->userData,
3967 "Space required after the attribute name\n");
3968 ctxt->wellFormed = 0;
3969 ctxt->disableSAX = 1;
3970 if (attrName != NULL)
3971 xmlFree(attrName);
3972 if (defaultValue != NULL)
3973 xmlFree(defaultValue);
3974 break;
3975 }
3976 SKIP_BLANKS;
3977
3978 type = xmlParseAttributeType(ctxt, &tree);
3979 if (type <= 0) {
3980 if (attrName != NULL)
3981 xmlFree(attrName);
3982 if (defaultValue != NULL)
3983 xmlFree(defaultValue);
3984 break;
3985 }
3986
3987 GROW;
3988 if (!IS_BLANK(CUR)) {
3989 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3991 ctxt->sax->error(ctxt->userData,
3992 "Space required after the attribute type\n");
3993 ctxt->wellFormed = 0;
3994 ctxt->disableSAX = 1;
3995 if (attrName != NULL)
3996 xmlFree(attrName);
3997 if (defaultValue != NULL)
3998 xmlFree(defaultValue);
3999 if (tree != NULL)
4000 xmlFreeEnumeration(tree);
4001 break;
4002 }
4003 SKIP_BLANKS;
4004
4005 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4006 if (def <= 0) {
4007 if (attrName != NULL)
4008 xmlFree(attrName);
4009 if (defaultValue != NULL)
4010 xmlFree(defaultValue);
4011 if (tree != NULL)
4012 xmlFreeEnumeration(tree);
4013 break;
4014 }
4015
4016 GROW;
4017 if (RAW != '>') {
4018 if (!IS_BLANK(CUR)) {
4019 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4021 ctxt->sax->error(ctxt->userData,
4022 "Space required after the attribute default value\n");
4023 ctxt->wellFormed = 0;
4024 ctxt->disableSAX = 1;
4025 if (attrName != NULL)
4026 xmlFree(attrName);
4027 if (defaultValue != NULL)
4028 xmlFree(defaultValue);
4029 if (tree != NULL)
4030 xmlFreeEnumeration(tree);
4031 break;
4032 }
4033 SKIP_BLANKS;
4034 }
4035 if (check == CUR_PTR) {
4036 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4038 ctxt->sax->error(ctxt->userData,
4039 "xmlParseAttributeListDecl: detected internal error\n");
4040 if (attrName != NULL)
4041 xmlFree(attrName);
4042 if (defaultValue != NULL)
4043 xmlFree(defaultValue);
4044 if (tree != NULL)
4045 xmlFreeEnumeration(tree);
4046 break;
4047 }
4048 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4049 (ctxt->sax->attributeDecl != NULL))
4050 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4051 type, def, defaultValue, tree);
4052 if (attrName != NULL)
4053 xmlFree(attrName);
4054 if (defaultValue != NULL)
4055 xmlFree(defaultValue);
4056 GROW;
4057 }
4058 if (RAW == '>') {
4059 if (input != ctxt->input) {
4060 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4062 ctxt->sax->error(ctxt->userData,
4063"Attribute list declaration doesn't start and stop in the same entity\n");
4064 ctxt->wellFormed = 0;
4065 ctxt->disableSAX = 1;
4066 }
4067 NEXT;
4068 }
4069
4070 xmlFree(elemName);
4071 }
4072}
4073
4074/**
4075 * xmlParseElementMixedContentDecl:
4076 * @ctxt: an XML parser context
4077 *
4078 * parse the declaration for a Mixed Element content
4079 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4080 *
4081 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4082 * '(' S? '#PCDATA' S? ')'
4083 *
4084 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4085 *
4086 * [ VC: No Duplicate Types ]
4087 * The same name must not appear more than once in a single
4088 * mixed-content declaration.
4089 *
4090 * returns: the list of the xmlElementContentPtr describing the element choices
4091 */
4092xmlElementContentPtr
4093xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4094 xmlElementContentPtr ret = NULL, cur = NULL, n;
4095 xmlChar *elem = NULL;
4096
4097 GROW;
4098 if ((RAW == '#') && (NXT(1) == 'P') &&
4099 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4100 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4101 (NXT(6) == 'A')) {
4102 SKIP(7);
4103 SKIP_BLANKS;
4104 SHRINK;
4105 if (RAW == ')') {
4106 ctxt->entity = ctxt->input;
4107 NEXT;
4108 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4109 if (RAW == '*') {
4110 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4111 NEXT;
4112 }
4113 return(ret);
4114 }
4115 if ((RAW == '(') || (RAW == '|')) {
4116 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4117 if (ret == NULL) return(NULL);
4118 }
4119 while (RAW == '|') {
4120 NEXT;
4121 if (elem == NULL) {
4122 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4123 if (ret == NULL) return(NULL);
4124 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004125 if (cur != NULL)
4126 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004127 cur = ret;
4128 } else {
4129 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4130 if (n == NULL) return(NULL);
4131 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004132 if (n->c1 != NULL)
4133 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004134 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004135 if (n != NULL)
4136 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004137 cur = n;
4138 xmlFree(elem);
4139 }
4140 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004141 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 if (elem == NULL) {
4143 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4145 ctxt->sax->error(ctxt->userData,
4146 "xmlParseElementMixedContentDecl : Name expected\n");
4147 ctxt->wellFormed = 0;
4148 ctxt->disableSAX = 1;
4149 xmlFreeElementContent(cur);
4150 return(NULL);
4151 }
4152 SKIP_BLANKS;
4153 GROW;
4154 }
4155 if ((RAW == ')') && (NXT(1) == '*')) {
4156 if (elem != NULL) {
4157 cur->c2 = xmlNewElementContent(elem,
4158 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004159 if (cur->c2 != NULL)
4160 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004161 xmlFree(elem);
4162 }
4163 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4164 ctxt->entity = ctxt->input;
4165 SKIP(2);
4166 } else {
4167 if (elem != NULL) xmlFree(elem);
4168 xmlFreeElementContent(ret);
4169 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4171 ctxt->sax->error(ctxt->userData,
4172 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4173 ctxt->wellFormed = 0;
4174 ctxt->disableSAX = 1;
4175 return(NULL);
4176 }
4177
4178 } else {
4179 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4181 ctxt->sax->error(ctxt->userData,
4182 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4183 ctxt->wellFormed = 0;
4184 ctxt->disableSAX = 1;
4185 }
4186 return(ret);
4187}
4188
4189/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004190 * xmlParseElementChildrenContentD:
4191 * @ctxt: an XML parser context
4192 *
4193 * VMS version of xmlParseElementChildrenContentDecl()
4194 *
4195 * Returns the tree of xmlElementContentPtr describing the element
4196 * hierarchy.
4197 */
4198/**
Owen Taylor3473f882001-02-23 17:55:21 +00004199 * xmlParseElementChildrenContentDecl:
4200 * @ctxt: an XML parser context
4201 *
4202 * parse the declaration for a Mixed Element content
4203 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4204 *
4205 *
4206 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4207 *
4208 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4209 *
4210 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4211 *
4212 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4213 *
4214 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4215 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004216 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004217 * opening or closing parentheses in a choice, seq, or Mixed
4218 * construct is contained in the replacement text for a parameter
4219 * entity, both must be contained in the same replacement text. For
4220 * interoperability, if a parameter-entity reference appears in a
4221 * choice, seq, or Mixed construct, its replacement text should not
4222 * be empty, and neither the first nor last non-blank character of
4223 * the replacement text should be a connector (| or ,).
4224 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004225 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004226 * hierarchy.
4227 */
4228xmlElementContentPtr
4229#ifdef VMS
4230xmlParseElementChildrenContentD
4231#else
4232xmlParseElementChildrenContentDecl
4233#endif
4234(xmlParserCtxtPtr ctxt) {
4235 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4236 xmlChar *elem;
4237 xmlChar type = 0;
4238
4239 SKIP_BLANKS;
4240 GROW;
4241 if (RAW == '(') {
4242 /* Recurse on first child */
4243 NEXT;
4244 SKIP_BLANKS;
4245 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4246 SKIP_BLANKS;
4247 GROW;
4248 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004249 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004250 if (elem == NULL) {
4251 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4253 ctxt->sax->error(ctxt->userData,
4254 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4255 ctxt->wellFormed = 0;
4256 ctxt->disableSAX = 1;
4257 return(NULL);
4258 }
4259 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4260 GROW;
4261 if (RAW == '?') {
4262 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4263 NEXT;
4264 } else if (RAW == '*') {
4265 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4266 NEXT;
4267 } else if (RAW == '+') {
4268 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4269 NEXT;
4270 } else {
4271 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4272 }
4273 xmlFree(elem);
4274 GROW;
4275 }
4276 SKIP_BLANKS;
4277 SHRINK;
4278 while (RAW != ')') {
4279 /*
4280 * Each loop we parse one separator and one element.
4281 */
4282 if (RAW == ',') {
4283 if (type == 0) type = CUR;
4284
4285 /*
4286 * Detect "Name | Name , Name" error
4287 */
4288 else if (type != CUR) {
4289 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4291 ctxt->sax->error(ctxt->userData,
4292 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4293 type);
4294 ctxt->wellFormed = 0;
4295 ctxt->disableSAX = 1;
4296 if ((op != NULL) && (op != ret))
4297 xmlFreeElementContent(op);
4298 if ((last != NULL) && (last != ret) &&
4299 (last != ret->c1) && (last != ret->c2))
4300 xmlFreeElementContent(last);
4301 if (ret != NULL)
4302 xmlFreeElementContent(ret);
4303 return(NULL);
4304 }
4305 NEXT;
4306
4307 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4308 if (op == NULL) {
4309 xmlFreeElementContent(ret);
4310 return(NULL);
4311 }
4312 if (last == NULL) {
4313 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004314 if (ret != NULL)
4315 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004316 ret = cur = op;
4317 } else {
4318 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004319 if (op != NULL)
4320 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004321 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004322 if (last != NULL)
4323 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004324 cur =op;
4325 last = NULL;
4326 }
4327 } else if (RAW == '|') {
4328 if (type == 0) type = CUR;
4329
4330 /*
4331 * Detect "Name , Name | Name" error
4332 */
4333 else if (type != CUR) {
4334 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4336 ctxt->sax->error(ctxt->userData,
4337 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4338 type);
4339 ctxt->wellFormed = 0;
4340 ctxt->disableSAX = 1;
4341 if ((op != NULL) && (op != ret) && (op != last))
4342 xmlFreeElementContent(op);
4343 if ((last != NULL) && (last != ret) &&
4344 (last != ret->c1) && (last != ret->c2))
4345 xmlFreeElementContent(last);
4346 if (ret != NULL)
4347 xmlFreeElementContent(ret);
4348 return(NULL);
4349 }
4350 NEXT;
4351
4352 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4353 if (op == NULL) {
4354 if ((op != NULL) && (op != ret))
4355 xmlFreeElementContent(op);
4356 if ((last != NULL) && (last != ret) &&
4357 (last != ret->c1) && (last != ret->c2))
4358 xmlFreeElementContent(last);
4359 if (ret != NULL)
4360 xmlFreeElementContent(ret);
4361 return(NULL);
4362 }
4363 if (last == NULL) {
4364 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004365 if (ret != NULL)
4366 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004367 ret = cur = op;
4368 } else {
4369 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004370 if (op != NULL)
4371 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004372 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004373 if (last != NULL)
4374 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004375 cur =op;
4376 last = NULL;
4377 }
4378 } else {
4379 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4381 ctxt->sax->error(ctxt->userData,
4382 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4383 ctxt->wellFormed = 0;
4384 ctxt->disableSAX = 1;
4385 if ((op != NULL) && (op != ret))
4386 xmlFreeElementContent(op);
4387 if ((last != NULL) && (last != ret) &&
4388 (last != ret->c1) && (last != ret->c2))
4389 xmlFreeElementContent(last);
4390 if (ret != NULL)
4391 xmlFreeElementContent(ret);
4392 return(NULL);
4393 }
4394 GROW;
4395 SKIP_BLANKS;
4396 GROW;
4397 if (RAW == '(') {
4398 /* Recurse on second child */
4399 NEXT;
4400 SKIP_BLANKS;
4401 last = xmlParseElementChildrenContentDecl(ctxt);
4402 SKIP_BLANKS;
4403 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004404 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004405 if (elem == NULL) {
4406 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4408 ctxt->sax->error(ctxt->userData,
4409 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4410 ctxt->wellFormed = 0;
4411 ctxt->disableSAX = 1;
4412 if ((op != NULL) && (op != ret))
4413 xmlFreeElementContent(op);
4414 if ((last != NULL) && (last != ret) &&
4415 (last != ret->c1) && (last != ret->c2))
4416 xmlFreeElementContent(last);
4417 if (ret != NULL)
4418 xmlFreeElementContent(ret);
4419 return(NULL);
4420 }
4421 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4422 xmlFree(elem);
4423 if (RAW == '?') {
4424 last->ocur = XML_ELEMENT_CONTENT_OPT;
4425 NEXT;
4426 } else if (RAW == '*') {
4427 last->ocur = XML_ELEMENT_CONTENT_MULT;
4428 NEXT;
4429 } else if (RAW == '+') {
4430 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4431 NEXT;
4432 } else {
4433 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4434 }
4435 }
4436 SKIP_BLANKS;
4437 GROW;
4438 }
4439 if ((cur != NULL) && (last != NULL)) {
4440 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004441 if (last != NULL)
4442 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004443 }
4444 ctxt->entity = ctxt->input;
4445 NEXT;
4446 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004447 if (ret != NULL)
4448 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004449 NEXT;
4450 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004451 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004452 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004453 cur = ret;
4454 /*
4455 * Some normalization:
4456 * (a | b* | c?)* == (a | b | c)*
4457 */
4458 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4459 if ((cur->c1 != NULL) &&
4460 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4461 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4462 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4463 if ((cur->c2 != NULL) &&
4464 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4465 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4466 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4467 cur = cur->c2;
4468 }
4469 }
Owen Taylor3473f882001-02-23 17:55:21 +00004470 NEXT;
4471 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004472 if (ret != NULL) {
4473 int found = 0;
4474
Daniel Veillarde470df72001-04-18 21:41:07 +00004475 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004476 /*
4477 * Some normalization:
4478 * (a | b*)+ == (a | b)*
4479 * (a | b?)+ == (a | b)*
4480 */
4481 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4482 if ((cur->c1 != NULL) &&
4483 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4484 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4485 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4486 found = 1;
4487 }
4488 if ((cur->c2 != NULL) &&
4489 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4490 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4491 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4492 found = 1;
4493 }
4494 cur = cur->c2;
4495 }
4496 if (found)
4497 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4498 }
Owen Taylor3473f882001-02-23 17:55:21 +00004499 NEXT;
4500 }
4501 return(ret);
4502}
4503
4504/**
4505 * xmlParseElementContentDecl:
4506 * @ctxt: an XML parser context
4507 * @name: the name of the element being defined.
4508 * @result: the Element Content pointer will be stored here if any
4509 *
4510 * parse the declaration for an Element content either Mixed or Children,
4511 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4512 *
4513 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4514 *
4515 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4516 */
4517
4518int
4519xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4520 xmlElementContentPtr *result) {
4521
4522 xmlElementContentPtr tree = NULL;
4523 xmlParserInputPtr input = ctxt->input;
4524 int res;
4525
4526 *result = NULL;
4527
4528 if (RAW != '(') {
4529 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4531 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004532 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004533 ctxt->wellFormed = 0;
4534 ctxt->disableSAX = 1;
4535 return(-1);
4536 }
4537 NEXT;
4538 GROW;
4539 SKIP_BLANKS;
4540 if ((RAW == '#') && (NXT(1) == 'P') &&
4541 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4542 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4543 (NXT(6) == 'A')) {
4544 tree = xmlParseElementMixedContentDecl(ctxt);
4545 res = XML_ELEMENT_TYPE_MIXED;
4546 } else {
4547 tree = xmlParseElementChildrenContentDecl(ctxt);
4548 res = XML_ELEMENT_TYPE_ELEMENT;
4549 }
4550 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4551 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4553 ctxt->sax->error(ctxt->userData,
4554"Element content declaration doesn't start and stop in the same entity\n");
4555 ctxt->wellFormed = 0;
4556 ctxt->disableSAX = 1;
4557 }
4558 SKIP_BLANKS;
4559 *result = tree;
4560 return(res);
4561}
4562
4563/**
4564 * xmlParseElementDecl:
4565 * @ctxt: an XML parser context
4566 *
4567 * parse an Element declaration.
4568 *
4569 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4570 *
4571 * [ VC: Unique Element Type Declaration ]
4572 * No element type may be declared more than once
4573 *
4574 * Returns the type of the element, or -1 in case of error
4575 */
4576int
4577xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4578 xmlChar *name;
4579 int ret = -1;
4580 xmlElementContentPtr content = NULL;
4581
4582 GROW;
4583 if ((RAW == '<') && (NXT(1) == '!') &&
4584 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4585 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4586 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4587 (NXT(8) == 'T')) {
4588 xmlParserInputPtr input = ctxt->input;
4589
4590 SKIP(9);
4591 if (!IS_BLANK(CUR)) {
4592 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4594 ctxt->sax->error(ctxt->userData,
4595 "Space required after 'ELEMENT'\n");
4596 ctxt->wellFormed = 0;
4597 ctxt->disableSAX = 1;
4598 }
4599 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004600 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004601 if (name == NULL) {
4602 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4604 ctxt->sax->error(ctxt->userData,
4605 "xmlParseElementDecl: no name for Element\n");
4606 ctxt->wellFormed = 0;
4607 ctxt->disableSAX = 1;
4608 return(-1);
4609 }
4610 while ((RAW == 0) && (ctxt->inputNr > 1))
4611 xmlPopInput(ctxt);
4612 if (!IS_BLANK(CUR)) {
4613 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4615 ctxt->sax->error(ctxt->userData,
4616 "Space required after the element name\n");
4617 ctxt->wellFormed = 0;
4618 ctxt->disableSAX = 1;
4619 }
4620 SKIP_BLANKS;
4621 if ((RAW == 'E') && (NXT(1) == 'M') &&
4622 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4623 (NXT(4) == 'Y')) {
4624 SKIP(5);
4625 /*
4626 * Element must always be empty.
4627 */
4628 ret = XML_ELEMENT_TYPE_EMPTY;
4629 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4630 (NXT(2) == 'Y')) {
4631 SKIP(3);
4632 /*
4633 * Element is a generic container.
4634 */
4635 ret = XML_ELEMENT_TYPE_ANY;
4636 } else if (RAW == '(') {
4637 ret = xmlParseElementContentDecl(ctxt, name, &content);
4638 } else {
4639 /*
4640 * [ WFC: PEs in Internal Subset ] error handling.
4641 */
4642 if ((RAW == '%') && (ctxt->external == 0) &&
4643 (ctxt->inputNr == 1)) {
4644 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4646 ctxt->sax->error(ctxt->userData,
4647 "PEReference: forbidden within markup decl in internal subset\n");
4648 } else {
4649 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4651 ctxt->sax->error(ctxt->userData,
4652 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4653 }
4654 ctxt->wellFormed = 0;
4655 ctxt->disableSAX = 1;
4656 if (name != NULL) xmlFree(name);
4657 return(-1);
4658 }
4659
4660 SKIP_BLANKS;
4661 /*
4662 * Pop-up of finished entities.
4663 */
4664 while ((RAW == 0) && (ctxt->inputNr > 1))
4665 xmlPopInput(ctxt);
4666 SKIP_BLANKS;
4667
4668 if (RAW != '>') {
4669 ctxt->errNo = XML_ERR_GT_REQUIRED;
4670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4671 ctxt->sax->error(ctxt->userData,
4672 "xmlParseElementDecl: expected '>' at the end\n");
4673 ctxt->wellFormed = 0;
4674 ctxt->disableSAX = 1;
4675 } else {
4676 if (input != ctxt->input) {
4677 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4679 ctxt->sax->error(ctxt->userData,
4680"Element declaration doesn't start and stop in the same entity\n");
4681 ctxt->wellFormed = 0;
4682 ctxt->disableSAX = 1;
4683 }
4684
4685 NEXT;
4686 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4687 (ctxt->sax->elementDecl != NULL))
4688 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4689 content);
4690 }
4691 if (content != NULL) {
4692 xmlFreeElementContent(content);
4693 }
4694 if (name != NULL) {
4695 xmlFree(name);
4696 }
4697 }
4698 return(ret);
4699}
4700
4701/**
Owen Taylor3473f882001-02-23 17:55:21 +00004702 * xmlParseConditionalSections
4703 * @ctxt: an XML parser context
4704 *
4705 * [61] conditionalSect ::= includeSect | ignoreSect
4706 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4707 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4708 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4709 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4710 */
4711
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004712static void
Owen Taylor3473f882001-02-23 17:55:21 +00004713xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4714 SKIP(3);
4715 SKIP_BLANKS;
4716 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4717 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4718 (NXT(6) == 'E')) {
4719 SKIP(7);
4720 SKIP_BLANKS;
4721 if (RAW != '[') {
4722 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4724 ctxt->sax->error(ctxt->userData,
4725 "XML conditional section '[' expected\n");
4726 ctxt->wellFormed = 0;
4727 ctxt->disableSAX = 1;
4728 } else {
4729 NEXT;
4730 }
4731 if (xmlParserDebugEntities) {
4732 if ((ctxt->input != NULL) && (ctxt->input->filename))
4733 xmlGenericError(xmlGenericErrorContext,
4734 "%s(%d): ", ctxt->input->filename,
4735 ctxt->input->line);
4736 xmlGenericError(xmlGenericErrorContext,
4737 "Entering INCLUDE Conditional Section\n");
4738 }
4739
4740 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4741 (NXT(2) != '>'))) {
4742 const xmlChar *check = CUR_PTR;
4743 int cons = ctxt->input->consumed;
4744 int tok = ctxt->token;
4745
4746 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4747 xmlParseConditionalSections(ctxt);
4748 } else if (IS_BLANK(CUR)) {
4749 NEXT;
4750 } else if (RAW == '%') {
4751 xmlParsePEReference(ctxt);
4752 } else
4753 xmlParseMarkupDecl(ctxt);
4754
4755 /*
4756 * Pop-up of finished entities.
4757 */
4758 while ((RAW == 0) && (ctxt->inputNr > 1))
4759 xmlPopInput(ctxt);
4760
4761 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4762 (tok == ctxt->token)) {
4763 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4765 ctxt->sax->error(ctxt->userData,
4766 "Content error in the external subset\n");
4767 ctxt->wellFormed = 0;
4768 ctxt->disableSAX = 1;
4769 break;
4770 }
4771 }
4772 if (xmlParserDebugEntities) {
4773 if ((ctxt->input != NULL) && (ctxt->input->filename))
4774 xmlGenericError(xmlGenericErrorContext,
4775 "%s(%d): ", ctxt->input->filename,
4776 ctxt->input->line);
4777 xmlGenericError(xmlGenericErrorContext,
4778 "Leaving INCLUDE Conditional Section\n");
4779 }
4780
4781 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4782 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4783 int state;
4784 int instate;
4785 int depth = 0;
4786
4787 SKIP(6);
4788 SKIP_BLANKS;
4789 if (RAW != '[') {
4790 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4792 ctxt->sax->error(ctxt->userData,
4793 "XML conditional section '[' expected\n");
4794 ctxt->wellFormed = 0;
4795 ctxt->disableSAX = 1;
4796 } else {
4797 NEXT;
4798 }
4799 if (xmlParserDebugEntities) {
4800 if ((ctxt->input != NULL) && (ctxt->input->filename))
4801 xmlGenericError(xmlGenericErrorContext,
4802 "%s(%d): ", ctxt->input->filename,
4803 ctxt->input->line);
4804 xmlGenericError(xmlGenericErrorContext,
4805 "Entering IGNORE Conditional Section\n");
4806 }
4807
4808 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004809 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004810 * But disable SAX event generating DTD building in the meantime
4811 */
4812 state = ctxt->disableSAX;
4813 instate = ctxt->instate;
4814 ctxt->disableSAX = 1;
4815 ctxt->instate = XML_PARSER_IGNORE;
4816
4817 while (depth >= 0) {
4818 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4819 depth++;
4820 SKIP(3);
4821 continue;
4822 }
4823 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4824 if (--depth >= 0) SKIP(3);
4825 continue;
4826 }
4827 NEXT;
4828 continue;
4829 }
4830
4831 ctxt->disableSAX = state;
4832 ctxt->instate = instate;
4833
4834 if (xmlParserDebugEntities) {
4835 if ((ctxt->input != NULL) && (ctxt->input->filename))
4836 xmlGenericError(xmlGenericErrorContext,
4837 "%s(%d): ", ctxt->input->filename,
4838 ctxt->input->line);
4839 xmlGenericError(xmlGenericErrorContext,
4840 "Leaving IGNORE Conditional Section\n");
4841 }
4842
4843 } else {
4844 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4846 ctxt->sax->error(ctxt->userData,
4847 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4848 ctxt->wellFormed = 0;
4849 ctxt->disableSAX = 1;
4850 }
4851
4852 if (RAW == 0)
4853 SHRINK;
4854
4855 if (RAW == 0) {
4856 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4858 ctxt->sax->error(ctxt->userData,
4859 "XML conditional section not closed\n");
4860 ctxt->wellFormed = 0;
4861 ctxt->disableSAX = 1;
4862 } else {
4863 SKIP(3);
4864 }
4865}
4866
4867/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004868 * xmlParseMarkupDecl:
4869 * @ctxt: an XML parser context
4870 *
4871 * parse Markup declarations
4872 *
4873 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4874 * NotationDecl | PI | Comment
4875 *
4876 * [ VC: Proper Declaration/PE Nesting ]
4877 * Parameter-entity replacement text must be properly nested with
4878 * markup declarations. That is to say, if either the first character
4879 * or the last character of a markup declaration (markupdecl above) is
4880 * contained in the replacement text for a parameter-entity reference,
4881 * both must be contained in the same replacement text.
4882 *
4883 * [ WFC: PEs in Internal Subset ]
4884 * In the internal DTD subset, parameter-entity references can occur
4885 * only where markup declarations can occur, not within markup declarations.
4886 * (This does not apply to references that occur in external parameter
4887 * entities or to the external subset.)
4888 */
4889void
4890xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4891 GROW;
4892 xmlParseElementDecl(ctxt);
4893 xmlParseAttributeListDecl(ctxt);
4894 xmlParseEntityDecl(ctxt);
4895 xmlParseNotationDecl(ctxt);
4896 xmlParsePI(ctxt);
4897 xmlParseComment(ctxt);
4898 /*
4899 * This is only for internal subset. On external entities,
4900 * the replacement is done before parsing stage
4901 */
4902 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4903 xmlParsePEReference(ctxt);
4904
4905 /*
4906 * Conditional sections are allowed from entities included
4907 * by PE References in the internal subset.
4908 */
4909 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4910 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4911 xmlParseConditionalSections(ctxt);
4912 }
4913 }
4914
4915 ctxt->instate = XML_PARSER_DTD;
4916}
4917
4918/**
4919 * xmlParseTextDecl:
4920 * @ctxt: an XML parser context
4921 *
4922 * parse an XML declaration header for external entities
4923 *
4924 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4925 *
4926 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4927 */
4928
4929void
4930xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4931 xmlChar *version;
4932
4933 /*
4934 * We know that '<?xml' is here.
4935 */
4936 if ((RAW == '<') && (NXT(1) == '?') &&
4937 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4938 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4939 SKIP(5);
4940 } else {
4941 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4943 ctxt->sax->error(ctxt->userData,
4944 "Text declaration '<?xml' required\n");
4945 ctxt->wellFormed = 0;
4946 ctxt->disableSAX = 1;
4947
4948 return;
4949 }
4950
4951 if (!IS_BLANK(CUR)) {
4952 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4954 ctxt->sax->error(ctxt->userData,
4955 "Space needed after '<?xml'\n");
4956 ctxt->wellFormed = 0;
4957 ctxt->disableSAX = 1;
4958 }
4959 SKIP_BLANKS;
4960
4961 /*
4962 * We may have the VersionInfo here.
4963 */
4964 version = xmlParseVersionInfo(ctxt);
4965 if (version == NULL)
4966 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00004967 else {
4968 if (!IS_BLANK(CUR)) {
4969 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4971 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4972 ctxt->wellFormed = 0;
4973 ctxt->disableSAX = 1;
4974 }
4975 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004976 ctxt->input->version = version;
4977
4978 /*
4979 * We must have the encoding declaration
4980 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004981 xmlParseEncodingDecl(ctxt);
4982 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4983 /*
4984 * The XML REC instructs us to stop parsing right here
4985 */
4986 return;
4987 }
4988
4989 SKIP_BLANKS;
4990 if ((RAW == '?') && (NXT(1) == '>')) {
4991 SKIP(2);
4992 } else if (RAW == '>') {
4993 /* Deprecated old WD ... */
4994 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4996 ctxt->sax->error(ctxt->userData,
4997 "XML declaration must end-up with '?>'\n");
4998 ctxt->wellFormed = 0;
4999 ctxt->disableSAX = 1;
5000 NEXT;
5001 } else {
5002 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5004 ctxt->sax->error(ctxt->userData,
5005 "parsing XML declaration: '?>' expected\n");
5006 ctxt->wellFormed = 0;
5007 ctxt->disableSAX = 1;
5008 MOVETO_ENDTAG(CUR_PTR);
5009 NEXT;
5010 }
5011}
5012
5013/**
Owen Taylor3473f882001-02-23 17:55:21 +00005014 * xmlParseExternalSubset:
5015 * @ctxt: an XML parser context
5016 * @ExternalID: the external identifier
5017 * @SystemID: the system identifier (or URL)
5018 *
5019 * parse Markup declarations from an external subset
5020 *
5021 * [30] extSubset ::= textDecl? extSubsetDecl
5022 *
5023 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5024 */
5025void
5026xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5027 const xmlChar *SystemID) {
5028 GROW;
5029 if ((RAW == '<') && (NXT(1) == '?') &&
5030 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5031 (NXT(4) == 'l')) {
5032 xmlParseTextDecl(ctxt);
5033 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5034 /*
5035 * The XML REC instructs us to stop parsing right here
5036 */
5037 ctxt->instate = XML_PARSER_EOF;
5038 return;
5039 }
5040 }
5041 if (ctxt->myDoc == NULL) {
5042 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5043 }
5044 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5045 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5046
5047 ctxt->instate = XML_PARSER_DTD;
5048 ctxt->external = 1;
5049 while (((RAW == '<') && (NXT(1) == '?')) ||
5050 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005051 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005052 const xmlChar *check = CUR_PTR;
5053 int cons = ctxt->input->consumed;
5054 int tok = ctxt->token;
5055
5056 GROW;
5057 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5058 xmlParseConditionalSections(ctxt);
5059 } else if (IS_BLANK(CUR)) {
5060 NEXT;
5061 } else if (RAW == '%') {
5062 xmlParsePEReference(ctxt);
5063 } else
5064 xmlParseMarkupDecl(ctxt);
5065
5066 /*
5067 * Pop-up of finished entities.
5068 */
5069 while ((RAW == 0) && (ctxt->inputNr > 1))
5070 xmlPopInput(ctxt);
5071
5072 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5073 (tok == ctxt->token)) {
5074 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5076 ctxt->sax->error(ctxt->userData,
5077 "Content error in the external subset\n");
5078 ctxt->wellFormed = 0;
5079 ctxt->disableSAX = 1;
5080 break;
5081 }
5082 }
5083
5084 if (RAW != 0) {
5085 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5086 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5087 ctxt->sax->error(ctxt->userData,
5088 "Extra content at the end of the document\n");
5089 ctxt->wellFormed = 0;
5090 ctxt->disableSAX = 1;
5091 }
5092
5093}
5094
5095/**
5096 * xmlParseReference:
5097 * @ctxt: an XML parser context
5098 *
 * Parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
5103 *
5104 * [67] Reference ::= EntityRef | CharRef
5105 */
5106void
5107xmlParseReference(xmlParserCtxtPtr ctxt) {
5108 xmlEntityPtr ent;
5109 xmlChar *val;
5110 if (RAW != '&') return;
5111
5112 if (NXT(1) == '#') {
5113 int i = 0;
5114 xmlChar out[10];
5115 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005116 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005117
5118 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5119 /*
5120 * So we are using non-UTF-8 buffers
5121 * Check that the char fit on 8bits, if not
5122 * generate a CharRef.
5123 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005124 if (value <= 0xFF) {
5125 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005126 out[1] = 0;
5127 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5128 (!ctxt->disableSAX))
5129 ctxt->sax->characters(ctxt->userData, out, 1);
5130 } else {
5131 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005132 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005134 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005135 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5136 (!ctxt->disableSAX))
5137 ctxt->sax->reference(ctxt->userData, out);
5138 }
5139 } else {
5140 /*
5141 * Just encode the value in UTF-8
5142 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005143 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005144 out[i] = 0;
5145 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5146 (!ctxt->disableSAX))
5147 ctxt->sax->characters(ctxt->userData, out, i);
5148 }
5149 } else {
5150 ent = xmlParseEntityRef(ctxt);
5151 if (ent == NULL) return;
5152 if ((ent->name != NULL) &&
5153 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5154 xmlNodePtr list = NULL;
5155 int ret;
5156
5157
5158 /*
5159 * The first reference to the entity trigger a parsing phase
5160 * where the ent->children is filled with the result from
5161 * the parsing.
5162 */
5163 if (ent->children == NULL) {
5164 xmlChar *value;
5165 value = ent->content;
5166
5167 /*
5168 * Check that this entity is well formed
5169 */
5170 if ((value != NULL) &&
5171 (value[1] == 0) && (value[0] == '<') &&
5172 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5173 /*
5174 * DONE: get definite answer on this !!!
5175 * Lots of entity decls are used to declare a single
5176 * char
5177 * <!ENTITY lt "<">
5178 * Which seems to be valid since
5179 * 2.4: The ampersand character (&) and the left angle
5180 * bracket (<) may appear in their literal form only
5181 * when used ... They are also legal within the literal
5182 * entity value of an internal entity declaration;i
5183 * see "4.3.2 Well-Formed Parsed Entities".
5184 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5185 * Looking at the OASIS test suite and James Clark
5186 * tests, this is broken. However the XML REC uses
5187 * it. Is the XML REC not well-formed ????
5188 * This is a hack to avoid this problem
5189 *
5190 * ANSWER: since lt gt amp .. are already defined,
5191 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005192 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005193 * is lousy but acceptable.
5194 */
5195 list = xmlNewDocText(ctxt->myDoc, value);
5196 if (list != NULL) {
5197 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5198 (ent->children == NULL)) {
5199 ent->children = list;
5200 ent->last = list;
5201 list->parent = (xmlNodePtr) ent;
5202 } else {
5203 xmlFreeNodeList(list);
5204 }
5205 } else if (list != NULL) {
5206 xmlFreeNodeList(list);
5207 }
5208 } else {
5209 /*
5210 * 4.3.2: An internal general parsed entity is well-formed
5211 * if its replacement text matches the production labeled
5212 * content.
5213 */
5214 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5215 ctxt->depth++;
5216 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5217 ctxt->sax, NULL, ctxt->depth,
5218 value, &list);
5219 ctxt->depth--;
5220 } else if (ent->etype ==
5221 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5222 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005223 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005224 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005225 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005226 ctxt->depth--;
5227 } else {
5228 ret = -1;
5229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5230 ctxt->sax->error(ctxt->userData,
5231 "Internal: invalid entity type\n");
5232 }
5233 if (ret == XML_ERR_ENTITY_LOOP) {
5234 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5236 ctxt->sax->error(ctxt->userData,
5237 "Detected entity reference loop\n");
5238 ctxt->wellFormed = 0;
5239 ctxt->disableSAX = 1;
5240 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005241 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5242 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005243 (ent->children == NULL)) {
5244 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005245 if (ctxt->replaceEntities) {
5246 /*
5247 * Prune it directly in the generated document
5248 * except for single text nodes.
5249 */
5250 if ((list->type == XML_TEXT_NODE) &&
5251 (list->next == NULL)) {
5252 list->parent = (xmlNodePtr) ent;
5253 list = NULL;
5254 } else {
5255 while (list != NULL) {
5256 list->parent = (xmlNodePtr) ctxt->node;
5257 if (list->next == NULL)
5258 ent->last = list;
5259 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005260 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005261 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005262 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5263 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005264 }
5265 } else {
5266 while (list != NULL) {
5267 list->parent = (xmlNodePtr) ent;
5268 if (list->next == NULL)
5269 ent->last = list;
5270 list = list->next;
5271 }
Owen Taylor3473f882001-02-23 17:55:21 +00005272 }
5273 } else {
5274 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005275 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005276 }
5277 } else if (ret > 0) {
5278 ctxt->errNo = ret;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "Entity value required\n");
5282 ctxt->wellFormed = 0;
5283 ctxt->disableSAX = 1;
5284 } else if (list != NULL) {
5285 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005286 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005287 }
5288 }
5289 }
5290 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5291 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5292 /*
5293 * Create a node.
5294 */
5295 ctxt->sax->reference(ctxt->userData, ent->name);
5296 return;
5297 } else if (ctxt->replaceEntities) {
5298 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5299 /*
5300 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005301 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005302 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005303 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005304 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005305 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005306 cur = ent->children;
5307 while (cur != NULL) {
5308 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005309 if (firstChild == NULL){
5310 firstChild = new;
5311 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005312 xmlAddChild(ctxt->node, new);
5313 if (cur == ent->last)
5314 break;
5315 cur = cur->next;
5316 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005317 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5318 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005319 } else {
5320 /*
5321 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005322 * node with a possible previous text one which
5323 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005324 */
5325 if (ent->children->type == XML_TEXT_NODE)
5326 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5327 if ((ent->last != ent->children) &&
5328 (ent->last->type == XML_TEXT_NODE))
5329 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5330 xmlAddChildList(ctxt->node, ent->children);
5331 }
5332
Owen Taylor3473f882001-02-23 17:55:21 +00005333 /*
5334 * This is to avoid a nasty side effect, see
5335 * characters() in SAX.c
5336 */
5337 ctxt->nodemem = 0;
5338 ctxt->nodelen = 0;
5339 return;
5340 } else {
5341 /*
5342 * Probably running in SAX mode
5343 */
5344 xmlParserInputPtr input;
5345
5346 input = xmlNewEntityInputStream(ctxt, ent);
5347 xmlPushInput(ctxt, input);
5348 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5349 (RAW == '<') && (NXT(1) == '?') &&
5350 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5351 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5352 xmlParseTextDecl(ctxt);
5353 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5354 /*
5355 * The XML REC instructs us to stop parsing right here
5356 */
5357 ctxt->instate = XML_PARSER_EOF;
5358 return;
5359 }
5360 if (input->standalone == 1) {
5361 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5363 ctxt->sax->error(ctxt->userData,
5364 "external parsed entities cannot be standalone\n");
5365 ctxt->wellFormed = 0;
5366 ctxt->disableSAX = 1;
5367 }
5368 }
5369 return;
5370 }
5371 }
5372 } else {
5373 val = ent->content;
5374 if (val == NULL) return;
5375 /*
5376 * inline the entity.
5377 */
5378 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5379 (!ctxt->disableSAX))
5380 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5381 }
5382 }
5383}
5384
5385/**
5386 * xmlParseEntityRef:
5387 * @ctxt: an XML parser context
5388 *
5389 * parse ENTITY references declarations
5390 *
5391 * [68] EntityRef ::= '&' Name ';'
5392 *
5393 * [ WFC: Entity Declared ]
5394 * In a document without any DTD, a document with only an internal DTD
5395 * subset which contains no parameter entity references, or a document
5396 * with "standalone='yes'", the Name given in the entity reference
5397 * must match that in an entity declaration, except that well-formed
5398 * documents need not declare any of the following entities: amp, lt,
5399 * gt, apos, quot. The declaration of a parameter entity must precede
5400 * any reference to it. Similarly, the declaration of a general entity
5401 * must precede any reference to it which appears in a default value in an
5402 * attribute-list declaration. Note that if entities are declared in the
5403 * external subset or in external parameter entities, a non-validating
5404 * processor is not obligated to read and process their declarations;
5405 * for such documents, the rule that an entity must be declared is a
5406 * well-formedness constraint only if standalone='yes'.
5407 *
5408 * [ WFC: Parsed Entity ]
5409 * An entity reference must not contain the name of an unparsed entity
5410 *
5411 * Returns the xmlEntityPtr if found, or NULL otherwise.
5412 */
5413xmlEntityPtr
5414xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5415 xmlChar *name;
5416 xmlEntityPtr ent = NULL;
5417
5418 GROW;
5419
5420 if (RAW == '&') {
5421 NEXT;
5422 name = xmlParseName(ctxt);
5423 if (name == NULL) {
5424 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5426 ctxt->sax->error(ctxt->userData,
5427 "xmlParseEntityRef: no name\n");
5428 ctxt->wellFormed = 0;
5429 ctxt->disableSAX = 1;
5430 } else {
5431 if (RAW == ';') {
5432 NEXT;
5433 /*
5434 * Ask first SAX for entity resolution, otherwise try the
5435 * predefined set.
5436 */
5437 if (ctxt->sax != NULL) {
5438 if (ctxt->sax->getEntity != NULL)
5439 ent = ctxt->sax->getEntity(ctxt->userData, name);
5440 if (ent == NULL)
5441 ent = xmlGetPredefinedEntity(name);
5442 }
5443 /*
5444 * [ WFC: Entity Declared ]
5445 * In a document without any DTD, a document with only an
5446 * internal DTD subset which contains no parameter entity
5447 * references, or a document with "standalone='yes'", the
5448 * Name given in the entity reference must match that in an
5449 * entity declaration, except that well-formed documents
5450 * need not declare any of the following entities: amp, lt,
5451 * gt, apos, quot.
5452 * The declaration of a parameter entity must precede any
5453 * reference to it.
5454 * Similarly, the declaration of a general entity must
5455 * precede any reference to it which appears in a default
5456 * value in an attribute-list declaration. Note that if
5457 * entities are declared in the external subset or in
5458 * external parameter entities, a non-validating processor
5459 * is not obligated to read and process their declarations;
5460 * for such documents, the rule that an entity must be
5461 * declared is a well-formedness constraint only if
5462 * standalone='yes'.
5463 */
5464 if (ent == NULL) {
5465 if ((ctxt->standalone == 1) ||
5466 ((ctxt->hasExternalSubset == 0) &&
5467 (ctxt->hasPErefs == 0))) {
5468 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5470 ctxt->sax->error(ctxt->userData,
5471 "Entity '%s' not defined\n", name);
5472 ctxt->wellFormed = 0;
5473 ctxt->disableSAX = 1;
5474 } else {
5475 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005477 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005478 "Entity '%s' not defined\n", name);
5479 }
5480 }
5481
5482 /*
5483 * [ WFC: Parsed Entity ]
5484 * An entity reference must not contain the name of an
5485 * unparsed entity
5486 */
5487 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5488 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5490 ctxt->sax->error(ctxt->userData,
5491 "Entity reference to unparsed entity %s\n", name);
5492 ctxt->wellFormed = 0;
5493 ctxt->disableSAX = 1;
5494 }
5495
5496 /*
5497 * [ WFC: No External Entity References ]
5498 * Attribute values cannot contain direct or indirect
5499 * entity references to external entities.
5500 */
5501 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5502 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5503 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5505 ctxt->sax->error(ctxt->userData,
5506 "Attribute references external entity '%s'\n", name);
5507 ctxt->wellFormed = 0;
5508 ctxt->disableSAX = 1;
5509 }
5510 /*
5511 * [ WFC: No < in Attribute Values ]
5512 * The replacement text of any entity referred to directly or
5513 * indirectly in an attribute value (other than "&lt;") must
5514 * not contain a <.
5515 */
5516 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5517 (ent != NULL) &&
5518 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5519 (ent->content != NULL) &&
5520 (xmlStrchr(ent->content, '<'))) {
5521 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5523 ctxt->sax->error(ctxt->userData,
5524 "'<' in entity '%s' is not allowed in attributes values\n", name);
5525 ctxt->wellFormed = 0;
5526 ctxt->disableSAX = 1;
5527 }
5528
5529 /*
5530 * Internal check, no parameter entities here ...
5531 */
5532 else {
5533 switch (ent->etype) {
5534 case XML_INTERNAL_PARAMETER_ENTITY:
5535 case XML_EXTERNAL_PARAMETER_ENTITY:
5536 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5538 ctxt->sax->error(ctxt->userData,
5539 "Attempt to reference the parameter entity '%s'\n", name);
5540 ctxt->wellFormed = 0;
5541 ctxt->disableSAX = 1;
5542 break;
5543 default:
5544 break;
5545 }
5546 }
5547
5548 /*
5549 * [ WFC: No Recursion ]
5550 * A parsed entity must not contain a recursive reference
5551 * to itself, either directly or indirectly.
5552 * Done somewhere else
5553 */
5554
5555 } else {
5556 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5558 ctxt->sax->error(ctxt->userData,
5559 "xmlParseEntityRef: expecting ';'\n");
5560 ctxt->wellFormed = 0;
5561 ctxt->disableSAX = 1;
5562 }
5563 xmlFree(name);
5564 }
5565 }
5566 return(ent);
5567}
5568
5569/**
5570 * xmlParseStringEntityRef:
5571 * @ctxt: an XML parser context
5572 * @str: a pointer to an index in the string
5573 *
5574 * parse ENTITY references declarations, but this version parses it from
5575 * a string value.
5576 *
5577 * [68] EntityRef ::= '&' Name ';'
5578 *
5579 * [ WFC: Entity Declared ]
5580 * In a document without any DTD, a document with only an internal DTD
5581 * subset which contains no parameter entity references, or a document
5582 * with "standalone='yes'", the Name given in the entity reference
5583 * must match that in an entity declaration, except that well-formed
5584 * documents need not declare any of the following entities: amp, lt,
5585 * gt, apos, quot. The declaration of a parameter entity must precede
5586 * any reference to it. Similarly, the declaration of a general entity
5587 * must precede any reference to it which appears in a default value in an
5588 * attribute-list declaration. Note that if entities are declared in the
5589 * external subset or in external parameter entities, a non-validating
5590 * processor is not obligated to read and process their declarations;
5591 * for such documents, the rule that an entity must be declared is a
5592 * well-formedness constraint only if standalone='yes'.
5593 *
5594 * [ WFC: Parsed Entity ]
5595 * An entity reference must not contain the name of an unparsed entity
5596 *
5597 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5598 * is updated to the current location in the string.
5599 */
5600xmlEntityPtr
5601xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5602 xmlChar *name;
5603 const xmlChar *ptr;
5604 xmlChar cur;
5605 xmlEntityPtr ent = NULL;
5606
5607 if ((str == NULL) || (*str == NULL))
5608 return(NULL);
5609 ptr = *str;
5610 cur = *ptr;
5611 if (cur == '&') {
5612 ptr++;
5613 cur = *ptr;
5614 name = xmlParseStringName(ctxt, &ptr);
5615 if (name == NULL) {
5616 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5618 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005619 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005620 ctxt->wellFormed = 0;
5621 ctxt->disableSAX = 1;
5622 } else {
5623 if (*ptr == ';') {
5624 ptr++;
5625 /*
5626 * Ask first SAX for entity resolution, otherwise try the
5627 * predefined set.
5628 */
5629 if (ctxt->sax != NULL) {
5630 if (ctxt->sax->getEntity != NULL)
5631 ent = ctxt->sax->getEntity(ctxt->userData, name);
5632 if (ent == NULL)
5633 ent = xmlGetPredefinedEntity(name);
5634 }
5635 /*
5636 * [ WFC: Entity Declared ]
5637 * In a document without any DTD, a document with only an
5638 * internal DTD subset which contains no parameter entity
5639 * references, or a document with "standalone='yes'", the
5640 * Name given in the entity reference must match that in an
5641 * entity declaration, except that well-formed documents
5642 * need not declare any of the following entities: amp, lt,
5643 * gt, apos, quot.
5644 * The declaration of a parameter entity must precede any
5645 * reference to it.
5646 * Similarly, the declaration of a general entity must
5647 * precede any reference to it which appears in a default
5648 * value in an attribute-list declaration. Note that if
5649 * entities are declared in the external subset or in
5650 * external parameter entities, a non-validating processor
5651 * is not obligated to read and process their declarations;
5652 * for such documents, the rule that an entity must be
5653 * declared is a well-formedness constraint only if
5654 * standalone='yes'.
5655 */
5656 if (ent == NULL) {
5657 if ((ctxt->standalone == 1) ||
5658 ((ctxt->hasExternalSubset == 0) &&
5659 (ctxt->hasPErefs == 0))) {
5660 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5662 ctxt->sax->error(ctxt->userData,
5663 "Entity '%s' not defined\n", name);
5664 ctxt->wellFormed = 0;
5665 ctxt->disableSAX = 1;
5666 } else {
5667 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5668 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5669 ctxt->sax->warning(ctxt->userData,
5670 "Entity '%s' not defined\n", name);
5671 }
5672 }
5673
5674 /*
5675 * [ WFC: Parsed Entity ]
5676 * An entity reference must not contain the name of an
5677 * unparsed entity
5678 */
5679 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5680 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5682 ctxt->sax->error(ctxt->userData,
5683 "Entity reference to unparsed entity %s\n", name);
5684 ctxt->wellFormed = 0;
5685 ctxt->disableSAX = 1;
5686 }
5687
5688 /*
5689 * [ WFC: No External Entity References ]
5690 * Attribute values cannot contain direct or indirect
5691 * entity references to external entities.
5692 */
5693 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5694 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5695 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5697 ctxt->sax->error(ctxt->userData,
5698 "Attribute references external entity '%s'\n", name);
5699 ctxt->wellFormed = 0;
5700 ctxt->disableSAX = 1;
5701 }
5702 /*
5703 * [ WFC: No < in Attribute Values ]
5704 * The replacement text of any entity referred to directly or
5705 * indirectly in an attribute value (other than "&lt;") must
5706 * not contain a <.
5707 */
5708 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5709 (ent != NULL) &&
5710 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5711 (ent->content != NULL) &&
5712 (xmlStrchr(ent->content, '<'))) {
5713 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5715 ctxt->sax->error(ctxt->userData,
5716 "'<' in entity '%s' is not allowed in attributes values\n", name);
5717 ctxt->wellFormed = 0;
5718 ctxt->disableSAX = 1;
5719 }
5720
5721 /*
5722 * Internal check, no parameter entities here ...
5723 */
5724 else {
5725 switch (ent->etype) {
5726 case XML_INTERNAL_PARAMETER_ENTITY:
5727 case XML_EXTERNAL_PARAMETER_ENTITY:
5728 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5730 ctxt->sax->error(ctxt->userData,
5731 "Attempt to reference the parameter entity '%s'\n", name);
5732 ctxt->wellFormed = 0;
5733 ctxt->disableSAX = 1;
5734 break;
5735 default:
5736 break;
5737 }
5738 }
5739
5740 /*
5741 * [ WFC: No Recursion ]
5742 * A parsed entity must not contain a recursive reference
5743 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005744 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005745 */
5746
5747 } else {
5748 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5750 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005751 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005752 ctxt->wellFormed = 0;
5753 ctxt->disableSAX = 1;
5754 }
5755 xmlFree(name);
5756 }
5757 }
5758 *str = ptr;
5759 return(ent);
5760}
5761
5762/**
5763 * xmlParsePEReference:
5764 * @ctxt: an XML parser context
5765 *
5766 * parse PEReference declarations
5767 * The entity content is handled directly by pushing it's content as
5768 * a new input stream.
5769 *
5770 * [69] PEReference ::= '%' Name ';'
5771 *
5772 * [ WFC: No Recursion ]
5773 * A parsed entity must not contain a recursive
5774 * reference to itself, either directly or indirectly.
5775 *
5776 * [ WFC: Entity Declared ]
5777 * In a document without any DTD, a document with only an internal DTD
5778 * subset which contains no parameter entity references, or a document
5779 * with "standalone='yes'", ... ... The declaration of a parameter
5780 * entity must precede any reference to it...
5781 *
5782 * [ VC: Entity Declared ]
5783 * In a document with an external subset or external parameter entities
5784 * with "standalone='no'", ... ... The declaration of a parameter entity
5785 * must precede any reference to it...
5786 *
5787 * [ WFC: In DTD ]
5788 * Parameter-entity references may only appear in the DTD.
5789 * NOTE: misleading but this is handled.
5790 */
5791void
5792xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5793 xmlChar *name;
5794 xmlEntityPtr entity = NULL;
5795 xmlParserInputPtr input;
5796
5797 if (RAW == '%') {
5798 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005799 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005800 if (name == NULL) {
5801 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5803 ctxt->sax->error(ctxt->userData,
5804 "xmlParsePEReference: no name\n");
5805 ctxt->wellFormed = 0;
5806 ctxt->disableSAX = 1;
5807 } else {
5808 if (RAW == ';') {
5809 NEXT;
5810 if ((ctxt->sax != NULL) &&
5811 (ctxt->sax->getParameterEntity != NULL))
5812 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5813 name);
5814 if (entity == NULL) {
5815 /*
5816 * [ WFC: Entity Declared ]
5817 * In a document without any DTD, a document with only an
5818 * internal DTD subset which contains no parameter entity
5819 * references, or a document with "standalone='yes'", ...
5820 * ... The declaration of a parameter entity must precede
5821 * any reference to it...
5822 */
5823 if ((ctxt->standalone == 1) ||
5824 ((ctxt->hasExternalSubset == 0) &&
5825 (ctxt->hasPErefs == 0))) {
5826 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5827 if ((!ctxt->disableSAX) &&
5828 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5829 ctxt->sax->error(ctxt->userData,
5830 "PEReference: %%%s; not found\n", name);
5831 ctxt->wellFormed = 0;
5832 ctxt->disableSAX = 1;
5833 } else {
5834 /*
5835 * [ VC: Entity Declared ]
5836 * In a document with an external subset or external
5837 * parameter entities with "standalone='no'", ...
5838 * ... The declaration of a parameter entity must precede
5839 * any reference to it...
5840 */
5841 if ((!ctxt->disableSAX) &&
5842 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5843 ctxt->sax->warning(ctxt->userData,
5844 "PEReference: %%%s; not found\n", name);
5845 ctxt->valid = 0;
5846 }
5847 } else {
5848 /*
5849 * Internal checking in case the entity quest barfed
5850 */
5851 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5852 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5853 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5854 ctxt->sax->warning(ctxt->userData,
5855 "Internal: %%%s; is not a parameter entity\n", name);
5856 } else {
5857 /*
5858 * TODO !!!
5859 * handle the extra spaces added before and after
5860 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5861 */
5862 input = xmlNewEntityInputStream(ctxt, entity);
5863 xmlPushInput(ctxt, input);
5864 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5865 (RAW == '<') && (NXT(1) == '?') &&
5866 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5867 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5868 xmlParseTextDecl(ctxt);
5869 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5870 /*
5871 * The XML REC instructs us to stop parsing
5872 * right here
5873 */
5874 ctxt->instate = XML_PARSER_EOF;
5875 xmlFree(name);
5876 return;
5877 }
5878 }
5879 if (ctxt->token == 0)
5880 ctxt->token = ' ';
5881 }
5882 }
5883 ctxt->hasPErefs = 1;
5884 } else {
5885 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5887 ctxt->sax->error(ctxt->userData,
5888 "xmlParsePEReference: expecting ';'\n");
5889 ctxt->wellFormed = 0;
5890 ctxt->disableSAX = 1;
5891 }
5892 xmlFree(name);
5893 }
5894 }
5895}
5896
5897/**
5898 * xmlParseStringPEReference:
5899 * @ctxt: an XML parser context
5900 * @str: a pointer to an index in the string
5901 *
5902 * parse PEReference declarations
5903 *
5904 * [69] PEReference ::= '%' Name ';'
5905 *
5906 * [ WFC: No Recursion ]
5907 * A parsed entity must not contain a recursive
5908 * reference to itself, either directly or indirectly.
5909 *
5910 * [ WFC: Entity Declared ]
5911 * In a document without any DTD, a document with only an internal DTD
5912 * subset which contains no parameter entity references, or a document
5913 * with "standalone='yes'", ... ... The declaration of a parameter
5914 * entity must precede any reference to it...
5915 *
5916 * [ VC: Entity Declared ]
5917 * In a document with an external subset or external parameter entities
5918 * with "standalone='no'", ... ... The declaration of a parameter entity
5919 * must precede any reference to it...
5920 *
5921 * [ WFC: In DTD ]
5922 * Parameter-entity references may only appear in the DTD.
5923 * NOTE: misleading but this is handled.
5924 *
5925 * Returns the string of the entity content.
5926 * str is updated to the current value of the index
5927 */
5928xmlEntityPtr
5929xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5930 const xmlChar *ptr;
5931 xmlChar cur;
5932 xmlChar *name;
5933 xmlEntityPtr entity = NULL;
5934
5935 if ((str == NULL) || (*str == NULL)) return(NULL);
5936 ptr = *str;
5937 cur = *ptr;
5938 if (cur == '%') {
5939 ptr++;
5940 cur = *ptr;
5941 name = xmlParseStringName(ctxt, &ptr);
5942 if (name == NULL) {
5943 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5945 ctxt->sax->error(ctxt->userData,
5946 "xmlParseStringPEReference: no name\n");
5947 ctxt->wellFormed = 0;
5948 ctxt->disableSAX = 1;
5949 } else {
5950 cur = *ptr;
5951 if (cur == ';') {
5952 ptr++;
5953 cur = *ptr;
5954 if ((ctxt->sax != NULL) &&
5955 (ctxt->sax->getParameterEntity != NULL))
5956 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5957 name);
5958 if (entity == NULL) {
5959 /*
5960 * [ WFC: Entity Declared ]
5961 * In a document without any DTD, a document with only an
5962 * internal DTD subset which contains no parameter entity
5963 * references, or a document with "standalone='yes'", ...
5964 * ... The declaration of a parameter entity must precede
5965 * any reference to it...
5966 */
5967 if ((ctxt->standalone == 1) ||
5968 ((ctxt->hasExternalSubset == 0) &&
5969 (ctxt->hasPErefs == 0))) {
5970 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5972 ctxt->sax->error(ctxt->userData,
5973 "PEReference: %%%s; not found\n", name);
5974 ctxt->wellFormed = 0;
5975 ctxt->disableSAX = 1;
5976 } else {
5977 /*
5978 * [ VC: Entity Declared ]
5979 * In a document with an external subset or external
5980 * parameter entities with "standalone='no'", ...
5981 * ... The declaration of a parameter entity must
5982 * precede any reference to it...
5983 */
5984 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5985 ctxt->sax->warning(ctxt->userData,
5986 "PEReference: %%%s; not found\n", name);
5987 ctxt->valid = 0;
5988 }
5989 } else {
5990 /*
5991 * Internal checking in case the entity quest barfed
5992 */
5993 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5994 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5995 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5996 ctxt->sax->warning(ctxt->userData,
5997 "Internal: %%%s; is not a parameter entity\n", name);
5998 }
5999 }
6000 ctxt->hasPErefs = 1;
6001 } else {
6002 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6004 ctxt->sax->error(ctxt->userData,
6005 "xmlParseStringPEReference: expecting ';'\n");
6006 ctxt->wellFormed = 0;
6007 ctxt->disableSAX = 1;
6008 }
6009 xmlFree(name);
6010 }
6011 }
6012 *str = ptr;
6013 return(entity);
6014}
6015
6016/**
6017 * xmlParseDocTypeDecl:
6018 * @ctxt: an XML parser context
6019 *
6020 * parse a DOCTYPE declaration
6021 *
6022 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6023 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6024 *
6025 * [ VC: Root Element Type ]
6026 * The Name in the document type declaration must match the element
6027 * type of the root element.
6028 */
6029
6030void
6031xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6032 xmlChar *name = NULL;
6033 xmlChar *ExternalID = NULL;
6034 xmlChar *URI = NULL;
6035
6036 /*
6037 * We know that '<!DOCTYPE' has been detected.
6038 */
6039 SKIP(9);
6040
6041 SKIP_BLANKS;
6042
6043 /*
6044 * Parse the DOCTYPE name.
6045 */
6046 name = xmlParseName(ctxt);
6047 if (name == NULL) {
6048 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6050 ctxt->sax->error(ctxt->userData,
6051 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6052 ctxt->wellFormed = 0;
6053 ctxt->disableSAX = 1;
6054 }
6055 ctxt->intSubName = name;
6056
6057 SKIP_BLANKS;
6058
6059 /*
6060 * Check for SystemID and ExternalID
6061 */
6062 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6063
6064 if ((URI != NULL) || (ExternalID != NULL)) {
6065 ctxt->hasExternalSubset = 1;
6066 }
6067 ctxt->extSubURI = URI;
6068 ctxt->extSubSystem = ExternalID;
6069
6070 SKIP_BLANKS;
6071
6072 /*
6073 * Create and update the internal subset.
6074 */
6075 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6076 (!ctxt->disableSAX))
6077 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6078
6079 /*
6080 * Is there any internal subset declarations ?
6081 * they are handled separately in xmlParseInternalSubset()
6082 */
6083 if (RAW == '[')
6084 return;
6085
6086 /*
6087 * We should be at the end of the DOCTYPE declaration.
6088 */
6089 if (RAW != '>') {
6090 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006092 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006093 ctxt->wellFormed = 0;
6094 ctxt->disableSAX = 1;
6095 }
6096 NEXT;
6097}
6098
6099/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006100 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006101 * @ctxt: an XML parser context
6102 *
6103 * parse the internal subset declaration
6104 *
6105 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6106 */
6107
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006108static void
Owen Taylor3473f882001-02-23 17:55:21 +00006109xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6110 /*
6111 * Is there any DTD definition ?
6112 */
6113 if (RAW == '[') {
6114 ctxt->instate = XML_PARSER_DTD;
6115 NEXT;
6116 /*
6117 * Parse the succession of Markup declarations and
6118 * PEReferences.
6119 * Subsequence (markupdecl | PEReference | S)*
6120 */
6121 while (RAW != ']') {
6122 const xmlChar *check = CUR_PTR;
6123 int cons = ctxt->input->consumed;
6124
6125 SKIP_BLANKS;
6126 xmlParseMarkupDecl(ctxt);
6127 xmlParsePEReference(ctxt);
6128
6129 /*
6130 * Pop-up of finished entities.
6131 */
6132 while ((RAW == 0) && (ctxt->inputNr > 1))
6133 xmlPopInput(ctxt);
6134
6135 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6136 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6138 ctxt->sax->error(ctxt->userData,
6139 "xmlParseInternalSubset: error detected in Markup declaration\n");
6140 ctxt->wellFormed = 0;
6141 ctxt->disableSAX = 1;
6142 break;
6143 }
6144 }
6145 if (RAW == ']') {
6146 NEXT;
6147 SKIP_BLANKS;
6148 }
6149 }
6150
6151 /*
6152 * We should be at the end of the DOCTYPE declaration.
6153 */
6154 if (RAW != '>') {
6155 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006157 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006158 ctxt->wellFormed = 0;
6159 ctxt->disableSAX = 1;
6160 }
6161 NEXT;
6162}
6163
6164/**
6165 * xmlParseAttribute:
6166 * @ctxt: an XML parser context
6167 * @value: a xmlChar ** used to store the value of the attribute
6168 *
6169 * parse an attribute
6170 *
6171 * [41] Attribute ::= Name Eq AttValue
6172 *
6173 * [ WFC: No External Entity References ]
6174 * Attribute values cannot contain direct or indirect entity references
6175 * to external entities.
6176 *
6177 * [ WFC: No < in Attribute Values ]
6178 * The replacement text of any entity referred to directly or indirectly in
6179 * an attribute value (other than "&lt;") must not contain a <.
6180 *
6181 * [ VC: Attribute Value Type ]
6182 * The attribute must have been declared; the value must be of the type
6183 * declared for it.
6184 *
6185 * [25] Eq ::= S? '=' S?
6186 *
6187 * With namespace:
6188 *
6189 * [NS 11] Attribute ::= QName Eq AttValue
6190 *
6191 * Also the case QName == xmlns:??? is handled independently as a namespace
6192 * definition.
6193 *
6194 * Returns the attribute name, and the value in *value.
6195 */
6196
6197xmlChar *
6198xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6199 xmlChar *name, *val;
6200
6201 *value = NULL;
6202 name = xmlParseName(ctxt);
6203 if (name == NULL) {
6204 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6206 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6207 ctxt->wellFormed = 0;
6208 ctxt->disableSAX = 1;
6209 return(NULL);
6210 }
6211
6212 /*
6213 * read the value
6214 */
6215 SKIP_BLANKS;
6216 if (RAW == '=') {
6217 NEXT;
6218 SKIP_BLANKS;
6219 val = xmlParseAttValue(ctxt);
6220 ctxt->instate = XML_PARSER_CONTENT;
6221 } else {
6222 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6224 ctxt->sax->error(ctxt->userData,
6225 "Specification mandate value for attribute %s\n", name);
6226 ctxt->wellFormed = 0;
6227 ctxt->disableSAX = 1;
6228 xmlFree(name);
6229 return(NULL);
6230 }
6231
6232 /*
6233 * Check that xml:lang conforms to the specification
6234 * No more registered as an error, just generate a warning now
6235 * since this was deprecated in XML second edition
6236 */
6237 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6238 if (!xmlCheckLanguageID(val)) {
6239 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6240 ctxt->sax->warning(ctxt->userData,
6241 "Malformed value for xml:lang : %s\n", val);
6242 }
6243 }
6244
6245 /*
6246 * Check that xml:space conforms to the specification
6247 */
6248 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6249 if (xmlStrEqual(val, BAD_CAST "default"))
6250 *(ctxt->space) = 0;
6251 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6252 *(ctxt->space) = 1;
6253 else {
6254 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6256 ctxt->sax->error(ctxt->userData,
6257"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6258 val);
6259 ctxt->wellFormed = 0;
6260 ctxt->disableSAX = 1;
6261 }
6262 }
6263
6264 *value = val;
6265 return(name);
6266}
6267
6268/**
6269 * xmlParseStartTag:
6270 * @ctxt: an XML parser context
6271 *
6272 * parse a start of tag either for rule element or
6273 * EmptyElement. In both case we don't parse the tag closing chars.
6274 *
6275 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6276 *
6277 * [ WFC: Unique Att Spec ]
6278 * No attribute name may appear more than once in the same start-tag or
6279 * empty-element tag.
6280 *
6281 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6282 *
6283 * [ WFC: Unique Att Spec ]
6284 * No attribute name may appear more than once in the same start-tag or
6285 * empty-element tag.
6286 *
6287 * With namespace:
6288 *
6289 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6290 *
6291 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6292 *
6293 * Returns the element name parsed
6294 */
6295
6296xmlChar *
6297xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6298 xmlChar *name;
6299 xmlChar *attname;
6300 xmlChar *attvalue;
6301 const xmlChar **atts = NULL;
6302 int nbatts = 0;
6303 int maxatts = 0;
6304 int i;
6305
6306 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006307 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006308
6309 name = xmlParseName(ctxt);
6310 if (name == NULL) {
6311 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6313 ctxt->sax->error(ctxt->userData,
6314 "xmlParseStartTag: invalid element name\n");
6315 ctxt->wellFormed = 0;
6316 ctxt->disableSAX = 1;
6317 return(NULL);
6318 }
6319
6320 /*
6321 * Now parse the attributes, it ends up with the ending
6322 *
6323 * (S Attribute)* S?
6324 */
6325 SKIP_BLANKS;
6326 GROW;
6327
Daniel Veillard21a0f912001-02-25 19:54:14 +00006328 while ((RAW != '>') &&
6329 ((RAW != '/') || (NXT(1) != '>')) &&
6330 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006331 const xmlChar *q = CUR_PTR;
6332 int cons = ctxt->input->consumed;
6333
6334 attname = xmlParseAttribute(ctxt, &attvalue);
6335 if ((attname != NULL) && (attvalue != NULL)) {
6336 /*
6337 * [ WFC: Unique Att Spec ]
6338 * No attribute name may appear more than once in the same
6339 * start-tag or empty-element tag.
6340 */
6341 for (i = 0; i < nbatts;i += 2) {
6342 if (xmlStrEqual(atts[i], attname)) {
6343 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6345 ctxt->sax->error(ctxt->userData,
6346 "Attribute %s redefined\n",
6347 attname);
6348 ctxt->wellFormed = 0;
6349 ctxt->disableSAX = 1;
6350 xmlFree(attname);
6351 xmlFree(attvalue);
6352 goto failed;
6353 }
6354 }
6355
6356 /*
6357 * Add the pair to atts
6358 */
6359 if (atts == NULL) {
6360 maxatts = 10;
6361 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6362 if (atts == NULL) {
6363 xmlGenericError(xmlGenericErrorContext,
6364 "malloc of %ld byte failed\n",
6365 maxatts * (long)sizeof(xmlChar *));
6366 return(NULL);
6367 }
6368 } else if (nbatts + 4 > maxatts) {
6369 maxatts *= 2;
6370 atts = (const xmlChar **) xmlRealloc((void *) atts,
6371 maxatts * sizeof(xmlChar *));
6372 if (atts == NULL) {
6373 xmlGenericError(xmlGenericErrorContext,
6374 "realloc of %ld byte failed\n",
6375 maxatts * (long)sizeof(xmlChar *));
6376 return(NULL);
6377 }
6378 }
6379 atts[nbatts++] = attname;
6380 atts[nbatts++] = attvalue;
6381 atts[nbatts] = NULL;
6382 atts[nbatts + 1] = NULL;
6383 } else {
6384 if (attname != NULL)
6385 xmlFree(attname);
6386 if (attvalue != NULL)
6387 xmlFree(attvalue);
6388 }
6389
6390failed:
6391
6392 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6393 break;
6394 if (!IS_BLANK(RAW)) {
6395 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6397 ctxt->sax->error(ctxt->userData,
6398 "attributes construct error\n");
6399 ctxt->wellFormed = 0;
6400 ctxt->disableSAX = 1;
6401 }
6402 SKIP_BLANKS;
6403 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6404 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6406 ctxt->sax->error(ctxt->userData,
6407 "xmlParseStartTag: problem parsing attributes\n");
6408 ctxt->wellFormed = 0;
6409 ctxt->disableSAX = 1;
6410 break;
6411 }
6412 GROW;
6413 }
6414
6415 /*
6416 * SAX: Start of Element !
6417 */
6418 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6419 (!ctxt->disableSAX))
6420 ctxt->sax->startElement(ctxt->userData, name, atts);
6421
6422 if (atts != NULL) {
6423 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6424 xmlFree((void *) atts);
6425 }
6426 return(name);
6427}
6428
6429/**
6430 * xmlParseEndTag:
6431 * @ctxt: an XML parser context
6432 *
6433 * parse an end of tag
6434 *
6435 * [42] ETag ::= '</' Name S? '>'
6436 *
6437 * With namespace
6438 *
6439 * [NS 9] ETag ::= '</' QName S? '>'
6440 */
6441
6442void
6443xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6444 xmlChar *name;
6445 xmlChar *oldname;
6446
6447 GROW;
6448 if ((RAW != '<') || (NXT(1) != '/')) {
6449 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6451 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6452 ctxt->wellFormed = 0;
6453 ctxt->disableSAX = 1;
6454 return;
6455 }
6456 SKIP(2);
6457
6458 name = xmlParseName(ctxt);
6459
6460 /*
6461 * We should definitely be at the ending "S? '>'" part
6462 */
6463 GROW;
6464 SKIP_BLANKS;
6465 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6466 ctxt->errNo = XML_ERR_GT_REQUIRED;
6467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6468 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6469 ctxt->wellFormed = 0;
6470 ctxt->disableSAX = 1;
6471 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006472 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006473
6474 /*
6475 * [ WFC: Element Type Match ]
6476 * The Name in an element's end-tag must match the element type in the
6477 * start-tag.
6478 *
6479 */
6480 if ((name == NULL) || (ctxt->name == NULL) ||
6481 (!xmlStrEqual(name, ctxt->name))) {
6482 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6484 if ((name != NULL) && (ctxt->name != NULL)) {
6485 ctxt->sax->error(ctxt->userData,
6486 "Opening and ending tag mismatch: %s and %s\n",
6487 ctxt->name, name);
6488 } else if (ctxt->name != NULL) {
6489 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006490 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006491 } else {
6492 ctxt->sax->error(ctxt->userData,
6493 "Ending tag error: internal error ???\n");
6494 }
6495
6496 }
6497 ctxt->wellFormed = 0;
6498 ctxt->disableSAX = 1;
6499 }
6500
6501 /*
6502 * SAX: End of Tag
6503 */
6504 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6505 (!ctxt->disableSAX))
6506 ctxt->sax->endElement(ctxt->userData, name);
6507
6508 if (name != NULL)
6509 xmlFree(name);
6510 oldname = namePop(ctxt);
6511 spacePop(ctxt);
6512 if (oldname != NULL) {
6513#ifdef DEBUG_STACK
6514 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6515#endif
6516 xmlFree(oldname);
6517 }
6518 return;
6519}
6520
6521/**
6522 * xmlParseCDSect:
6523 * @ctxt: an XML parser context
6524 *
6525 * Parse escaped pure raw content.
6526 *
6527 * [18] CDSect ::= CDStart CData CDEnd
6528 *
6529 * [19] CDStart ::= '<![CDATA['
6530 *
6531 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6532 *
6533 * [21] CDEnd ::= ']]>'
6534 */
6535void
6536xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6537 xmlChar *buf = NULL;
6538 int len = 0;
6539 int size = XML_PARSER_BUFFER_SIZE;
6540 int r, rl;
6541 int s, sl;
6542 int cur, l;
6543 int count = 0;
6544
6545 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6546 (NXT(2) == '[') && (NXT(3) == 'C') &&
6547 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6548 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6549 (NXT(8) == '[')) {
6550 SKIP(9);
6551 } else
6552 return;
6553
6554 ctxt->instate = XML_PARSER_CDATA_SECTION;
6555 r = CUR_CHAR(rl);
6556 if (!IS_CHAR(r)) {
6557 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6559 ctxt->sax->error(ctxt->userData,
6560 "CData section not finished\n");
6561 ctxt->wellFormed = 0;
6562 ctxt->disableSAX = 1;
6563 ctxt->instate = XML_PARSER_CONTENT;
6564 return;
6565 }
6566 NEXTL(rl);
6567 s = CUR_CHAR(sl);
6568 if (!IS_CHAR(s)) {
6569 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6571 ctxt->sax->error(ctxt->userData,
6572 "CData section not finished\n");
6573 ctxt->wellFormed = 0;
6574 ctxt->disableSAX = 1;
6575 ctxt->instate = XML_PARSER_CONTENT;
6576 return;
6577 }
6578 NEXTL(sl);
6579 cur = CUR_CHAR(l);
6580 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6581 if (buf == NULL) {
6582 xmlGenericError(xmlGenericErrorContext,
6583 "malloc of %d byte failed\n", size);
6584 return;
6585 }
6586 while (IS_CHAR(cur) &&
6587 ((r != ']') || (s != ']') || (cur != '>'))) {
6588 if (len + 5 >= size) {
6589 size *= 2;
6590 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6591 if (buf == NULL) {
6592 xmlGenericError(xmlGenericErrorContext,
6593 "realloc of %d byte failed\n", size);
6594 return;
6595 }
6596 }
6597 COPY_BUF(rl,buf,len,r);
6598 r = s;
6599 rl = sl;
6600 s = cur;
6601 sl = l;
6602 count++;
6603 if (count > 50) {
6604 GROW;
6605 count = 0;
6606 }
6607 NEXTL(l);
6608 cur = CUR_CHAR(l);
6609 }
6610 buf[len] = 0;
6611 ctxt->instate = XML_PARSER_CONTENT;
6612 if (cur != '>') {
6613 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6615 ctxt->sax->error(ctxt->userData,
6616 "CData section not finished\n%.50s\n", buf);
6617 ctxt->wellFormed = 0;
6618 ctxt->disableSAX = 1;
6619 xmlFree(buf);
6620 return;
6621 }
6622 NEXTL(l);
6623
6624 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006625 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006626 */
6627 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6628 if (ctxt->sax->cdataBlock != NULL)
6629 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006630 else if (ctxt->sax->characters != NULL)
6631 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006632 }
6633 xmlFree(buf);
6634}
6635
6636/**
6637 * xmlParseContent:
6638 * @ctxt: an XML parser context
6639 *
6640 * Parse a content:
6641 *
6642 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6643 */
6644
6645void
6646xmlParseContent(xmlParserCtxtPtr ctxt) {
6647 GROW;
6648 while (((RAW != 0) || (ctxt->token != 0)) &&
6649 ((RAW != '<') || (NXT(1) != '/'))) {
6650 const xmlChar *test = CUR_PTR;
6651 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006652 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006653 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006654
6655 /*
6656 * Handle possible processed charrefs.
6657 */
6658 if (ctxt->token != 0) {
6659 xmlParseCharData(ctxt, 0);
6660 }
6661 /*
6662 * First case : a Processing Instruction.
6663 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006664 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006665 xmlParsePI(ctxt);
6666 }
6667
6668 /*
6669 * Second case : a CDSection
6670 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006671 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006672 (NXT(2) == '[') && (NXT(3) == 'C') &&
6673 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6674 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6675 (NXT(8) == '[')) {
6676 xmlParseCDSect(ctxt);
6677 }
6678
6679 /*
6680 * Third case : a comment
6681 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006682 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006683 (NXT(2) == '-') && (NXT(3) == '-')) {
6684 xmlParseComment(ctxt);
6685 ctxt->instate = XML_PARSER_CONTENT;
6686 }
6687
6688 /*
6689 * Fourth case : a sub-element.
6690 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006691 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006692 xmlParseElement(ctxt);
6693 }
6694
6695 /*
6696 * Fifth case : a reference. If if has not been resolved,
6697 * parsing returns it's Name, create the node
6698 */
6699
Daniel Veillard21a0f912001-02-25 19:54:14 +00006700 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006701 xmlParseReference(ctxt);
6702 }
6703
6704 /*
6705 * Last case, text. Note that References are handled directly.
6706 */
6707 else {
6708 xmlParseCharData(ctxt, 0);
6709 }
6710
6711 GROW;
6712 /*
6713 * Pop-up of finished entities.
6714 */
6715 while ((RAW == 0) && (ctxt->inputNr > 1))
6716 xmlPopInput(ctxt);
6717 SHRINK;
6718
6719 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6720 (tok == ctxt->token)) {
6721 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6723 ctxt->sax->error(ctxt->userData,
6724 "detected an error in element content\n");
6725 ctxt->wellFormed = 0;
6726 ctxt->disableSAX = 1;
6727 ctxt->instate = XML_PARSER_EOF;
6728 break;
6729 }
6730 }
6731}
6732
6733/**
6734 * xmlParseElement:
6735 * @ctxt: an XML parser context
6736 *
6737 * parse an XML element, this is highly recursive
6738 *
6739 * [39] element ::= EmptyElemTag | STag content ETag
6740 *
6741 * [ WFC: Element Type Match ]
6742 * The Name in an element's end-tag must match the element type in the
6743 * start-tag.
6744 *
6745 * [ VC: Element Valid ]
6746 * An element is valid if there is a declaration matching elementdecl
6747 * where the Name matches the element type and one of the following holds:
6748 * - The declaration matches EMPTY and the element has no content.
6749 * - The declaration matches children and the sequence of child elements
6750 * belongs to the language generated by the regular expression in the
6751 * content model, with optional white space (characters matching the
6752 * nonterminal S) between each pair of child elements.
6753 * - The declaration matches Mixed and the content consists of character
6754 * data and child elements whose types match names in the content model.
6755 * - The declaration matches ANY, and the types of any child elements have
6756 * been declared.
6757 */
6758
6759void
6760xmlParseElement(xmlParserCtxtPtr ctxt) {
6761 const xmlChar *openTag = CUR_PTR;
6762 xmlChar *name;
6763 xmlChar *oldname;
6764 xmlParserNodeInfo node_info;
6765 xmlNodePtr ret;
6766
6767 /* Capture start position */
6768 if (ctxt->record_info) {
6769 node_info.begin_pos = ctxt->input->consumed +
6770 (CUR_PTR - ctxt->input->base);
6771 node_info.begin_line = ctxt->input->line;
6772 }
6773
6774 if (ctxt->spaceNr == 0)
6775 spacePush(ctxt, -1);
6776 else
6777 spacePush(ctxt, *ctxt->space);
6778
6779 name = xmlParseStartTag(ctxt);
6780 if (name == NULL) {
6781 spacePop(ctxt);
6782 return;
6783 }
6784 namePush(ctxt, name);
6785 ret = ctxt->node;
6786
6787 /*
6788 * [ VC: Root Element Type ]
6789 * The Name in the document type declaration must match the element
6790 * type of the root element.
6791 */
6792 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6793 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6794 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6795
6796 /*
6797 * Check for an Empty Element.
6798 */
6799 if ((RAW == '/') && (NXT(1) == '>')) {
6800 SKIP(2);
6801 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6802 (!ctxt->disableSAX))
6803 ctxt->sax->endElement(ctxt->userData, name);
6804 oldname = namePop(ctxt);
6805 spacePop(ctxt);
6806 if (oldname != NULL) {
6807#ifdef DEBUG_STACK
6808 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6809#endif
6810 xmlFree(oldname);
6811 }
6812 if ( ret != NULL && ctxt->record_info ) {
6813 node_info.end_pos = ctxt->input->consumed +
6814 (CUR_PTR - ctxt->input->base);
6815 node_info.end_line = ctxt->input->line;
6816 node_info.node = ret;
6817 xmlParserAddNodeInfo(ctxt, &node_info);
6818 }
6819 return;
6820 }
6821 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006822 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006823 } else {
6824 ctxt->errNo = XML_ERR_GT_REQUIRED;
6825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6826 ctxt->sax->error(ctxt->userData,
6827 "Couldn't find end of Start Tag\n%.30s\n",
6828 openTag);
6829 ctxt->wellFormed = 0;
6830 ctxt->disableSAX = 1;
6831
6832 /*
6833 * end of parsing of this node.
6834 */
6835 nodePop(ctxt);
6836 oldname = namePop(ctxt);
6837 spacePop(ctxt);
6838 if (oldname != NULL) {
6839#ifdef DEBUG_STACK
6840 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6841#endif
6842 xmlFree(oldname);
6843 }
6844
6845 /*
6846 * Capture end position and add node
6847 */
6848 if ( ret != NULL && ctxt->record_info ) {
6849 node_info.end_pos = ctxt->input->consumed +
6850 (CUR_PTR - ctxt->input->base);
6851 node_info.end_line = ctxt->input->line;
6852 node_info.node = ret;
6853 xmlParserAddNodeInfo(ctxt, &node_info);
6854 }
6855 return;
6856 }
6857
6858 /*
6859 * Parse the content of the element:
6860 */
6861 xmlParseContent(ctxt);
6862 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006863 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6865 ctxt->sax->error(ctxt->userData,
6866 "Premature end of data in tag %.30s\n", openTag);
6867 ctxt->wellFormed = 0;
6868 ctxt->disableSAX = 1;
6869
6870 /*
6871 * end of parsing of this node.
6872 */
6873 nodePop(ctxt);
6874 oldname = namePop(ctxt);
6875 spacePop(ctxt);
6876 if (oldname != NULL) {
6877#ifdef DEBUG_STACK
6878 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6879#endif
6880 xmlFree(oldname);
6881 }
6882 return;
6883 }
6884
6885 /*
6886 * parse the end of tag: '</' should be here.
6887 */
6888 xmlParseEndTag(ctxt);
6889
6890 /*
6891 * Capture end position and add node
6892 */
6893 if ( ret != NULL && ctxt->record_info ) {
6894 node_info.end_pos = ctxt->input->consumed +
6895 (CUR_PTR - ctxt->input->base);
6896 node_info.end_line = ctxt->input->line;
6897 node_info.node = ret;
6898 xmlParserAddNodeInfo(ctxt, &node_info);
6899 }
6900}
6901
6902/**
6903 * xmlParseVersionNum:
6904 * @ctxt: an XML parser context
6905 *
6906 * parse the XML version value.
6907 *
6908 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6909 *
6910 * Returns the string giving the XML version number, or NULL
6911 */
6912xmlChar *
6913xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6914 xmlChar *buf = NULL;
6915 int len = 0;
6916 int size = 10;
6917 xmlChar cur;
6918
6919 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6920 if (buf == NULL) {
6921 xmlGenericError(xmlGenericErrorContext,
6922 "malloc of %d byte failed\n", size);
6923 return(NULL);
6924 }
6925 cur = CUR;
6926 while (((cur >= 'a') && (cur <= 'z')) ||
6927 ((cur >= 'A') && (cur <= 'Z')) ||
6928 ((cur >= '0') && (cur <= '9')) ||
6929 (cur == '_') || (cur == '.') ||
6930 (cur == ':') || (cur == '-')) {
6931 if (len + 1 >= size) {
6932 size *= 2;
6933 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6934 if (buf == NULL) {
6935 xmlGenericError(xmlGenericErrorContext,
6936 "realloc of %d byte failed\n", size);
6937 return(NULL);
6938 }
6939 }
6940 buf[len++] = cur;
6941 NEXT;
6942 cur=CUR;
6943 }
6944 buf[len] = 0;
6945 return(buf);
6946}
6947
6948/**
6949 * xmlParseVersionInfo:
6950 * @ctxt: an XML parser context
6951 *
6952 * parse the XML version.
6953 *
6954 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6955 *
6956 * [25] Eq ::= S? '=' S?
6957 *
6958 * Returns the version string, e.g. "1.0"
6959 */
6960
6961xmlChar *
6962xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6963 xmlChar *version = NULL;
6964 const xmlChar *q;
6965
6966 if ((RAW == 'v') && (NXT(1) == 'e') &&
6967 (NXT(2) == 'r') && (NXT(3) == 's') &&
6968 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6969 (NXT(6) == 'n')) {
6970 SKIP(7);
6971 SKIP_BLANKS;
6972 if (RAW != '=') {
6973 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6975 ctxt->sax->error(ctxt->userData,
6976 "xmlParseVersionInfo : expected '='\n");
6977 ctxt->wellFormed = 0;
6978 ctxt->disableSAX = 1;
6979 return(NULL);
6980 }
6981 NEXT;
6982 SKIP_BLANKS;
6983 if (RAW == '"') {
6984 NEXT;
6985 q = CUR_PTR;
6986 version = xmlParseVersionNum(ctxt);
6987 if (RAW != '"') {
6988 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6990 ctxt->sax->error(ctxt->userData,
6991 "String not closed\n%.50s\n", q);
6992 ctxt->wellFormed = 0;
6993 ctxt->disableSAX = 1;
6994 } else
6995 NEXT;
6996 } else if (RAW == '\''){
6997 NEXT;
6998 q = CUR_PTR;
6999 version = xmlParseVersionNum(ctxt);
7000 if (RAW != '\'') {
7001 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7003 ctxt->sax->error(ctxt->userData,
7004 "String not closed\n%.50s\n", q);
7005 ctxt->wellFormed = 0;
7006 ctxt->disableSAX = 1;
7007 } else
7008 NEXT;
7009 } else {
7010 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7012 ctxt->sax->error(ctxt->userData,
7013 "xmlParseVersionInfo : expected ' or \"\n");
7014 ctxt->wellFormed = 0;
7015 ctxt->disableSAX = 1;
7016 }
7017 }
7018 return(version);
7019}
7020
7021/**
7022 * xmlParseEncName:
7023 * @ctxt: an XML parser context
7024 *
7025 * parse the XML encoding name
7026 *
7027 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7028 *
7029 * Returns the encoding name value or NULL
7030 */
7031xmlChar *
7032xmlParseEncName(xmlParserCtxtPtr ctxt) {
7033 xmlChar *buf = NULL;
7034 int len = 0;
7035 int size = 10;
7036 xmlChar cur;
7037
7038 cur = CUR;
7039 if (((cur >= 'a') && (cur <= 'z')) ||
7040 ((cur >= 'A') && (cur <= 'Z'))) {
7041 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7042 if (buf == NULL) {
7043 xmlGenericError(xmlGenericErrorContext,
7044 "malloc of %d byte failed\n", size);
7045 return(NULL);
7046 }
7047
7048 buf[len++] = cur;
7049 NEXT;
7050 cur = CUR;
7051 while (((cur >= 'a') && (cur <= 'z')) ||
7052 ((cur >= 'A') && (cur <= 'Z')) ||
7053 ((cur >= '0') && (cur <= '9')) ||
7054 (cur == '.') || (cur == '_') ||
7055 (cur == '-')) {
7056 if (len + 1 >= size) {
7057 size *= 2;
7058 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7059 if (buf == NULL) {
7060 xmlGenericError(xmlGenericErrorContext,
7061 "realloc of %d byte failed\n", size);
7062 return(NULL);
7063 }
7064 }
7065 buf[len++] = cur;
7066 NEXT;
7067 cur = CUR;
7068 if (cur == 0) {
7069 SHRINK;
7070 GROW;
7071 cur = CUR;
7072 }
7073 }
7074 buf[len] = 0;
7075 } else {
7076 ctxt->errNo = XML_ERR_ENCODING_NAME;
7077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7078 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7079 ctxt->wellFormed = 0;
7080 ctxt->disableSAX = 1;
7081 }
7082 return(buf);
7083}
7084
7085/**
7086 * xmlParseEncodingDecl:
7087 * @ctxt: an XML parser context
7088 *
7089 * parse the XML encoding declaration
7090 *
7091 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7092 *
7093 * this setups the conversion filters.
7094 *
7095 * Returns the encoding value or NULL
7096 */
7097
7098xmlChar *
7099xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7100 xmlChar *encoding = NULL;
7101 const xmlChar *q;
7102
7103 SKIP_BLANKS;
7104 if ((RAW == 'e') && (NXT(1) == 'n') &&
7105 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7106 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7107 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7108 SKIP(8);
7109 SKIP_BLANKS;
7110 if (RAW != '=') {
7111 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7113 ctxt->sax->error(ctxt->userData,
7114 "xmlParseEncodingDecl : expected '='\n");
7115 ctxt->wellFormed = 0;
7116 ctxt->disableSAX = 1;
7117 return(NULL);
7118 }
7119 NEXT;
7120 SKIP_BLANKS;
7121 if (RAW == '"') {
7122 NEXT;
7123 q = CUR_PTR;
7124 encoding = xmlParseEncName(ctxt);
7125 if (RAW != '"') {
7126 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7128 ctxt->sax->error(ctxt->userData,
7129 "String not closed\n%.50s\n", q);
7130 ctxt->wellFormed = 0;
7131 ctxt->disableSAX = 1;
7132 } else
7133 NEXT;
7134 } else if (RAW == '\''){
7135 NEXT;
7136 q = CUR_PTR;
7137 encoding = xmlParseEncName(ctxt);
7138 if (RAW != '\'') {
7139 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7141 ctxt->sax->error(ctxt->userData,
7142 "String not closed\n%.50s\n", q);
7143 ctxt->wellFormed = 0;
7144 ctxt->disableSAX = 1;
7145 } else
7146 NEXT;
7147 } else if (RAW == '"'){
7148 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7150 ctxt->sax->error(ctxt->userData,
7151 "xmlParseEncodingDecl : expected ' or \"\n");
7152 ctxt->wellFormed = 0;
7153 ctxt->disableSAX = 1;
7154 }
7155 if (encoding != NULL) {
7156 xmlCharEncoding enc;
7157 xmlCharEncodingHandlerPtr handler;
7158
7159 if (ctxt->input->encoding != NULL)
7160 xmlFree((xmlChar *) ctxt->input->encoding);
7161 ctxt->input->encoding = encoding;
7162
7163 enc = xmlParseCharEncoding((const char *) encoding);
7164 /*
7165 * registered set of known encodings
7166 */
7167 if (enc != XML_CHAR_ENCODING_ERROR) {
7168 xmlSwitchEncoding(ctxt, enc);
7169 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7170 xmlFree(encoding);
7171 return(NULL);
7172 }
7173 } else {
7174 /*
7175 * fallback for unknown encodings
7176 */
7177 handler = xmlFindCharEncodingHandler((const char *) encoding);
7178 if (handler != NULL) {
7179 xmlSwitchToEncoding(ctxt, handler);
7180 } else {
7181 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7183 ctxt->sax->error(ctxt->userData,
7184 "Unsupported encoding %s\n", encoding);
7185 return(NULL);
7186 }
7187 }
7188 }
7189 }
7190 return(encoding);
7191}
7192
7193/**
7194 * xmlParseSDDecl:
7195 * @ctxt: an XML parser context
7196 *
7197 * parse the XML standalone declaration
7198 *
7199 * [32] SDDecl ::= S 'standalone' Eq
7200 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7201 *
7202 * [ VC: Standalone Document Declaration ]
7203 * TODO The standalone document declaration must have the value "no"
7204 * if any external markup declarations contain declarations of:
7205 * - attributes with default values, if elements to which these
7206 * attributes apply appear in the document without specifications
7207 * of values for these attributes, or
7208 * - entities (other than amp, lt, gt, apos, quot), if references
7209 * to those entities appear in the document, or
7210 * - attributes with values subject to normalization, where the
7211 * attribute appears in the document with a value which will change
7212 * as a result of normalization, or
7213 * - element types with element content, if white space occurs directly
7214 * within any instance of those types.
7215 *
7216 * Returns 1 if standalone, 0 otherwise
7217 */
7218
7219int
7220xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7221 int standalone = -1;
7222
7223 SKIP_BLANKS;
7224 if ((RAW == 's') && (NXT(1) == 't') &&
7225 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7226 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7227 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7228 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7229 SKIP(10);
7230 SKIP_BLANKS;
7231 if (RAW != '=') {
7232 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7234 ctxt->sax->error(ctxt->userData,
7235 "XML standalone declaration : expected '='\n");
7236 ctxt->wellFormed = 0;
7237 ctxt->disableSAX = 1;
7238 return(standalone);
7239 }
7240 NEXT;
7241 SKIP_BLANKS;
7242 if (RAW == '\''){
7243 NEXT;
7244 if ((RAW == 'n') && (NXT(1) == 'o')) {
7245 standalone = 0;
7246 SKIP(2);
7247 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7248 (NXT(2) == 's')) {
7249 standalone = 1;
7250 SKIP(3);
7251 } else {
7252 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7254 ctxt->sax->error(ctxt->userData,
7255 "standalone accepts only 'yes' or 'no'\n");
7256 ctxt->wellFormed = 0;
7257 ctxt->disableSAX = 1;
7258 }
7259 if (RAW != '\'') {
7260 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7262 ctxt->sax->error(ctxt->userData, "String not closed\n");
7263 ctxt->wellFormed = 0;
7264 ctxt->disableSAX = 1;
7265 } else
7266 NEXT;
7267 } else if (RAW == '"'){
7268 NEXT;
7269 if ((RAW == 'n') && (NXT(1) == 'o')) {
7270 standalone = 0;
7271 SKIP(2);
7272 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7273 (NXT(2) == 's')) {
7274 standalone = 1;
7275 SKIP(3);
7276 } else {
7277 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7279 ctxt->sax->error(ctxt->userData,
7280 "standalone accepts only 'yes' or 'no'\n");
7281 ctxt->wellFormed = 0;
7282 ctxt->disableSAX = 1;
7283 }
7284 if (RAW != '"') {
7285 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7287 ctxt->sax->error(ctxt->userData, "String not closed\n");
7288 ctxt->wellFormed = 0;
7289 ctxt->disableSAX = 1;
7290 } else
7291 NEXT;
7292 } else {
7293 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7295 ctxt->sax->error(ctxt->userData,
7296 "Standalone value not found\n");
7297 ctxt->wellFormed = 0;
7298 ctxt->disableSAX = 1;
7299 }
7300 }
7301 return(standalone);
7302}
7303
7304/**
7305 * xmlParseXMLDecl:
7306 * @ctxt: an XML parser context
7307 *
7308 * parse an XML declaration header
7309 *
7310 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7311 */
7312
7313void
7314xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7315 xmlChar *version;
7316
7317 /*
7318 * We know that '<?xml' is here.
7319 */
7320 SKIP(5);
7321
7322 if (!IS_BLANK(RAW)) {
7323 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7325 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7326 ctxt->wellFormed = 0;
7327 ctxt->disableSAX = 1;
7328 }
7329 SKIP_BLANKS;
7330
7331 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007332 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007333 */
7334 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007335 if (version == NULL) {
7336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7337 ctxt->sax->error(ctxt->userData,
7338 "Malformed declaration expecting version\n");
7339 ctxt->wellFormed = 0;
7340 ctxt->disableSAX = 1;
7341 } else {
7342 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7343 /*
7344 * TODO: Blueberry should be detected here
7345 */
7346 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7347 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7348 version);
7349 }
7350 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007351 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007352 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007353 }
Owen Taylor3473f882001-02-23 17:55:21 +00007354
7355 /*
7356 * We may have the encoding declaration
7357 */
7358 if (!IS_BLANK(RAW)) {
7359 if ((RAW == '?') && (NXT(1) == '>')) {
7360 SKIP(2);
7361 return;
7362 }
7363 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7365 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7366 ctxt->wellFormed = 0;
7367 ctxt->disableSAX = 1;
7368 }
7369 xmlParseEncodingDecl(ctxt);
7370 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7371 /*
7372 * The XML REC instructs us to stop parsing right here
7373 */
7374 return;
7375 }
7376
7377 /*
7378 * We may have the standalone status.
7379 */
7380 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7381 if ((RAW == '?') && (NXT(1) == '>')) {
7382 SKIP(2);
7383 return;
7384 }
7385 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7387 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7388 ctxt->wellFormed = 0;
7389 ctxt->disableSAX = 1;
7390 }
7391 SKIP_BLANKS;
7392 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7393
7394 SKIP_BLANKS;
7395 if ((RAW == '?') && (NXT(1) == '>')) {
7396 SKIP(2);
7397 } else if (RAW == '>') {
7398 /* Deprecated old WD ... */
7399 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7401 ctxt->sax->error(ctxt->userData,
7402 "XML declaration must end-up with '?>'\n");
7403 ctxt->wellFormed = 0;
7404 ctxt->disableSAX = 1;
7405 NEXT;
7406 } else {
7407 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7409 ctxt->sax->error(ctxt->userData,
7410 "parsing XML declaration: '?>' expected\n");
7411 ctxt->wellFormed = 0;
7412 ctxt->disableSAX = 1;
7413 MOVETO_ENDTAG(CUR_PTR);
7414 NEXT;
7415 }
7416}
7417
7418/**
7419 * xmlParseMisc:
7420 * @ctxt: an XML parser context
7421 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007422 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007423 *
7424 * [27] Misc ::= Comment | PI | S
7425 */
7426
7427void
7428xmlParseMisc(xmlParserCtxtPtr ctxt) {
7429 while (((RAW == '<') && (NXT(1) == '?')) ||
7430 ((RAW == '<') && (NXT(1) == '!') &&
7431 (NXT(2) == '-') && (NXT(3) == '-')) ||
7432 IS_BLANK(CUR)) {
7433 if ((RAW == '<') && (NXT(1) == '?')) {
7434 xmlParsePI(ctxt);
7435 } else if (IS_BLANK(CUR)) {
7436 NEXT;
7437 } else
7438 xmlParseComment(ctxt);
7439 }
7440}
7441
7442/**
7443 * xmlParseDocument:
7444 * @ctxt: an XML parser context
7445 *
7446 * parse an XML document (and build a tree if using the standard SAX
7447 * interface).
7448 *
7449 * [1] document ::= prolog element Misc*
7450 *
7451 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7452 *
7453 * Returns 0, -1 in case of error. the parser context is augmented
7454 * as a result of the parsing.
7455 */
7456
7457int
7458xmlParseDocument(xmlParserCtxtPtr ctxt) {
7459 xmlChar start[4];
7460 xmlCharEncoding enc;
7461
7462 xmlInitParser();
7463
7464 GROW;
7465
7466 /*
7467 * SAX: beginning of the document processing.
7468 */
7469 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7470 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7471
Daniel Veillard50f34372001-08-03 12:06:36 +00007472 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007473 /*
7474 * Get the 4 first bytes and decode the charset
7475 * if enc != XML_CHAR_ENCODING_NONE
7476 * plug some encoding conversion routines.
7477 */
7478 start[0] = RAW;
7479 start[1] = NXT(1);
7480 start[2] = NXT(2);
7481 start[3] = NXT(3);
7482 enc = xmlDetectCharEncoding(start, 4);
7483 if (enc != XML_CHAR_ENCODING_NONE) {
7484 xmlSwitchEncoding(ctxt, enc);
7485 }
Owen Taylor3473f882001-02-23 17:55:21 +00007486 }
7487
7488
7489 if (CUR == 0) {
7490 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7492 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7493 ctxt->wellFormed = 0;
7494 ctxt->disableSAX = 1;
7495 }
7496
7497 /*
7498 * Check for the XMLDecl in the Prolog.
7499 */
7500 GROW;
7501 if ((RAW == '<') && (NXT(1) == '?') &&
7502 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7503 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7504
7505 /*
7506 * Note that we will switch encoding on the fly.
7507 */
7508 xmlParseXMLDecl(ctxt);
7509 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7510 /*
7511 * The XML REC instructs us to stop parsing right here
7512 */
7513 return(-1);
7514 }
7515 ctxt->standalone = ctxt->input->standalone;
7516 SKIP_BLANKS;
7517 } else {
7518 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7519 }
7520 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7521 ctxt->sax->startDocument(ctxt->userData);
7522
7523 /*
7524 * The Misc part of the Prolog
7525 */
7526 GROW;
7527 xmlParseMisc(ctxt);
7528
7529 /*
7530 * Then possibly doc type declaration(s) and more Misc
7531 * (doctypedecl Misc*)?
7532 */
7533 GROW;
7534 if ((RAW == '<') && (NXT(1) == '!') &&
7535 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7536 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7537 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7538 (NXT(8) == 'E')) {
7539
7540 ctxt->inSubset = 1;
7541 xmlParseDocTypeDecl(ctxt);
7542 if (RAW == '[') {
7543 ctxt->instate = XML_PARSER_DTD;
7544 xmlParseInternalSubset(ctxt);
7545 }
7546
7547 /*
7548 * Create and update the external subset.
7549 */
7550 ctxt->inSubset = 2;
7551 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7552 (!ctxt->disableSAX))
7553 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7554 ctxt->extSubSystem, ctxt->extSubURI);
7555 ctxt->inSubset = 0;
7556
7557
7558 ctxt->instate = XML_PARSER_PROLOG;
7559 xmlParseMisc(ctxt);
7560 }
7561
7562 /*
7563 * Time to start parsing the tree itself
7564 */
7565 GROW;
7566 if (RAW != '<') {
7567 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7569 ctxt->sax->error(ctxt->userData,
7570 "Start tag expected, '<' not found\n");
7571 ctxt->wellFormed = 0;
7572 ctxt->disableSAX = 1;
7573 ctxt->instate = XML_PARSER_EOF;
7574 } else {
7575 ctxt->instate = XML_PARSER_CONTENT;
7576 xmlParseElement(ctxt);
7577 ctxt->instate = XML_PARSER_EPILOG;
7578
7579
7580 /*
7581 * The Misc part at the end
7582 */
7583 xmlParseMisc(ctxt);
7584
7585 if (RAW != 0) {
7586 ctxt->errNo = XML_ERR_DOCUMENT_END;
7587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7588 ctxt->sax->error(ctxt->userData,
7589 "Extra content at the end of the document\n");
7590 ctxt->wellFormed = 0;
7591 ctxt->disableSAX = 1;
7592 }
7593 ctxt->instate = XML_PARSER_EOF;
7594 }
7595
7596 /*
7597 * SAX: end of the document processing.
7598 */
7599 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7600 (!ctxt->disableSAX))
7601 ctxt->sax->endDocument(ctxt->userData);
7602
7603 if (! ctxt->wellFormed) return(-1);
7604 return(0);
7605}
7606
7607/**
7608 * xmlParseExtParsedEnt:
7609 * @ctxt: an XML parser context
7610 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007611 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007612 * An external general parsed entity is well-formed if it matches the
7613 * production labeled extParsedEnt.
7614 *
7615 * [78] extParsedEnt ::= TextDecl? content
7616 *
7617 * Returns 0, -1 in case of error. the parser context is augmented
7618 * as a result of the parsing.
7619 */
7620
7621int
7622xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7623 xmlChar start[4];
7624 xmlCharEncoding enc;
7625
7626 xmlDefaultSAXHandlerInit();
7627
7628 GROW;
7629
7630 /*
7631 * SAX: beginning of the document processing.
7632 */
7633 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7634 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7635
7636 /*
7637 * Get the 4 first bytes and decode the charset
7638 * if enc != XML_CHAR_ENCODING_NONE
7639 * plug some encoding conversion routines.
7640 */
7641 start[0] = RAW;
7642 start[1] = NXT(1);
7643 start[2] = NXT(2);
7644 start[3] = NXT(3);
7645 enc = xmlDetectCharEncoding(start, 4);
7646 if (enc != XML_CHAR_ENCODING_NONE) {
7647 xmlSwitchEncoding(ctxt, enc);
7648 }
7649
7650
7651 if (CUR == 0) {
7652 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7654 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7655 ctxt->wellFormed = 0;
7656 ctxt->disableSAX = 1;
7657 }
7658
7659 /*
7660 * Check for the XMLDecl in the Prolog.
7661 */
7662 GROW;
7663 if ((RAW == '<') && (NXT(1) == '?') &&
7664 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7665 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7666
7667 /*
7668 * Note that we will switch encoding on the fly.
7669 */
7670 xmlParseXMLDecl(ctxt);
7671 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7672 /*
7673 * The XML REC instructs us to stop parsing right here
7674 */
7675 return(-1);
7676 }
7677 SKIP_BLANKS;
7678 } else {
7679 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7680 }
7681 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7682 ctxt->sax->startDocument(ctxt->userData);
7683
7684 /*
7685 * Doing validity checking on chunk doesn't make sense
7686 */
7687 ctxt->instate = XML_PARSER_CONTENT;
7688 ctxt->validate = 0;
7689 ctxt->loadsubset = 0;
7690 ctxt->depth = 0;
7691
7692 xmlParseContent(ctxt);
7693
7694 if ((RAW == '<') && (NXT(1) == '/')) {
7695 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7697 ctxt->sax->error(ctxt->userData,
7698 "chunk is not well balanced\n");
7699 ctxt->wellFormed = 0;
7700 ctxt->disableSAX = 1;
7701 } else if (RAW != 0) {
7702 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7704 ctxt->sax->error(ctxt->userData,
7705 "extra content at the end of well balanced chunk\n");
7706 ctxt->wellFormed = 0;
7707 ctxt->disableSAX = 1;
7708 }
7709
7710 /*
7711 * SAX: end of the document processing.
7712 */
7713 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7714 (!ctxt->disableSAX))
7715 ctxt->sax->endDocument(ctxt->userData);
7716
7717 if (! ctxt->wellFormed) return(-1);
7718 return(0);
7719}
7720
7721/************************************************************************
7722 * *
7723 * Progressive parsing interfaces *
7724 * *
7725 ************************************************************************/
7726
7727/**
7728 * xmlParseLookupSequence:
7729 * @ctxt: an XML parser context
7730 * @first: the first char to lookup
7731 * @next: the next char to lookup or zero
7732 * @third: the next char to lookup or zero
7733 *
7734 * Try to find if a sequence (first, next, third) or just (first next) or
7735 * (first) is available in the input stream.
7736 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7737 * to avoid rescanning sequences of bytes, it DOES change the state of the
7738 * parser, do not use liberally.
7739 *
7740 * Returns the index to the current parsing point if the full sequence
7741 * is available, -1 otherwise.
7742 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007743static int
Owen Taylor3473f882001-02-23 17:55:21 +00007744xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7745 xmlChar next, xmlChar third) {
7746 int base, len;
7747 xmlParserInputPtr in;
7748 const xmlChar *buf;
7749
7750 in = ctxt->input;
7751 if (in == NULL) return(-1);
7752 base = in->cur - in->base;
7753 if (base < 0) return(-1);
7754 if (ctxt->checkIndex > base)
7755 base = ctxt->checkIndex;
7756 if (in->buf == NULL) {
7757 buf = in->base;
7758 len = in->length;
7759 } else {
7760 buf = in->buf->buffer->content;
7761 len = in->buf->buffer->use;
7762 }
7763 /* take into account the sequence length */
7764 if (third) len -= 2;
7765 else if (next) len --;
7766 for (;base < len;base++) {
7767 if (buf[base] == first) {
7768 if (third != 0) {
7769 if ((buf[base + 1] != next) ||
7770 (buf[base + 2] != third)) continue;
7771 } else if (next != 0) {
7772 if (buf[base + 1] != next) continue;
7773 }
7774 ctxt->checkIndex = 0;
7775#ifdef DEBUG_PUSH
7776 if (next == 0)
7777 xmlGenericError(xmlGenericErrorContext,
7778 "PP: lookup '%c' found at %d\n",
7779 first, base);
7780 else if (third == 0)
7781 xmlGenericError(xmlGenericErrorContext,
7782 "PP: lookup '%c%c' found at %d\n",
7783 first, next, base);
7784 else
7785 xmlGenericError(xmlGenericErrorContext,
7786 "PP: lookup '%c%c%c' found at %d\n",
7787 first, next, third, base);
7788#endif
7789 return(base - (in->cur - in->base));
7790 }
7791 }
7792 ctxt->checkIndex = base;
7793#ifdef DEBUG_PUSH
7794 if (next == 0)
7795 xmlGenericError(xmlGenericErrorContext,
7796 "PP: lookup '%c' failed\n", first);
7797 else if (third == 0)
7798 xmlGenericError(xmlGenericErrorContext,
7799 "PP: lookup '%c%c' failed\n", first, next);
7800 else
7801 xmlGenericError(xmlGenericErrorContext,
7802 "PP: lookup '%c%c%c' failed\n", first, next, third);
7803#endif
7804 return(-1);
7805}
7806
7807/**
7808 * xmlParseTryOrFinish:
7809 * @ctxt: an XML parser context
7810 * @terminate: last chunk indicator
7811 *
7812 * Try to progress on parsing
7813 *
7814 * Returns zero if no parsing was possible
7815 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007816static int
Owen Taylor3473f882001-02-23 17:55:21 +00007817xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7818 int ret = 0;
7819 int avail;
7820 xmlChar cur, next;
7821
7822#ifdef DEBUG_PUSH
7823 switch (ctxt->instate) {
7824 case XML_PARSER_EOF:
7825 xmlGenericError(xmlGenericErrorContext,
7826 "PP: try EOF\n"); break;
7827 case XML_PARSER_START:
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: try START\n"); break;
7830 case XML_PARSER_MISC:
7831 xmlGenericError(xmlGenericErrorContext,
7832 "PP: try MISC\n");break;
7833 case XML_PARSER_COMMENT:
7834 xmlGenericError(xmlGenericErrorContext,
7835 "PP: try COMMENT\n");break;
7836 case XML_PARSER_PROLOG:
7837 xmlGenericError(xmlGenericErrorContext,
7838 "PP: try PROLOG\n");break;
7839 case XML_PARSER_START_TAG:
7840 xmlGenericError(xmlGenericErrorContext,
7841 "PP: try START_TAG\n");break;
7842 case XML_PARSER_CONTENT:
7843 xmlGenericError(xmlGenericErrorContext,
7844 "PP: try CONTENT\n");break;
7845 case XML_PARSER_CDATA_SECTION:
7846 xmlGenericError(xmlGenericErrorContext,
7847 "PP: try CDATA_SECTION\n");break;
7848 case XML_PARSER_END_TAG:
7849 xmlGenericError(xmlGenericErrorContext,
7850 "PP: try END_TAG\n");break;
7851 case XML_PARSER_ENTITY_DECL:
7852 xmlGenericError(xmlGenericErrorContext,
7853 "PP: try ENTITY_DECL\n");break;
7854 case XML_PARSER_ENTITY_VALUE:
7855 xmlGenericError(xmlGenericErrorContext,
7856 "PP: try ENTITY_VALUE\n");break;
7857 case XML_PARSER_ATTRIBUTE_VALUE:
7858 xmlGenericError(xmlGenericErrorContext,
7859 "PP: try ATTRIBUTE_VALUE\n");break;
7860 case XML_PARSER_DTD:
7861 xmlGenericError(xmlGenericErrorContext,
7862 "PP: try DTD\n");break;
7863 case XML_PARSER_EPILOG:
7864 xmlGenericError(xmlGenericErrorContext,
7865 "PP: try EPILOG\n");break;
7866 case XML_PARSER_PI:
7867 xmlGenericError(xmlGenericErrorContext,
7868 "PP: try PI\n");break;
7869 case XML_PARSER_IGNORE:
7870 xmlGenericError(xmlGenericErrorContext,
7871 "PP: try IGNORE\n");break;
7872 }
7873#endif
7874
7875 while (1) {
7876 /*
7877 * Pop-up of finished entities.
7878 */
7879 while ((RAW == 0) && (ctxt->inputNr > 1))
7880 xmlPopInput(ctxt);
7881
7882 if (ctxt->input ==NULL) break;
7883 if (ctxt->input->buf == NULL)
7884 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7885 else
7886 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7887 if (avail < 1)
7888 goto done;
7889 switch (ctxt->instate) {
7890 case XML_PARSER_EOF:
7891 /*
7892 * Document parsing is done !
7893 */
7894 goto done;
7895 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007896 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7897 xmlChar start[4];
7898 xmlCharEncoding enc;
7899
7900 /*
7901 * Very first chars read from the document flow.
7902 */
7903 if (avail < 4)
7904 goto done;
7905
7906 /*
7907 * Get the 4 first bytes and decode the charset
7908 * if enc != XML_CHAR_ENCODING_NONE
7909 * plug some encoding conversion routines.
7910 */
7911 start[0] = RAW;
7912 start[1] = NXT(1);
7913 start[2] = NXT(2);
7914 start[3] = NXT(3);
7915 enc = xmlDetectCharEncoding(start, 4);
7916 if (enc != XML_CHAR_ENCODING_NONE) {
7917 xmlSwitchEncoding(ctxt, enc);
7918 }
7919 break;
7920 }
Owen Taylor3473f882001-02-23 17:55:21 +00007921
7922 cur = ctxt->input->cur[0];
7923 next = ctxt->input->cur[1];
7924 if (cur == 0) {
7925 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7926 ctxt->sax->setDocumentLocator(ctxt->userData,
7927 &xmlDefaultSAXLocator);
7928 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7930 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7931 ctxt->wellFormed = 0;
7932 ctxt->disableSAX = 1;
7933 ctxt->instate = XML_PARSER_EOF;
7934#ifdef DEBUG_PUSH
7935 xmlGenericError(xmlGenericErrorContext,
7936 "PP: entering EOF\n");
7937#endif
7938 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7939 ctxt->sax->endDocument(ctxt->userData);
7940 goto done;
7941 }
7942 if ((cur == '<') && (next == '?')) {
7943 /* PI or XML decl */
7944 if (avail < 5) return(ret);
7945 if ((!terminate) &&
7946 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7947 return(ret);
7948 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7949 ctxt->sax->setDocumentLocator(ctxt->userData,
7950 &xmlDefaultSAXLocator);
7951 if ((ctxt->input->cur[2] == 'x') &&
7952 (ctxt->input->cur[3] == 'm') &&
7953 (ctxt->input->cur[4] == 'l') &&
7954 (IS_BLANK(ctxt->input->cur[5]))) {
7955 ret += 5;
7956#ifdef DEBUG_PUSH
7957 xmlGenericError(xmlGenericErrorContext,
7958 "PP: Parsing XML Decl\n");
7959#endif
7960 xmlParseXMLDecl(ctxt);
7961 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7962 /*
7963 * The XML REC instructs us to stop parsing right
7964 * here
7965 */
7966 ctxt->instate = XML_PARSER_EOF;
7967 return(0);
7968 }
7969 ctxt->standalone = ctxt->input->standalone;
7970 if ((ctxt->encoding == NULL) &&
7971 (ctxt->input->encoding != NULL))
7972 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7973 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7974 (!ctxt->disableSAX))
7975 ctxt->sax->startDocument(ctxt->userData);
7976 ctxt->instate = XML_PARSER_MISC;
7977#ifdef DEBUG_PUSH
7978 xmlGenericError(xmlGenericErrorContext,
7979 "PP: entering MISC\n");
7980#endif
7981 } else {
7982 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7983 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7984 (!ctxt->disableSAX))
7985 ctxt->sax->startDocument(ctxt->userData);
7986 ctxt->instate = XML_PARSER_MISC;
7987#ifdef DEBUG_PUSH
7988 xmlGenericError(xmlGenericErrorContext,
7989 "PP: entering MISC\n");
7990#endif
7991 }
7992 } else {
7993 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7994 ctxt->sax->setDocumentLocator(ctxt->userData,
7995 &xmlDefaultSAXLocator);
7996 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7997 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7998 (!ctxt->disableSAX))
7999 ctxt->sax->startDocument(ctxt->userData);
8000 ctxt->instate = XML_PARSER_MISC;
8001#ifdef DEBUG_PUSH
8002 xmlGenericError(xmlGenericErrorContext,
8003 "PP: entering MISC\n");
8004#endif
8005 }
8006 break;
8007 case XML_PARSER_MISC:
8008 SKIP_BLANKS;
8009 if (ctxt->input->buf == NULL)
8010 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8011 else
8012 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8013 if (avail < 2)
8014 goto done;
8015 cur = ctxt->input->cur[0];
8016 next = ctxt->input->cur[1];
8017 if ((cur == '<') && (next == '?')) {
8018 if ((!terminate) &&
8019 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8020 goto done;
8021#ifdef DEBUG_PUSH
8022 xmlGenericError(xmlGenericErrorContext,
8023 "PP: Parsing PI\n");
8024#endif
8025 xmlParsePI(ctxt);
8026 } else if ((cur == '<') && (next == '!') &&
8027 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8028 if ((!terminate) &&
8029 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8030 goto done;
8031#ifdef DEBUG_PUSH
8032 xmlGenericError(xmlGenericErrorContext,
8033 "PP: Parsing Comment\n");
8034#endif
8035 xmlParseComment(ctxt);
8036 ctxt->instate = XML_PARSER_MISC;
8037 } else if ((cur == '<') && (next == '!') &&
8038 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8039 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8040 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8041 (ctxt->input->cur[8] == 'E')) {
8042 if ((!terminate) &&
8043 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8044 goto done;
8045#ifdef DEBUG_PUSH
8046 xmlGenericError(xmlGenericErrorContext,
8047 "PP: Parsing internal subset\n");
8048#endif
8049 ctxt->inSubset = 1;
8050 xmlParseDocTypeDecl(ctxt);
8051 if (RAW == '[') {
8052 ctxt->instate = XML_PARSER_DTD;
8053#ifdef DEBUG_PUSH
8054 xmlGenericError(xmlGenericErrorContext,
8055 "PP: entering DTD\n");
8056#endif
8057 } else {
8058 /*
8059 * Create and update the external subset.
8060 */
8061 ctxt->inSubset = 2;
8062 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8063 (ctxt->sax->externalSubset != NULL))
8064 ctxt->sax->externalSubset(ctxt->userData,
8065 ctxt->intSubName, ctxt->extSubSystem,
8066 ctxt->extSubURI);
8067 ctxt->inSubset = 0;
8068 ctxt->instate = XML_PARSER_PROLOG;
8069#ifdef DEBUG_PUSH
8070 xmlGenericError(xmlGenericErrorContext,
8071 "PP: entering PROLOG\n");
8072#endif
8073 }
8074 } else if ((cur == '<') && (next == '!') &&
8075 (avail < 9)) {
8076 goto done;
8077 } else {
8078 ctxt->instate = XML_PARSER_START_TAG;
8079#ifdef DEBUG_PUSH
8080 xmlGenericError(xmlGenericErrorContext,
8081 "PP: entering START_TAG\n");
8082#endif
8083 }
8084 break;
8085 case XML_PARSER_IGNORE:
8086 xmlGenericError(xmlGenericErrorContext,
8087 "PP: internal error, state == IGNORE");
8088 ctxt->instate = XML_PARSER_DTD;
8089#ifdef DEBUG_PUSH
8090 xmlGenericError(xmlGenericErrorContext,
8091 "PP: entering DTD\n");
8092#endif
8093 break;
8094 case XML_PARSER_PROLOG:
8095 SKIP_BLANKS;
8096 if (ctxt->input->buf == NULL)
8097 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8098 else
8099 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8100 if (avail < 2)
8101 goto done;
8102 cur = ctxt->input->cur[0];
8103 next = ctxt->input->cur[1];
8104 if ((cur == '<') && (next == '?')) {
8105 if ((!terminate) &&
8106 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8107 goto done;
8108#ifdef DEBUG_PUSH
8109 xmlGenericError(xmlGenericErrorContext,
8110 "PP: Parsing PI\n");
8111#endif
8112 xmlParsePI(ctxt);
8113 } else if ((cur == '<') && (next == '!') &&
8114 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8115 if ((!terminate) &&
8116 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8117 goto done;
8118#ifdef DEBUG_PUSH
8119 xmlGenericError(xmlGenericErrorContext,
8120 "PP: Parsing Comment\n");
8121#endif
8122 xmlParseComment(ctxt);
8123 ctxt->instate = XML_PARSER_PROLOG;
8124 } else if ((cur == '<') && (next == '!') &&
8125 (avail < 4)) {
8126 goto done;
8127 } else {
8128 ctxt->instate = XML_PARSER_START_TAG;
8129#ifdef DEBUG_PUSH
8130 xmlGenericError(xmlGenericErrorContext,
8131 "PP: entering START_TAG\n");
8132#endif
8133 }
8134 break;
8135 case XML_PARSER_EPILOG:
8136 SKIP_BLANKS;
8137 if (ctxt->input->buf == NULL)
8138 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8139 else
8140 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8141 if (avail < 2)
8142 goto done;
8143 cur = ctxt->input->cur[0];
8144 next = ctxt->input->cur[1];
8145 if ((cur == '<') && (next == '?')) {
8146 if ((!terminate) &&
8147 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8148 goto done;
8149#ifdef DEBUG_PUSH
8150 xmlGenericError(xmlGenericErrorContext,
8151 "PP: Parsing PI\n");
8152#endif
8153 xmlParsePI(ctxt);
8154 ctxt->instate = XML_PARSER_EPILOG;
8155 } else if ((cur == '<') && (next == '!') &&
8156 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8157 if ((!terminate) &&
8158 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8159 goto done;
8160#ifdef DEBUG_PUSH
8161 xmlGenericError(xmlGenericErrorContext,
8162 "PP: Parsing Comment\n");
8163#endif
8164 xmlParseComment(ctxt);
8165 ctxt->instate = XML_PARSER_EPILOG;
8166 } else if ((cur == '<') && (next == '!') &&
8167 (avail < 4)) {
8168 goto done;
8169 } else {
8170 ctxt->errNo = XML_ERR_DOCUMENT_END;
8171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8172 ctxt->sax->error(ctxt->userData,
8173 "Extra content at the end of the document\n");
8174 ctxt->wellFormed = 0;
8175 ctxt->disableSAX = 1;
8176 ctxt->instate = XML_PARSER_EOF;
8177#ifdef DEBUG_PUSH
8178 xmlGenericError(xmlGenericErrorContext,
8179 "PP: entering EOF\n");
8180#endif
8181 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8182 (!ctxt->disableSAX))
8183 ctxt->sax->endDocument(ctxt->userData);
8184 goto done;
8185 }
8186 break;
8187 case XML_PARSER_START_TAG: {
8188 xmlChar *name, *oldname;
8189
8190 if ((avail < 2) && (ctxt->inputNr == 1))
8191 goto done;
8192 cur = ctxt->input->cur[0];
8193 if (cur != '<') {
8194 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8196 ctxt->sax->error(ctxt->userData,
8197 "Start tag expect, '<' not found\n");
8198 ctxt->wellFormed = 0;
8199 ctxt->disableSAX = 1;
8200 ctxt->instate = XML_PARSER_EOF;
8201#ifdef DEBUG_PUSH
8202 xmlGenericError(xmlGenericErrorContext,
8203 "PP: entering EOF\n");
8204#endif
8205 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8206 (!ctxt->disableSAX))
8207 ctxt->sax->endDocument(ctxt->userData);
8208 goto done;
8209 }
8210 if ((!terminate) &&
8211 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8212 goto done;
8213 if (ctxt->spaceNr == 0)
8214 spacePush(ctxt, -1);
8215 else
8216 spacePush(ctxt, *ctxt->space);
8217 name = xmlParseStartTag(ctxt);
8218 if (name == NULL) {
8219 spacePop(ctxt);
8220 ctxt->instate = XML_PARSER_EOF;
8221#ifdef DEBUG_PUSH
8222 xmlGenericError(xmlGenericErrorContext,
8223 "PP: entering EOF\n");
8224#endif
8225 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8226 (!ctxt->disableSAX))
8227 ctxt->sax->endDocument(ctxt->userData);
8228 goto done;
8229 }
8230 namePush(ctxt, xmlStrdup(name));
8231
8232 /*
8233 * [ VC: Root Element Type ]
8234 * The Name in the document type declaration must match
8235 * the element type of the root element.
8236 */
8237 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8238 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8239 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8240
8241 /*
8242 * Check for an Empty Element.
8243 */
8244 if ((RAW == '/') && (NXT(1) == '>')) {
8245 SKIP(2);
8246 if ((ctxt->sax != NULL) &&
8247 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8248 ctxt->sax->endElement(ctxt->userData, name);
8249 xmlFree(name);
8250 oldname = namePop(ctxt);
8251 spacePop(ctxt);
8252 if (oldname != NULL) {
8253#ifdef DEBUG_STACK
8254 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8255#endif
8256 xmlFree(oldname);
8257 }
8258 if (ctxt->name == NULL) {
8259 ctxt->instate = XML_PARSER_EPILOG;
8260#ifdef DEBUG_PUSH
8261 xmlGenericError(xmlGenericErrorContext,
8262 "PP: entering EPILOG\n");
8263#endif
8264 } else {
8265 ctxt->instate = XML_PARSER_CONTENT;
8266#ifdef DEBUG_PUSH
8267 xmlGenericError(xmlGenericErrorContext,
8268 "PP: entering CONTENT\n");
8269#endif
8270 }
8271 break;
8272 }
8273 if (RAW == '>') {
8274 NEXT;
8275 } else {
8276 ctxt->errNo = XML_ERR_GT_REQUIRED;
8277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8278 ctxt->sax->error(ctxt->userData,
8279 "Couldn't find end of Start Tag %s\n",
8280 name);
8281 ctxt->wellFormed = 0;
8282 ctxt->disableSAX = 1;
8283
8284 /*
8285 * end of parsing of this node.
8286 */
8287 nodePop(ctxt);
8288 oldname = namePop(ctxt);
8289 spacePop(ctxt);
8290 if (oldname != NULL) {
8291#ifdef DEBUG_STACK
8292 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8293#endif
8294 xmlFree(oldname);
8295 }
8296 }
8297 xmlFree(name);
8298 ctxt->instate = XML_PARSER_CONTENT;
8299#ifdef DEBUG_PUSH
8300 xmlGenericError(xmlGenericErrorContext,
8301 "PP: entering CONTENT\n");
8302#endif
8303 break;
8304 }
8305 case XML_PARSER_CONTENT: {
8306 const xmlChar *test;
8307 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008308 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008309
8310 /*
8311 * Handle preparsed entities and charRef
8312 */
8313 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008314 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008315
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008316 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008317 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8318 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008319 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008320 ctxt->token = 0;
8321 }
8322 if ((avail < 2) && (ctxt->inputNr == 1))
8323 goto done;
8324 cur = ctxt->input->cur[0];
8325 next = ctxt->input->cur[1];
8326
8327 test = CUR_PTR;
8328 cons = ctxt->input->consumed;
8329 tok = ctxt->token;
8330 if ((cur == '<') && (next == '?')) {
8331 if ((!terminate) &&
8332 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8333 goto done;
8334#ifdef DEBUG_PUSH
8335 xmlGenericError(xmlGenericErrorContext,
8336 "PP: Parsing PI\n");
8337#endif
8338 xmlParsePI(ctxt);
8339 } else if ((cur == '<') && (next == '!') &&
8340 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8341 if ((!terminate) &&
8342 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8343 goto done;
8344#ifdef DEBUG_PUSH
8345 xmlGenericError(xmlGenericErrorContext,
8346 "PP: Parsing Comment\n");
8347#endif
8348 xmlParseComment(ctxt);
8349 ctxt->instate = XML_PARSER_CONTENT;
8350 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8351 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8352 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8353 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8354 (ctxt->input->cur[8] == '[')) {
8355 SKIP(9);
8356 ctxt->instate = XML_PARSER_CDATA_SECTION;
8357#ifdef DEBUG_PUSH
8358 xmlGenericError(xmlGenericErrorContext,
8359 "PP: entering CDATA_SECTION\n");
8360#endif
8361 break;
8362 } else if ((cur == '<') && (next == '!') &&
8363 (avail < 9)) {
8364 goto done;
8365 } else if ((cur == '<') && (next == '/')) {
8366 ctxt->instate = XML_PARSER_END_TAG;
8367#ifdef DEBUG_PUSH
8368 xmlGenericError(xmlGenericErrorContext,
8369 "PP: entering END_TAG\n");
8370#endif
8371 break;
8372 } else if (cur == '<') {
8373 ctxt->instate = XML_PARSER_START_TAG;
8374#ifdef DEBUG_PUSH
8375 xmlGenericError(xmlGenericErrorContext,
8376 "PP: entering START_TAG\n");
8377#endif
8378 break;
8379 } else if (cur == '&') {
8380 if ((!terminate) &&
8381 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8382 goto done;
8383#ifdef DEBUG_PUSH
8384 xmlGenericError(xmlGenericErrorContext,
8385 "PP: Parsing Reference\n");
8386#endif
8387 xmlParseReference(ctxt);
8388 } else {
8389 /* TODO Avoid the extra copy, handle directly !!! */
8390 /*
8391 * Goal of the following test is:
8392 * - minimize calls to the SAX 'character' callback
8393 * when they are mergeable
8394 * - handle an problem for isBlank when we only parse
8395 * a sequence of blank chars and the next one is
8396 * not available to check against '<' presence.
8397 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008398 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008399 * of the parser.
8400 */
8401 if ((ctxt->inputNr == 1) &&
8402 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8403 if ((!terminate) &&
8404 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8405 goto done;
8406 }
8407 ctxt->checkIndex = 0;
8408#ifdef DEBUG_PUSH
8409 xmlGenericError(xmlGenericErrorContext,
8410 "PP: Parsing char data\n");
8411#endif
8412 xmlParseCharData(ctxt, 0);
8413 }
8414 /*
8415 * Pop-up of finished entities.
8416 */
8417 while ((RAW == 0) && (ctxt->inputNr > 1))
8418 xmlPopInput(ctxt);
8419 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8420 (tok == ctxt->token)) {
8421 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8423 ctxt->sax->error(ctxt->userData,
8424 "detected an error in element content\n");
8425 ctxt->wellFormed = 0;
8426 ctxt->disableSAX = 1;
8427 ctxt->instate = XML_PARSER_EOF;
8428 break;
8429 }
8430 break;
8431 }
8432 case XML_PARSER_CDATA_SECTION: {
8433 /*
8434 * The Push mode need to have the SAX callback for
8435 * cdataBlock merge back contiguous callbacks.
8436 */
8437 int base;
8438
8439 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8440 if (base < 0) {
8441 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8442 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8443 if (ctxt->sax->cdataBlock != NULL)
8444 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8445 XML_PARSER_BIG_BUFFER_SIZE);
8446 }
8447 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8448 ctxt->checkIndex = 0;
8449 }
8450 goto done;
8451 } else {
8452 if ((ctxt->sax != NULL) && (base > 0) &&
8453 (!ctxt->disableSAX)) {
8454 if (ctxt->sax->cdataBlock != NULL)
8455 ctxt->sax->cdataBlock(ctxt->userData,
8456 ctxt->input->cur, base);
8457 }
8458 SKIP(base + 3);
8459 ctxt->checkIndex = 0;
8460 ctxt->instate = XML_PARSER_CONTENT;
8461#ifdef DEBUG_PUSH
8462 xmlGenericError(xmlGenericErrorContext,
8463 "PP: entering CONTENT\n");
8464#endif
8465 }
8466 break;
8467 }
8468 case XML_PARSER_END_TAG:
8469 if (avail < 2)
8470 goto done;
8471 if ((!terminate) &&
8472 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8473 goto done;
8474 xmlParseEndTag(ctxt);
8475 if (ctxt->name == NULL) {
8476 ctxt->instate = XML_PARSER_EPILOG;
8477#ifdef DEBUG_PUSH
8478 xmlGenericError(xmlGenericErrorContext,
8479 "PP: entering EPILOG\n");
8480#endif
8481 } else {
8482 ctxt->instate = XML_PARSER_CONTENT;
8483#ifdef DEBUG_PUSH
8484 xmlGenericError(xmlGenericErrorContext,
8485 "PP: entering CONTENT\n");
8486#endif
8487 }
8488 break;
8489 case XML_PARSER_DTD: {
8490 /*
8491 * Sorry but progressive parsing of the internal subset
8492 * is not expected to be supported. We first check that
8493 * the full content of the internal subset is available and
8494 * the parsing is launched only at that point.
8495 * Internal subset ends up with "']' S? '>'" in an unescaped
8496 * section and not in a ']]>' sequence which are conditional
8497 * sections (whoever argued to keep that crap in XML deserve
8498 * a place in hell !).
8499 */
8500 int base, i;
8501 xmlChar *buf;
8502 xmlChar quote = 0;
8503
8504 base = ctxt->input->cur - ctxt->input->base;
8505 if (base < 0) return(0);
8506 if (ctxt->checkIndex > base)
8507 base = ctxt->checkIndex;
8508 buf = ctxt->input->buf->buffer->content;
8509 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8510 base++) {
8511 if (quote != 0) {
8512 if (buf[base] == quote)
8513 quote = 0;
8514 continue;
8515 }
8516 if (buf[base] == '"') {
8517 quote = '"';
8518 continue;
8519 }
8520 if (buf[base] == '\'') {
8521 quote = '\'';
8522 continue;
8523 }
8524 if (buf[base] == ']') {
8525 if ((unsigned int) base +1 >=
8526 ctxt->input->buf->buffer->use)
8527 break;
8528 if (buf[base + 1] == ']') {
8529 /* conditional crap, skip both ']' ! */
8530 base++;
8531 continue;
8532 }
8533 for (i = 0;
8534 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8535 i++) {
8536 if (buf[base + i] == '>')
8537 goto found_end_int_subset;
8538 }
8539 break;
8540 }
8541 }
8542 /*
8543 * We didn't found the end of the Internal subset
8544 */
8545 if (quote == 0)
8546 ctxt->checkIndex = base;
8547#ifdef DEBUG_PUSH
8548 if (next == 0)
8549 xmlGenericError(xmlGenericErrorContext,
8550 "PP: lookup of int subset end filed\n");
8551#endif
8552 goto done;
8553
8554found_end_int_subset:
8555 xmlParseInternalSubset(ctxt);
8556 ctxt->inSubset = 2;
8557 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8558 (ctxt->sax->externalSubset != NULL))
8559 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8560 ctxt->extSubSystem, ctxt->extSubURI);
8561 ctxt->inSubset = 0;
8562 ctxt->instate = XML_PARSER_PROLOG;
8563 ctxt->checkIndex = 0;
8564#ifdef DEBUG_PUSH
8565 xmlGenericError(xmlGenericErrorContext,
8566 "PP: entering PROLOG\n");
8567#endif
8568 break;
8569 }
8570 case XML_PARSER_COMMENT:
8571 xmlGenericError(xmlGenericErrorContext,
8572 "PP: internal error, state == COMMENT\n");
8573 ctxt->instate = XML_PARSER_CONTENT;
8574#ifdef DEBUG_PUSH
8575 xmlGenericError(xmlGenericErrorContext,
8576 "PP: entering CONTENT\n");
8577#endif
8578 break;
8579 case XML_PARSER_PI:
8580 xmlGenericError(xmlGenericErrorContext,
8581 "PP: internal error, state == PI\n");
8582 ctxt->instate = XML_PARSER_CONTENT;
8583#ifdef DEBUG_PUSH
8584 xmlGenericError(xmlGenericErrorContext,
8585 "PP: entering CONTENT\n");
8586#endif
8587 break;
8588 case XML_PARSER_ENTITY_DECL:
8589 xmlGenericError(xmlGenericErrorContext,
8590 "PP: internal error, state == ENTITY_DECL\n");
8591 ctxt->instate = XML_PARSER_DTD;
8592#ifdef DEBUG_PUSH
8593 xmlGenericError(xmlGenericErrorContext,
8594 "PP: entering DTD\n");
8595#endif
8596 break;
8597 case XML_PARSER_ENTITY_VALUE:
8598 xmlGenericError(xmlGenericErrorContext,
8599 "PP: internal error, state == ENTITY_VALUE\n");
8600 ctxt->instate = XML_PARSER_CONTENT;
8601#ifdef DEBUG_PUSH
8602 xmlGenericError(xmlGenericErrorContext,
8603 "PP: entering DTD\n");
8604#endif
8605 break;
8606 case XML_PARSER_ATTRIBUTE_VALUE:
8607 xmlGenericError(xmlGenericErrorContext,
8608 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8609 ctxt->instate = XML_PARSER_START_TAG;
8610#ifdef DEBUG_PUSH
8611 xmlGenericError(xmlGenericErrorContext,
8612 "PP: entering START_TAG\n");
8613#endif
8614 break;
8615 case XML_PARSER_SYSTEM_LITERAL:
8616 xmlGenericError(xmlGenericErrorContext,
8617 "PP: internal error, state == SYSTEM_LITERAL\n");
8618 ctxt->instate = XML_PARSER_START_TAG;
8619#ifdef DEBUG_PUSH
8620 xmlGenericError(xmlGenericErrorContext,
8621 "PP: entering START_TAG\n");
8622#endif
8623 break;
8624 }
8625 }
8626done:
8627#ifdef DEBUG_PUSH
8628 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8629#endif
8630 return(ret);
8631}
8632
8633/**
Owen Taylor3473f882001-02-23 17:55:21 +00008634 * xmlParseChunk:
8635 * @ctxt: an XML parser context
8636 * @chunk: an char array
8637 * @size: the size in byte of the chunk
8638 * @terminate: last chunk indicator
8639 *
8640 * Parse a Chunk of memory
8641 *
8642 * Returns zero if no error, the xmlParserErrors otherwise.
8643 */
8644int
8645xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8646 int terminate) {
8647 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8648 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8649 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8650 int cur = ctxt->input->cur - ctxt->input->base;
8651
8652 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8653 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8654 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008655 ctxt->input->end =
8656 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008657#ifdef DEBUG_PUSH
8658 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8659#endif
8660
8661 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8662 xmlParseTryOrFinish(ctxt, terminate);
8663 } else if (ctxt->instate != XML_PARSER_EOF) {
8664 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8665 xmlParserInputBufferPtr in = ctxt->input->buf;
8666 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8667 (in->raw != NULL)) {
8668 int nbchars;
8669
8670 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8671 if (nbchars < 0) {
8672 xmlGenericError(xmlGenericErrorContext,
8673 "xmlParseChunk: encoder error\n");
8674 return(XML_ERR_INVALID_ENCODING);
8675 }
8676 }
8677 }
8678 }
8679 xmlParseTryOrFinish(ctxt, terminate);
8680 if (terminate) {
8681 /*
8682 * Check for termination
8683 */
8684 if ((ctxt->instate != XML_PARSER_EOF) &&
8685 (ctxt->instate != XML_PARSER_EPILOG)) {
8686 ctxt->errNo = XML_ERR_DOCUMENT_END;
8687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8688 ctxt->sax->error(ctxt->userData,
8689 "Extra content at the end of the document\n");
8690 ctxt->wellFormed = 0;
8691 ctxt->disableSAX = 1;
8692 }
8693 if (ctxt->instate != XML_PARSER_EOF) {
8694 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8695 (!ctxt->disableSAX))
8696 ctxt->sax->endDocument(ctxt->userData);
8697 }
8698 ctxt->instate = XML_PARSER_EOF;
8699 }
8700 return((xmlParserErrors) ctxt->errNo);
8701}
8702
/************************************************************************
 *									*
 *		I/O front end functions to the parser			*
 *									*
 ************************************************************************/
8708
8709/**
8710 * xmlStopParser:
8711 * @ctxt: an XML parser context
8712 *
8713 * Blocks further parser processing
8714 */
8715void
8716xmlStopParser(xmlParserCtxtPtr ctxt) {
8717 ctxt->instate = XML_PARSER_EOF;
8718 if (ctxt->input != NULL)
8719 ctxt->input->cur = BAD_CAST"";
8720}
8721
8722/**
8723 * xmlCreatePushParserCtxt:
8724 * @sax: a SAX handler
8725 * @user_data: The user data returned on SAX callbacks
8726 * @chunk: a pointer to an array of chars
8727 * @size: number of chars in the array
8728 * @filename: an optional file name or URI
8729 *
8730 * Create a parser context for using the XML parser in push mode
8731 * To allow content encoding detection, @size should be >= 4
8732 * The value of @filename is used for fetching external entities
8733 * and error/warning reports.
8734 *
8735 * Returns the new parser context or NULL
8736 */
8737xmlParserCtxtPtr
8738xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8739 const char *chunk, int size, const char *filename) {
8740 xmlParserCtxtPtr ctxt;
8741 xmlParserInputPtr inputStream;
8742 xmlParserInputBufferPtr buf;
8743 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8744
8745 /*
8746 * plug some encoding conversion routines
8747 */
8748 if ((chunk != NULL) && (size >= 4))
8749 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8750
8751 buf = xmlAllocParserInputBuffer(enc);
8752 if (buf == NULL) return(NULL);
8753
8754 ctxt = xmlNewParserCtxt();
8755 if (ctxt == NULL) {
8756 xmlFree(buf);
8757 return(NULL);
8758 }
8759 if (sax != NULL) {
8760 if (ctxt->sax != &xmlDefaultSAXHandler)
8761 xmlFree(ctxt->sax);
8762 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8763 if (ctxt->sax == NULL) {
8764 xmlFree(buf);
8765 xmlFree(ctxt);
8766 return(NULL);
8767 }
8768 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8769 if (user_data != NULL)
8770 ctxt->userData = user_data;
8771 }
8772 if (filename == NULL) {
8773 ctxt->directory = NULL;
8774 } else {
8775 ctxt->directory = xmlParserGetDirectory(filename);
8776 }
8777
8778 inputStream = xmlNewInputStream(ctxt);
8779 if (inputStream == NULL) {
8780 xmlFreeParserCtxt(ctxt);
8781 return(NULL);
8782 }
8783
8784 if (filename == NULL)
8785 inputStream->filename = NULL;
8786 else
8787 inputStream->filename = xmlMemStrdup(filename);
8788 inputStream->buf = buf;
8789 inputStream->base = inputStream->buf->buffer->content;
8790 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008791 inputStream->end =
8792 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008793
8794 inputPush(ctxt, inputStream);
8795
8796 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8797 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008798 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8799 int cur = ctxt->input->cur - ctxt->input->base;
8800
Owen Taylor3473f882001-02-23 17:55:21 +00008801 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008802
8803 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8804 ctxt->input->cur = ctxt->input->base + cur;
8805 ctxt->input->end =
8806 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008807#ifdef DEBUG_PUSH
8808 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8809#endif
8810 }
8811
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008812 if (enc != XML_CHAR_ENCODING_NONE) {
8813 xmlSwitchEncoding(ctxt, enc);
8814 }
8815
Owen Taylor3473f882001-02-23 17:55:21 +00008816 return(ctxt);
8817}
8818
8819/**
8820 * xmlCreateIOParserCtxt:
8821 * @sax: a SAX handler
8822 * @user_data: The user data returned on SAX callbacks
8823 * @ioread: an I/O read function
8824 * @ioclose: an I/O close function
8825 * @ioctx: an I/O handler
8826 * @enc: the charset encoding if known
8827 *
8828 * Create a parser context for using the XML parser with an existing
8829 * I/O stream
8830 *
8831 * Returns the new parser context or NULL
8832 */
8833xmlParserCtxtPtr
8834xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8835 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8836 void *ioctx, xmlCharEncoding enc) {
8837 xmlParserCtxtPtr ctxt;
8838 xmlParserInputPtr inputStream;
8839 xmlParserInputBufferPtr buf;
8840
8841 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8842 if (buf == NULL) return(NULL);
8843
8844 ctxt = xmlNewParserCtxt();
8845 if (ctxt == NULL) {
8846 xmlFree(buf);
8847 return(NULL);
8848 }
8849 if (sax != NULL) {
8850 if (ctxt->sax != &xmlDefaultSAXHandler)
8851 xmlFree(ctxt->sax);
8852 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8853 if (ctxt->sax == NULL) {
8854 xmlFree(buf);
8855 xmlFree(ctxt);
8856 return(NULL);
8857 }
8858 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8859 if (user_data != NULL)
8860 ctxt->userData = user_data;
8861 }
8862
8863 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8864 if (inputStream == NULL) {
8865 xmlFreeParserCtxt(ctxt);
8866 return(NULL);
8867 }
8868 inputPush(ctxt, inputStream);
8869
8870 return(ctxt);
8871}
8872
/************************************************************************
 *									*
 *		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
8878
8879/**
8880 * xmlIOParseDTD:
8881 * @sax: the SAX handler block or NULL
8882 * @input: an Input Buffer
8883 * @enc: the charset encoding if known
8884 *
8885 * Load and parse a DTD
8886 *
8887 * Returns the resulting xmlDtdPtr or NULL in case of error.
8888 * @input will be freed at parsing end.
8889 */
8890
8891xmlDtdPtr
8892xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8893 xmlCharEncoding enc) {
8894 xmlDtdPtr ret = NULL;
8895 xmlParserCtxtPtr ctxt;
8896 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008897 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008898
8899 if (input == NULL)
8900 return(NULL);
8901
8902 ctxt = xmlNewParserCtxt();
8903 if (ctxt == NULL) {
8904 return(NULL);
8905 }
8906
8907 /*
8908 * Set-up the SAX context
8909 */
8910 if (sax != NULL) {
8911 if (ctxt->sax != NULL)
8912 xmlFree(ctxt->sax);
8913 ctxt->sax = sax;
8914 ctxt->userData = NULL;
8915 }
8916
8917 /*
8918 * generate a parser input from the I/O handler
8919 */
8920
8921 pinput = xmlNewIOInputStream(ctxt, input, enc);
8922 if (pinput == NULL) {
8923 if (sax != NULL) ctxt->sax = NULL;
8924 xmlFreeParserCtxt(ctxt);
8925 return(NULL);
8926 }
8927
8928 /*
8929 * plug some encoding conversion routines here.
8930 */
8931 xmlPushInput(ctxt, pinput);
8932
8933 pinput->filename = NULL;
8934 pinput->line = 1;
8935 pinput->col = 1;
8936 pinput->base = ctxt->input->cur;
8937 pinput->cur = ctxt->input->cur;
8938 pinput->free = NULL;
8939
8940 /*
8941 * let's parse that entity knowing it's an external subset.
8942 */
8943 ctxt->inSubset = 2;
8944 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8945 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8946 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008947
8948 if (enc == XML_CHAR_ENCODING_NONE) {
8949 /*
8950 * Get the 4 first bytes and decode the charset
8951 * if enc != XML_CHAR_ENCODING_NONE
8952 * plug some encoding conversion routines.
8953 */
8954 start[0] = RAW;
8955 start[1] = NXT(1);
8956 start[2] = NXT(2);
8957 start[3] = NXT(3);
8958 enc = xmlDetectCharEncoding(start, 4);
8959 if (enc != XML_CHAR_ENCODING_NONE) {
8960 xmlSwitchEncoding(ctxt, enc);
8961 }
8962 }
8963
Owen Taylor3473f882001-02-23 17:55:21 +00008964 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8965
8966 if (ctxt->myDoc != NULL) {
8967 if (ctxt->wellFormed) {
8968 ret = ctxt->myDoc->extSubset;
8969 ctxt->myDoc->extSubset = NULL;
8970 } else {
8971 ret = NULL;
8972 }
8973 xmlFreeDoc(ctxt->myDoc);
8974 ctxt->myDoc = NULL;
8975 }
8976 if (sax != NULL) ctxt->sax = NULL;
8977 xmlFreeParserCtxt(ctxt);
8978
8979 return(ret);
8980}
8981
8982/**
8983 * xmlSAXParseDTD:
8984 * @sax: the SAX handler block
8985 * @ExternalID: a NAME* containing the External ID of the DTD
8986 * @SystemID: a NAME* containing the URL to the DTD
8987 *
8988 * Load and parse an external subset.
8989 *
8990 * Returns the resulting xmlDtdPtr or NULL in case of error.
8991 */
8992
8993xmlDtdPtr
8994xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8995 const xmlChar *SystemID) {
8996 xmlDtdPtr ret = NULL;
8997 xmlParserCtxtPtr ctxt;
8998 xmlParserInputPtr input = NULL;
8999 xmlCharEncoding enc;
9000
9001 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9002
9003 ctxt = xmlNewParserCtxt();
9004 if (ctxt == NULL) {
9005 return(NULL);
9006 }
9007
9008 /*
9009 * Set-up the SAX context
9010 */
9011 if (sax != NULL) {
9012 if (ctxt->sax != NULL)
9013 xmlFree(ctxt->sax);
9014 ctxt->sax = sax;
9015 ctxt->userData = NULL;
9016 }
9017
9018 /*
9019 * Ask the Entity resolver to load the damn thing
9020 */
9021
9022 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9023 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9024 if (input == NULL) {
9025 if (sax != NULL) ctxt->sax = NULL;
9026 xmlFreeParserCtxt(ctxt);
9027 return(NULL);
9028 }
9029
9030 /*
9031 * plug some encoding conversion routines here.
9032 */
9033 xmlPushInput(ctxt, input);
9034 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9035 xmlSwitchEncoding(ctxt, enc);
9036
9037 if (input->filename == NULL)
9038 input->filename = (char *) xmlStrdup(SystemID);
9039 input->line = 1;
9040 input->col = 1;
9041 input->base = ctxt->input->cur;
9042 input->cur = ctxt->input->cur;
9043 input->free = NULL;
9044
9045 /*
9046 * let's parse that entity knowing it's an external subset.
9047 */
9048 ctxt->inSubset = 2;
9049 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9050 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9051 ExternalID, SystemID);
9052 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9053
9054 if (ctxt->myDoc != NULL) {
9055 if (ctxt->wellFormed) {
9056 ret = ctxt->myDoc->extSubset;
9057 ctxt->myDoc->extSubset = NULL;
9058 } else {
9059 ret = NULL;
9060 }
9061 xmlFreeDoc(ctxt->myDoc);
9062 ctxt->myDoc = NULL;
9063 }
9064 if (sax != NULL) ctxt->sax = NULL;
9065 xmlFreeParserCtxt(ctxt);
9066
9067 return(ret);
9068}
9069
9070/**
9071 * xmlParseDTD:
9072 * @ExternalID: a NAME* containing the External ID of the DTD
9073 * @SystemID: a NAME* containing the URL to the DTD
9074 *
9075 * Load and parse an external subset.
9076 *
9077 * Returns the resulting xmlDtdPtr or NULL in case of error.
9078 */
9079
9080xmlDtdPtr
9081xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9082 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9083}
9084
/************************************************************************
 *									*
 *		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
9090
9091/**
Owen Taylor3473f882001-02-23 17:55:21 +00009092 * xmlParseCtxtExternalEntity:
9093 * @ctx: the existing parsing context
9094 * @URL: the URL for the entity to load
9095 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009096 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009097 *
9098 * Parse an external general entity within an existing parsing context
9099 * An external general parsed entity is well-formed if it matches the
9100 * production labeled extParsedEnt.
9101 *
9102 * [78] extParsedEnt ::= TextDecl? content
9103 *
9104 * Returns 0 if the entity is well formed, -1 in case of args problem and
9105 * the parser error code otherwise
9106 */
9107
9108int
9109xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009110 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009111 xmlParserCtxtPtr ctxt;
9112 xmlDocPtr newDoc;
9113 xmlSAXHandlerPtr oldsax = NULL;
9114 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009115 xmlChar start[4];
9116 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009117
9118 if (ctx->depth > 40) {
9119 return(XML_ERR_ENTITY_LOOP);
9120 }
9121
Daniel Veillardcda96922001-08-21 10:56:31 +00009122 if (lst != NULL)
9123 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009124 if ((URL == NULL) && (ID == NULL))
9125 return(-1);
9126 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9127 return(-1);
9128
9129
9130 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9131 if (ctxt == NULL) return(-1);
9132 ctxt->userData = ctxt;
9133 oldsax = ctxt->sax;
9134 ctxt->sax = ctx->sax;
9135 newDoc = xmlNewDoc(BAD_CAST "1.0");
9136 if (newDoc == NULL) {
9137 xmlFreeParserCtxt(ctxt);
9138 return(-1);
9139 }
9140 if (ctx->myDoc != NULL) {
9141 newDoc->intSubset = ctx->myDoc->intSubset;
9142 newDoc->extSubset = ctx->myDoc->extSubset;
9143 }
9144 if (ctx->myDoc->URL != NULL) {
9145 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9146 }
9147 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9148 if (newDoc->children == NULL) {
9149 ctxt->sax = oldsax;
9150 xmlFreeParserCtxt(ctxt);
9151 newDoc->intSubset = NULL;
9152 newDoc->extSubset = NULL;
9153 xmlFreeDoc(newDoc);
9154 return(-1);
9155 }
9156 nodePush(ctxt, newDoc->children);
9157 if (ctx->myDoc == NULL) {
9158 ctxt->myDoc = newDoc;
9159 } else {
9160 ctxt->myDoc = ctx->myDoc;
9161 newDoc->children->doc = ctx->myDoc;
9162 }
9163
Daniel Veillard87a764e2001-06-20 17:41:10 +00009164 /*
9165 * Get the 4 first bytes and decode the charset
9166 * if enc != XML_CHAR_ENCODING_NONE
9167 * plug some encoding conversion routines.
9168 */
9169 GROW
9170 start[0] = RAW;
9171 start[1] = NXT(1);
9172 start[2] = NXT(2);
9173 start[3] = NXT(3);
9174 enc = xmlDetectCharEncoding(start, 4);
9175 if (enc != XML_CHAR_ENCODING_NONE) {
9176 xmlSwitchEncoding(ctxt, enc);
9177 }
9178
Owen Taylor3473f882001-02-23 17:55:21 +00009179 /*
9180 * Parse a possible text declaration first
9181 */
Owen Taylor3473f882001-02-23 17:55:21 +00009182 if ((RAW == '<') && (NXT(1) == '?') &&
9183 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9184 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9185 xmlParseTextDecl(ctxt);
9186 }
9187
9188 /*
9189 * Doing validity checking on chunk doesn't make sense
9190 */
9191 ctxt->instate = XML_PARSER_CONTENT;
9192 ctxt->validate = ctx->validate;
9193 ctxt->loadsubset = ctx->loadsubset;
9194 ctxt->depth = ctx->depth + 1;
9195 ctxt->replaceEntities = ctx->replaceEntities;
9196 if (ctxt->validate) {
9197 ctxt->vctxt.error = ctx->vctxt.error;
9198 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009199 } else {
9200 ctxt->vctxt.error = NULL;
9201 ctxt->vctxt.warning = NULL;
9202 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009203 ctxt->vctxt.nodeTab = NULL;
9204 ctxt->vctxt.nodeNr = 0;
9205 ctxt->vctxt.nodeMax = 0;
9206 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009207
9208 xmlParseContent(ctxt);
9209
9210 if ((RAW == '<') && (NXT(1) == '/')) {
9211 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9213 ctxt->sax->error(ctxt->userData,
9214 "chunk is not well balanced\n");
9215 ctxt->wellFormed = 0;
9216 ctxt->disableSAX = 1;
9217 } else if (RAW != 0) {
9218 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9220 ctxt->sax->error(ctxt->userData,
9221 "extra content at the end of well balanced chunk\n");
9222 ctxt->wellFormed = 0;
9223 ctxt->disableSAX = 1;
9224 }
9225 if (ctxt->node != newDoc->children) {
9226 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9228 ctxt->sax->error(ctxt->userData,
9229 "chunk is not well balanced\n");
9230 ctxt->wellFormed = 0;
9231 ctxt->disableSAX = 1;
9232 }
9233
9234 if (!ctxt->wellFormed) {
9235 if (ctxt->errNo == 0)
9236 ret = 1;
9237 else
9238 ret = ctxt->errNo;
9239 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009240 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009241 xmlNodePtr cur;
9242
9243 /*
9244 * Return the newly created nodeset after unlinking it from
9245 * they pseudo parent.
9246 */
9247 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009248 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009249 while (cur != NULL) {
9250 cur->parent = NULL;
9251 cur = cur->next;
9252 }
9253 newDoc->children->children = NULL;
9254 }
9255 ret = 0;
9256 }
9257 ctxt->sax = oldsax;
9258 xmlFreeParserCtxt(ctxt);
9259 newDoc->intSubset = NULL;
9260 newDoc->extSubset = NULL;
9261 xmlFreeDoc(newDoc);
9262
9263 return(ret);
9264}
9265
9266/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009267 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009268 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009269 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009270 * @sax: the SAX handler bloc (possibly NULL)
9271 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9272 * @depth: Used for loop detection, use 0
9273 * @URL: the URL for the entity to load
9274 * @ID: the System ID for the entity to load
9275 * @list: the return value for the set of parsed nodes
9276 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009277 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009278 *
9279 * Returns 0 if the entity is well formed, -1 in case of args problem and
9280 * the parser error code otherwise
9281 */
9282
Daniel Veillard257d9102001-05-08 10:41:44 +00009283static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009284xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9285 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009286 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009287 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009288 xmlParserCtxtPtr ctxt;
9289 xmlDocPtr newDoc;
9290 xmlSAXHandlerPtr oldsax = NULL;
9291 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009292 xmlChar start[4];
9293 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009294
9295 if (depth > 40) {
9296 return(XML_ERR_ENTITY_LOOP);
9297 }
9298
9299
9300
9301 if (list != NULL)
9302 *list = NULL;
9303 if ((URL == NULL) && (ID == NULL))
9304 return(-1);
9305 if (doc == NULL) /* @@ relax but check for dereferences */
9306 return(-1);
9307
9308
9309 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9310 if (ctxt == NULL) return(-1);
9311 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009312 if (oldctxt != NULL) {
9313 ctxt->_private = oldctxt->_private;
9314 ctxt->loadsubset = oldctxt->loadsubset;
9315 ctxt->validate = oldctxt->validate;
9316 ctxt->external = oldctxt->external;
9317 } else {
9318 /*
9319 * Doing validity checking on chunk without context
9320 * doesn't make sense
9321 */
9322 ctxt->_private = NULL;
9323 ctxt->validate = 0;
9324 ctxt->external = 2;
9325 ctxt->loadsubset = 0;
9326 }
Owen Taylor3473f882001-02-23 17:55:21 +00009327 if (sax != NULL) {
9328 oldsax = ctxt->sax;
9329 ctxt->sax = sax;
9330 if (user_data != NULL)
9331 ctxt->userData = user_data;
9332 }
9333 newDoc = xmlNewDoc(BAD_CAST "1.0");
9334 if (newDoc == NULL) {
9335 xmlFreeParserCtxt(ctxt);
9336 return(-1);
9337 }
9338 if (doc != NULL) {
9339 newDoc->intSubset = doc->intSubset;
9340 newDoc->extSubset = doc->extSubset;
9341 }
9342 if (doc->URL != NULL) {
9343 newDoc->URL = xmlStrdup(doc->URL);
9344 }
9345 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9346 if (newDoc->children == NULL) {
9347 if (sax != NULL)
9348 ctxt->sax = oldsax;
9349 xmlFreeParserCtxt(ctxt);
9350 newDoc->intSubset = NULL;
9351 newDoc->extSubset = NULL;
9352 xmlFreeDoc(newDoc);
9353 return(-1);
9354 }
9355 nodePush(ctxt, newDoc->children);
9356 if (doc == NULL) {
9357 ctxt->myDoc = newDoc;
9358 } else {
9359 ctxt->myDoc = doc;
9360 newDoc->children->doc = doc;
9361 }
9362
Daniel Veillard87a764e2001-06-20 17:41:10 +00009363 /*
9364 * Get the 4 first bytes and decode the charset
9365 * if enc != XML_CHAR_ENCODING_NONE
9366 * plug some encoding conversion routines.
9367 */
9368 GROW;
9369 start[0] = RAW;
9370 start[1] = NXT(1);
9371 start[2] = NXT(2);
9372 start[3] = NXT(3);
9373 enc = xmlDetectCharEncoding(start, 4);
9374 if (enc != XML_CHAR_ENCODING_NONE) {
9375 xmlSwitchEncoding(ctxt, enc);
9376 }
9377
Owen Taylor3473f882001-02-23 17:55:21 +00009378 /*
9379 * Parse a possible text declaration first
9380 */
Owen Taylor3473f882001-02-23 17:55:21 +00009381 if ((RAW == '<') && (NXT(1) == '?') &&
9382 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9383 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9384 xmlParseTextDecl(ctxt);
9385 }
9386
Owen Taylor3473f882001-02-23 17:55:21 +00009387 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009388 ctxt->depth = depth;
9389
9390 xmlParseContent(ctxt);
9391
9392 if ((RAW == '<') && (NXT(1) == '/')) {
9393 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9395 ctxt->sax->error(ctxt->userData,
9396 "chunk is not well balanced\n");
9397 ctxt->wellFormed = 0;
9398 ctxt->disableSAX = 1;
9399 } else if (RAW != 0) {
9400 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9402 ctxt->sax->error(ctxt->userData,
9403 "extra content at the end of well balanced chunk\n");
9404 ctxt->wellFormed = 0;
9405 ctxt->disableSAX = 1;
9406 }
9407 if (ctxt->node != newDoc->children) {
9408 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9410 ctxt->sax->error(ctxt->userData,
9411 "chunk is not well balanced\n");
9412 ctxt->wellFormed = 0;
9413 ctxt->disableSAX = 1;
9414 }
9415
9416 if (!ctxt->wellFormed) {
9417 if (ctxt->errNo == 0)
9418 ret = 1;
9419 else
9420 ret = ctxt->errNo;
9421 } else {
9422 if (list != NULL) {
9423 xmlNodePtr cur;
9424
9425 /*
9426 * Return the newly created nodeset after unlinking it from
9427 * they pseudo parent.
9428 */
9429 cur = newDoc->children->children;
9430 *list = cur;
9431 while (cur != NULL) {
9432 cur->parent = NULL;
9433 cur = cur->next;
9434 }
9435 newDoc->children->children = NULL;
9436 }
9437 ret = 0;
9438 }
9439 if (sax != NULL)
9440 ctxt->sax = oldsax;
9441 xmlFreeParserCtxt(ctxt);
9442 newDoc->intSubset = NULL;
9443 newDoc->extSubset = NULL;
9444 xmlFreeDoc(newDoc);
9445
9446 return(ret);
9447}
9448
9449/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009450 * xmlParseExternalEntity:
9451 * @doc: the document the chunk pertains to
9452 * @sax: the SAX handler bloc (possibly NULL)
9453 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9454 * @depth: Used for loop detection, use 0
9455 * @URL: the URL for the entity to load
9456 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009457 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009458 *
9459 * Parse an external general entity
9460 * An external general parsed entity is well-formed if it matches the
9461 * production labeled extParsedEnt.
9462 *
9463 * [78] extParsedEnt ::= TextDecl? content
9464 *
9465 * Returns 0 if the entity is well formed, -1 in case of args problem and
9466 * the parser error code otherwise
9467 */
9468
9469int
9470xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009471 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009472 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009473 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009474}
9475
9476/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009477 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009478 * @doc: the document the chunk pertains to
9479 * @sax: the SAX handler bloc (possibly NULL)
9480 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9481 * @depth: Used for loop detection, use 0
9482 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009483 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009484 *
9485 * Parse a well-balanced chunk of an XML document
9486 * called by the parser
9487 * The allowed sequence for the Well Balanced Chunk is the one defined by
9488 * the content production in the XML grammar:
9489 *
9490 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9491 *
9492 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9493 * the parser error code otherwise
9494 */
9495
9496int
9497xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009498 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009499 xmlParserCtxtPtr ctxt;
9500 xmlDocPtr newDoc;
9501 xmlSAXHandlerPtr oldsax = NULL;
9502 int size;
9503 int ret = 0;
9504
9505 if (depth > 40) {
9506 return(XML_ERR_ENTITY_LOOP);
9507 }
9508
9509
Daniel Veillardcda96922001-08-21 10:56:31 +00009510 if (lst != NULL)
9511 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009512 if (string == NULL)
9513 return(-1);
9514
9515 size = xmlStrlen(string);
9516
9517 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9518 if (ctxt == NULL) return(-1);
9519 ctxt->userData = ctxt;
9520 if (sax != NULL) {
9521 oldsax = ctxt->sax;
9522 ctxt->sax = sax;
9523 if (user_data != NULL)
9524 ctxt->userData = user_data;
9525 }
9526 newDoc = xmlNewDoc(BAD_CAST "1.0");
9527 if (newDoc == NULL) {
9528 xmlFreeParserCtxt(ctxt);
9529 return(-1);
9530 }
9531 if (doc != NULL) {
9532 newDoc->intSubset = doc->intSubset;
9533 newDoc->extSubset = doc->extSubset;
9534 }
9535 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9536 if (newDoc->children == NULL) {
9537 if (sax != NULL)
9538 ctxt->sax = oldsax;
9539 xmlFreeParserCtxt(ctxt);
9540 newDoc->intSubset = NULL;
9541 newDoc->extSubset = NULL;
9542 xmlFreeDoc(newDoc);
9543 return(-1);
9544 }
9545 nodePush(ctxt, newDoc->children);
9546 if (doc == NULL) {
9547 ctxt->myDoc = newDoc;
9548 } else {
9549 ctxt->myDoc = doc;
9550 newDoc->children->doc = doc;
9551 }
9552 ctxt->instate = XML_PARSER_CONTENT;
9553 ctxt->depth = depth;
9554
9555 /*
9556 * Doing validity checking on chunk doesn't make sense
9557 */
9558 ctxt->validate = 0;
9559 ctxt->loadsubset = 0;
9560
9561 xmlParseContent(ctxt);
9562
9563 if ((RAW == '<') && (NXT(1) == '/')) {
9564 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9566 ctxt->sax->error(ctxt->userData,
9567 "chunk is not well balanced\n");
9568 ctxt->wellFormed = 0;
9569 ctxt->disableSAX = 1;
9570 } else if (RAW != 0) {
9571 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9573 ctxt->sax->error(ctxt->userData,
9574 "extra content at the end of well balanced chunk\n");
9575 ctxt->wellFormed = 0;
9576 ctxt->disableSAX = 1;
9577 }
9578 if (ctxt->node != newDoc->children) {
9579 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9581 ctxt->sax->error(ctxt->userData,
9582 "chunk is not well balanced\n");
9583 ctxt->wellFormed = 0;
9584 ctxt->disableSAX = 1;
9585 }
9586
9587 if (!ctxt->wellFormed) {
9588 if (ctxt->errNo == 0)
9589 ret = 1;
9590 else
9591 ret = ctxt->errNo;
9592 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009593 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009594 xmlNodePtr cur;
9595
9596 /*
9597 * Return the newly created nodeset after unlinking it from
9598 * they pseudo parent.
9599 */
9600 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009601 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009602 while (cur != NULL) {
9603 cur->parent = NULL;
9604 cur = cur->next;
9605 }
9606 newDoc->children->children = NULL;
9607 }
9608 ret = 0;
9609 }
9610 if (sax != NULL)
9611 ctxt->sax = oldsax;
9612 xmlFreeParserCtxt(ctxt);
9613 newDoc->intSubset = NULL;
9614 newDoc->extSubset = NULL;
9615 xmlFreeDoc(newDoc);
9616
9617 return(ret);
9618}
9619
9620/**
9621 * xmlSAXParseEntity:
9622 * @sax: the SAX handler block
9623 * @filename: the filename
9624 *
9625 * parse an XML external entity out of context and build a tree.
9626 * It use the given SAX function block to handle the parsing callback.
9627 * If sax is NULL, fallback to the default DOM tree building routines.
9628 *
9629 * [78] extParsedEnt ::= TextDecl? content
9630 *
9631 * This correspond to a "Well Balanced" chunk
9632 *
9633 * Returns the resulting document tree
9634 */
9635
9636xmlDocPtr
9637xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9638 xmlDocPtr ret;
9639 xmlParserCtxtPtr ctxt;
9640 char *directory = NULL;
9641
9642 ctxt = xmlCreateFileParserCtxt(filename);
9643 if (ctxt == NULL) {
9644 return(NULL);
9645 }
9646 if (sax != NULL) {
9647 if (ctxt->sax != NULL)
9648 xmlFree(ctxt->sax);
9649 ctxt->sax = sax;
9650 ctxt->userData = NULL;
9651 }
9652
9653 if ((ctxt->directory == NULL) && (directory == NULL))
9654 directory = xmlParserGetDirectory(filename);
9655
9656 xmlParseExtParsedEnt(ctxt);
9657
9658 if (ctxt->wellFormed)
9659 ret = ctxt->myDoc;
9660 else {
9661 ret = NULL;
9662 xmlFreeDoc(ctxt->myDoc);
9663 ctxt->myDoc = NULL;
9664 }
9665 if (sax != NULL)
9666 ctxt->sax = NULL;
9667 xmlFreeParserCtxt(ctxt);
9668
9669 return(ret);
9670}
9671
9672/**
9673 * xmlParseEntity:
9674 * @filename: the filename
9675 *
9676 * parse an XML external entity out of context and build a tree.
9677 *
9678 * [78] extParsedEnt ::= TextDecl? content
9679 *
9680 * This correspond to a "Well Balanced" chunk
9681 *
9682 * Returns the resulting document tree
9683 */
9684
9685xmlDocPtr
9686xmlParseEntity(const char *filename) {
9687 return(xmlSAXParseEntity(NULL, filename));
9688}
9689
9690/**
9691 * xmlCreateEntityParserCtxt:
9692 * @URL: the entity URL
9693 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009694 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009695 *
9696 * Create a parser context for an external entity
9697 * Automatic support for ZLIB/Compress compressed document is provided
9698 * by default if found at compile-time.
9699 *
9700 * Returns the new parser context or NULL
9701 */
9702xmlParserCtxtPtr
9703xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9704 const xmlChar *base) {
9705 xmlParserCtxtPtr ctxt;
9706 xmlParserInputPtr inputStream;
9707 char *directory = NULL;
9708 xmlChar *uri;
9709
9710 ctxt = xmlNewParserCtxt();
9711 if (ctxt == NULL) {
9712 return(NULL);
9713 }
9714
9715 uri = xmlBuildURI(URL, base);
9716
9717 if (uri == NULL) {
9718 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9719 if (inputStream == NULL) {
9720 xmlFreeParserCtxt(ctxt);
9721 return(NULL);
9722 }
9723
9724 inputPush(ctxt, inputStream);
9725
9726 if ((ctxt->directory == NULL) && (directory == NULL))
9727 directory = xmlParserGetDirectory((char *)URL);
9728 if ((ctxt->directory == NULL) && (directory != NULL))
9729 ctxt->directory = directory;
9730 } else {
9731 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9732 if (inputStream == NULL) {
9733 xmlFree(uri);
9734 xmlFreeParserCtxt(ctxt);
9735 return(NULL);
9736 }
9737
9738 inputPush(ctxt, inputStream);
9739
9740 if ((ctxt->directory == NULL) && (directory == NULL))
9741 directory = xmlParserGetDirectory((char *)uri);
9742 if ((ctxt->directory == NULL) && (directory != NULL))
9743 ctxt->directory = directory;
9744 xmlFree(uri);
9745 }
9746
9747 return(ctxt);
9748}
9749
9750/************************************************************************
9751 * *
9752 * Front ends when parsing from a file *
9753 * *
9754 ************************************************************************/
9755
9756/**
9757 * xmlCreateFileParserCtxt:
9758 * @filename: the filename
9759 *
9760 * Create a parser context for a file content.
9761 * Automatic support for ZLIB/Compress compressed document is provided
9762 * by default if found at compile-time.
9763 *
9764 * Returns the new parser context or NULL
9765 */
9766xmlParserCtxtPtr
9767xmlCreateFileParserCtxt(const char *filename)
9768{
9769 xmlParserCtxtPtr ctxt;
9770 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009771 char *directory = NULL;
9772
Owen Taylor3473f882001-02-23 17:55:21 +00009773 ctxt = xmlNewParserCtxt();
9774 if (ctxt == NULL) {
9775 if (xmlDefaultSAXHandler.error != NULL) {
9776 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9777 }
9778 return(NULL);
9779 }
9780
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009781 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009782 if (inputStream == NULL) {
9783 xmlFreeParserCtxt(ctxt);
9784 return(NULL);
9785 }
9786
Owen Taylor3473f882001-02-23 17:55:21 +00009787 inputPush(ctxt, inputStream);
9788 if ((ctxt->directory == NULL) && (directory == NULL))
9789 directory = xmlParserGetDirectory(filename);
9790 if ((ctxt->directory == NULL) && (directory != NULL))
9791 ctxt->directory = directory;
9792
9793 return(ctxt);
9794}
9795
9796/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009797 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009798 * @sax: the SAX handler block
9799 * @filename: the filename
9800 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9801 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009802 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009803 *
9804 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9805 * compressed document is provided by default if found at compile-time.
9806 * It use the given SAX function block to handle the parsing callback.
9807 * If sax is NULL, fallback to the default DOM tree building routines.
9808 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009809 * User data (void *) is stored within the parser context, so it is
9810 * available nearly everywhere in libxml.
9811 *
Owen Taylor3473f882001-02-23 17:55:21 +00009812 * Returns the resulting document tree
9813 */
9814
9815xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009816xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9817 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009818 xmlDocPtr ret;
9819 xmlParserCtxtPtr ctxt;
9820 char *directory = NULL;
9821
Daniel Veillard635ef722001-10-29 11:48:19 +00009822 xmlInitParser();
9823
Owen Taylor3473f882001-02-23 17:55:21 +00009824 ctxt = xmlCreateFileParserCtxt(filename);
9825 if (ctxt == NULL) {
9826 return(NULL);
9827 }
9828 if (sax != NULL) {
9829 if (ctxt->sax != NULL)
9830 xmlFree(ctxt->sax);
9831 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009832 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009833 if (data!=NULL) {
9834 ctxt->_private=data;
9835 }
Owen Taylor3473f882001-02-23 17:55:21 +00009836
9837 if ((ctxt->directory == NULL) && (directory == NULL))
9838 directory = xmlParserGetDirectory(filename);
9839 if ((ctxt->directory == NULL) && (directory != NULL))
9840 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9841
9842 xmlParseDocument(ctxt);
9843
9844 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9845 else {
9846 ret = NULL;
9847 xmlFreeDoc(ctxt->myDoc);
9848 ctxt->myDoc = NULL;
9849 }
9850 if (sax != NULL)
9851 ctxt->sax = NULL;
9852 xmlFreeParserCtxt(ctxt);
9853
9854 return(ret);
9855}
9856
9857/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009858 * xmlSAXParseFile:
9859 * @sax: the SAX handler block
9860 * @filename: the filename
9861 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9862 * documents
9863 *
9864 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9865 * compressed document is provided by default if found at compile-time.
9866 * It use the given SAX function block to handle the parsing callback.
9867 * If sax is NULL, fallback to the default DOM tree building routines.
9868 *
9869 * Returns the resulting document tree
9870 */
9871
9872xmlDocPtr
9873xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9874 int recovery) {
9875 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9876}
9877
9878/**
Owen Taylor3473f882001-02-23 17:55:21 +00009879 * xmlRecoverDoc:
9880 * @cur: a pointer to an array of xmlChar
9881 *
9882 * parse an XML in-memory document and build a tree.
9883 * In the case the document is not Well Formed, a tree is built anyway
9884 *
9885 * Returns the resulting document tree
9886 */
9887
9888xmlDocPtr
9889xmlRecoverDoc(xmlChar *cur) {
9890 return(xmlSAXParseDoc(NULL, cur, 1));
9891}
9892
9893/**
9894 * xmlParseFile:
9895 * @filename: the filename
9896 *
9897 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9898 * compressed document is provided by default if found at compile-time.
9899 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009900 * Returns the resulting document tree if the file was wellformed,
9901 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009902 */
9903
9904xmlDocPtr
9905xmlParseFile(const char *filename) {
9906 return(xmlSAXParseFile(NULL, filename, 0));
9907}
9908
9909/**
9910 * xmlRecoverFile:
9911 * @filename: the filename
9912 *
9913 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9914 * compressed document is provided by default if found at compile-time.
9915 * In the case the document is not Well Formed, a tree is built anyway
9916 *
9917 * Returns the resulting document tree
9918 */
9919
9920xmlDocPtr
9921xmlRecoverFile(const char *filename) {
9922 return(xmlSAXParseFile(NULL, filename, 1));
9923}
9924
9925
9926/**
9927 * xmlSetupParserForBuffer:
9928 * @ctxt: an XML parser context
9929 * @buffer: a xmlChar * buffer
9930 * @filename: a file name
9931 *
9932 * Setup the parser context to parse a new buffer; Clears any prior
9933 * contents from the parser context. The buffer parameter must not be
9934 * NULL, but the filename parameter can be
9935 */
9936void
9937xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9938 const char* filename)
9939{
9940 xmlParserInputPtr input;
9941
9942 input = xmlNewInputStream(ctxt);
9943 if (input == NULL) {
9944 perror("malloc");
9945 xmlFree(ctxt);
9946 return;
9947 }
9948
9949 xmlClearParserCtxt(ctxt);
9950 if (filename != NULL)
9951 input->filename = xmlMemStrdup(filename);
9952 input->base = buffer;
9953 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009954 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009955 inputPush(ctxt, input);
9956}
9957
9958/**
9959 * xmlSAXUserParseFile:
9960 * @sax: a SAX handler
9961 * @user_data: The user data returned on SAX callbacks
9962 * @filename: a file name
9963 *
9964 * parse an XML file and call the given SAX handler routines.
9965 * Automatic support for ZLIB/Compress compressed document is provided
9966 *
9967 * Returns 0 in case of success or a error number otherwise
9968 */
9969int
9970xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9971 const char *filename) {
9972 int ret = 0;
9973 xmlParserCtxtPtr ctxt;
9974
9975 ctxt = xmlCreateFileParserCtxt(filename);
9976 if (ctxt == NULL) return -1;
9977 if (ctxt->sax != &xmlDefaultSAXHandler)
9978 xmlFree(ctxt->sax);
9979 ctxt->sax = sax;
9980 if (user_data != NULL)
9981 ctxt->userData = user_data;
9982
9983 xmlParseDocument(ctxt);
9984
9985 if (ctxt->wellFormed)
9986 ret = 0;
9987 else {
9988 if (ctxt->errNo != 0)
9989 ret = ctxt->errNo;
9990 else
9991 ret = -1;
9992 }
9993 if (sax != NULL)
9994 ctxt->sax = NULL;
9995 xmlFreeParserCtxt(ctxt);
9996
9997 return ret;
9998}
9999
10000/************************************************************************
10001 * *
10002 * Front ends when parsing from memory *
10003 * *
10004 ************************************************************************/
10005
10006/**
10007 * xmlCreateMemoryParserCtxt:
10008 * @buffer: a pointer to a char array
10009 * @size: the size of the array
10010 *
10011 * Create a parser context for an XML in-memory document.
10012 *
10013 * Returns the new parser context or NULL
10014 */
10015xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010016xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010017 xmlParserCtxtPtr ctxt;
10018 xmlParserInputPtr input;
10019 xmlParserInputBufferPtr buf;
10020
10021 if (buffer == NULL)
10022 return(NULL);
10023 if (size <= 0)
10024 return(NULL);
10025
10026 ctxt = xmlNewParserCtxt();
10027 if (ctxt == NULL)
10028 return(NULL);
10029
10030 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10031 if (buf == NULL) return(NULL);
10032
10033 input = xmlNewInputStream(ctxt);
10034 if (input == NULL) {
10035 xmlFreeParserCtxt(ctxt);
10036 return(NULL);
10037 }
10038
10039 input->filename = NULL;
10040 input->buf = buf;
10041 input->base = input->buf->buffer->content;
10042 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010043 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010044
10045 inputPush(ctxt, input);
10046 return(ctxt);
10047}
10048
10049/**
10050 * xmlSAXParseMemory:
10051 * @sax: the SAX handler block
10052 * @buffer: an pointer to a char array
10053 * @size: the size of the array
10054 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10055 * documents
10056 *
10057 * parse an XML in-memory block and use the given SAX function block
10058 * to handle the parsing callback. If sax is NULL, fallback to the default
10059 * DOM tree building routines.
10060 *
10061 * Returns the resulting document tree
10062 */
10063xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010064xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10065 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010066 xmlDocPtr ret;
10067 xmlParserCtxtPtr ctxt;
10068
10069 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10070 if (ctxt == NULL) return(NULL);
10071 if (sax != NULL) {
10072 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010073 }
10074
10075 xmlParseDocument(ctxt);
10076
10077 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10078 else {
10079 ret = NULL;
10080 xmlFreeDoc(ctxt->myDoc);
10081 ctxt->myDoc = NULL;
10082 }
10083 if (sax != NULL)
10084 ctxt->sax = NULL;
10085 xmlFreeParserCtxt(ctxt);
10086
10087 return(ret);
10088}
10089
10090/**
10091 * xmlParseMemory:
10092 * @buffer: an pointer to a char array
10093 * @size: the size of the array
10094 *
10095 * parse an XML in-memory block and build a tree.
10096 *
10097 * Returns the resulting document tree
10098 */
10099
Daniel Veillard50822cb2001-07-26 20:05:51 +000010100xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010101 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10102}
10103
10104/**
10105 * xmlRecoverMemory:
10106 * @buffer: an pointer to a char array
10107 * @size: the size of the array
10108 *
10109 * parse an XML in-memory block and build a tree.
10110 * In the case the document is not Well Formed, a tree is built anyway
10111 *
10112 * Returns the resulting document tree
10113 */
10114
Daniel Veillard50822cb2001-07-26 20:05:51 +000010115xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010116 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10117}
10118
10119/**
10120 * xmlSAXUserParseMemory:
10121 * @sax: a SAX handler
10122 * @user_data: The user data returned on SAX callbacks
10123 * @buffer: an in-memory XML document input
10124 * @size: the length of the XML document in bytes
10125 *
10126 * A better SAX parsing routine.
10127 * parse an XML in-memory buffer and call the given SAX handler routines.
10128 *
10129 * Returns 0 in case of success or a error number otherwise
10130 */
10131int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010132 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010133 int ret = 0;
10134 xmlParserCtxtPtr ctxt;
10135 xmlSAXHandlerPtr oldsax = NULL;
10136
10137 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10138 if (ctxt == NULL) return -1;
10139 if (sax != NULL) {
10140 oldsax = ctxt->sax;
10141 ctxt->sax = sax;
10142 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010143 if (user_data != NULL)
10144 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010145
10146 xmlParseDocument(ctxt);
10147
10148 if (ctxt->wellFormed)
10149 ret = 0;
10150 else {
10151 if (ctxt->errNo != 0)
10152 ret = ctxt->errNo;
10153 else
10154 ret = -1;
10155 }
10156 if (sax != NULL) {
10157 ctxt->sax = oldsax;
10158 }
10159 xmlFreeParserCtxt(ctxt);
10160
10161 return ret;
10162}
10163
10164/**
10165 * xmlCreateDocParserCtxt:
10166 * @cur: a pointer to an array of xmlChar
10167 *
10168 * Creates a parser context for an XML in-memory document.
10169 *
10170 * Returns the new parser context or NULL
10171 */
10172xmlParserCtxtPtr
10173xmlCreateDocParserCtxt(xmlChar *cur) {
10174 int len;
10175
10176 if (cur == NULL)
10177 return(NULL);
10178 len = xmlStrlen(cur);
10179 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10180}
10181
10182/**
10183 * xmlSAXParseDoc:
10184 * @sax: the SAX handler block
10185 * @cur: a pointer to an array of xmlChar
10186 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10187 * documents
10188 *
10189 * parse an XML in-memory document and build a tree.
10190 * It use the given SAX function block to handle the parsing callback.
10191 * If sax is NULL, fallback to the default DOM tree building routines.
10192 *
10193 * Returns the resulting document tree
10194 */
10195
10196xmlDocPtr
10197xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10198 xmlDocPtr ret;
10199 xmlParserCtxtPtr ctxt;
10200
10201 if (cur == NULL) return(NULL);
10202
10203
10204 ctxt = xmlCreateDocParserCtxt(cur);
10205 if (ctxt == NULL) return(NULL);
10206 if (sax != NULL) {
10207 ctxt->sax = sax;
10208 ctxt->userData = NULL;
10209 }
10210
10211 xmlParseDocument(ctxt);
10212 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10213 else {
10214 ret = NULL;
10215 xmlFreeDoc(ctxt->myDoc);
10216 ctxt->myDoc = NULL;
10217 }
10218 if (sax != NULL)
10219 ctxt->sax = NULL;
10220 xmlFreeParserCtxt(ctxt);
10221
10222 return(ret);
10223}
10224
10225/**
10226 * xmlParseDoc:
10227 * @cur: a pointer to an array of xmlChar
10228 *
10229 * parse an XML in-memory document and build a tree.
10230 *
10231 * Returns the resulting document tree
10232 */
10233
10234xmlDocPtr
10235xmlParseDoc(xmlChar *cur) {
10236 return(xmlSAXParseDoc(NULL, cur, 0));
10237}
10238
Daniel Veillard8107a222002-01-13 14:10:10 +000010239/************************************************************************
10240 * *
10241 * Specific function to keep track of entities references *
10242 * and used by the XSLT debugger *
10243 * *
10244 ************************************************************************/
10245
10246static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10247
10248/**
10249 * xmlAddEntityReference:
10250 * @ent : A valid entity
10251 * @firstNode : A valid first node for children of entity
10252 * @lastNode : A valid last node of children entity
10253 *
10254 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10255 */
10256static void
10257xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10258 xmlNodePtr lastNode)
10259{
10260 if (xmlEntityRefFunc != NULL) {
10261 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10262 }
10263}
10264
10265
10266/**
10267 * xmlSetEntityReferenceFunc:
10268 * @func : A valid function
10269 *
10270 * Set the function to call call back when a xml reference has been made
10271 */
10272void
10273xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10274{
10275 xmlEntityRefFunc = func;
10276}
Owen Taylor3473f882001-02-23 17:55:21 +000010277
10278/************************************************************************
10279 * *
10280 * Miscellaneous *
10281 * *
10282 ************************************************************************/
10283
10284#ifdef LIBXML_XPATH_ENABLED
10285#include <libxml/xpath.h>
10286#endif
10287
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010288extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010289static int xmlParserInitialized = 0;
10290
10291/**
10292 * xmlInitParser:
10293 *
10294 * Initialization function for the XML parser.
10295 * This is not reentrant. Call once before processing in case of
10296 * use in multithreaded programs.
10297 */
10298
10299void
10300xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010301 if (xmlParserInitialized != 0)
10302 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010303
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010304 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10305 (xmlGenericError == NULL))
10306 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010307 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010308 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010309 xmlInitCharEncodingHandlers();
10310 xmlInitializePredefinedEntities();
10311 xmlDefaultSAXHandlerInit();
10312 xmlRegisterDefaultInputCallbacks();
10313 xmlRegisterDefaultOutputCallbacks();
10314#ifdef LIBXML_HTML_ENABLED
10315 htmlInitAutoClose();
10316 htmlDefaultSAXHandlerInit();
10317#endif
10318#ifdef LIBXML_XPATH_ENABLED
10319 xmlXPathInit();
10320#endif
10321 xmlParserInitialized = 1;
10322}
10323
10324/**
10325 * xmlCleanupParser:
10326 *
10327 * Cleanup function for the XML parser. It tries to reclaim all
10328 * parsing related global memory allocated for the parser processing.
10329 * It doesn't deallocate any document related memory. Calling this
10330 * function should not prevent reusing the parser.
10331 */
10332
10333void
10334xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010335 xmlCleanupCharEncodingHandlers();
10336 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010337#ifdef LIBXML_CATALOG_ENABLED
10338 xmlCatalogCleanup();
10339#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010340 xmlCleanupThreads();
10341 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010342}