blob: d93de3ba2adfa6a9fd71513b26d927ad334745c1 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * Daniel.Veillard@w3.org
31 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
Owen Taylor3473f882001-02-23 17:55:21 +000038#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '\\'
40#else
Owen Taylor3473f882001-02-23 17:55:21 +000041#define XML_DIR_SEP '/'
42#endif
43
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/xmlmemory.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Buffer sizing used by the string decoding code of this file:
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation buffer
 * and XML_PARSER_BUFFER_SIZE the free margin kept at its end.
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

/*
 * Various global defaults for parsing
 */
int xmlGetWarningsDefaultValue = 1;
/* when non zero, pushing and popping of inputs is traced */
int xmlParserDebugEntities = 0;
#ifndef VMS
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#else
/* truncated definitions for VMS, the full names are mapped onto them */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;

/*
 * List of XML prefixed PI allowed by W3C specs, NULL terminated
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
Daniel Veillard257d9102001-05-08 10:41:44 +0000112static int
113xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlSAXHandlerPtr sax,
114 void *user_data, int depth, const xmlChar *URL,
115 const xmlChar *ID, xmlNodePtr *list, void *private);
Owen Taylor3473f882001-02-23 17:55:21 +0000116
117/************************************************************************
118 * *
119 * Parser stacks related functions and macros *
120 * *
121 ************************************************************************/
122
123xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
124 const xmlChar ** str);
125
126/*
127 * Generic function for accessing stacks in the Parser Context
128 */
129
130#define PUSH_AND_POP(scope, type, name) \
131scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
132 if (ctxt->name##Nr >= ctxt->name##Max) { \
133 ctxt->name##Max *= 2; \
134 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
135 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
136 if (ctxt->name##Tab == NULL) { \
137 xmlGenericError(xmlGenericErrorContext, \
138 "realloc failed !\n"); \
139 return(0); \
140 } \
141 } \
142 ctxt->name##Tab[ctxt->name##Nr] = value; \
143 ctxt->name = value; \
144 return(ctxt->name##Nr++); \
145} \
146scope type name##Pop(xmlParserCtxtPtr ctxt) { \
147 type ret; \
148 if (ctxt->name##Nr <= 0) return(0); \
149 ctxt->name##Nr--; \
150 if (ctxt->name##Nr > 0) \
151 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
152 else \
153 ctxt->name = NULL; \
154 ret = ctxt->name##Tab[ctxt->name##Nr]; \
155 ctxt->name##Tab[ctxt->name##Nr] = 0; \
156 return(ret); \
157} \
158
159/*
160 * Those macros actually generate the functions
161 */
162PUSH_AND_POP(extern, xmlParserInputPtr, input)
163PUSH_AND_POP(extern, xmlNodePtr, node)
164PUSH_AND_POP(extern, xmlChar*, name)
165
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000166static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000167 if (ctxt->spaceNr >= ctxt->spaceMax) {
168 ctxt->spaceMax *= 2;
169 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
170 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
171 if (ctxt->spaceTab == NULL) {
172 xmlGenericError(xmlGenericErrorContext,
173 "realloc failed !\n");
174 return(0);
175 }
176 }
177 ctxt->spaceTab[ctxt->spaceNr] = val;
178 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
179 return(ctxt->spaceNr++);
180}
181
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000182static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000183 int ret;
184 if (ctxt->spaceNr <= 0) return(0);
185 ctxt->spaceNr--;
186 if (ctxt->spaceNr > 0)
187 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
188 else
189 ctxt->space = NULL;
190 ret = ctxt->spaceTab[ctxt->spaceNr];
191 ctxt->spaceTab[ctxt->spaceNr] = -1;
192 return(ret);
193}
194
195/*
196 * Macros for accessing the content. Those should be used only by the parser,
197 * and not exported.
198 *
199 * Dirty macros, i.e. one often need to make assumption on the context to
200 * use them
201 *
202 * CUR_PTR return the current pointer to the xmlChar to be parsed.
203 * To be used with extreme caution since operations consuming
204 * characters may move the input buffer to a different location !
205 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
206 * This should be used internally by the parser
207 * only to compare to ASCII values otherwise it would break when
208 * running with UTF-8 encoding.
209 * RAW same as CUR but in the input buffer, bypass any token
210 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
213 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
214 * strings within the parser.
215 *
216 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
217 *
218 * NEXT Skip to the next character, this does the proper decoding
219 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
220 * NEXTL(l) Skip l xmlChars in the input buffer
221 * CUR_CHAR(l) returns the current unicode character (int), set l
222 * to the number of xmlChars used for the encoding [0-5].
223 * CUR_SCHAR same but operate on a string instead of the context
224 * COPY_BUF copy the current unicode char to the target buffer, increment
225 * the index
226 * GROW, SHRINK handling of input buffers
227 */
228
/*
 * All the macros below expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

/* byte at the read position, -1 while a token is pending in ctxt->token */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* pending token if there is one, else the byte at the read position */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val bytes (val is evaluated twice), then handle a parameter
 * entity reference and the end of the current input at the new position.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK, GROW and NEXT1 expand to a bare "if" or brace block: do not use
 * them as the body of an if that has an else branch.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* advance by exactly one byte, refilling the input when its end is hit */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance by l bytes, updating line/col from the byte being left and
 * dropping any pending token.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i and advance i: a plain byte
 * store when l is 1, xmlCopyCharMultiByte() otherwise.  Arguments are
 * parenthesized and the body is a single statement, so expressions can be
 * passed safely and the macro can be used inside if/else.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));			\
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +0000281
282/**
283 * xmlSkipBlankChars:
284 * @ctxt: the XML parser context
285 *
286 * skip all blanks character found at that point in the input streams.
287 * It pops up finished entities in the process if allowable at that point.
288 *
289 * Returns the number of space chars skipped
290 */
291
292int
293xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000294 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000295
Daniel Veillard02141ea2001-04-30 11:46:40 +0000296 if (ctxt->token != 0) {
297 if (!IS_BLANK(ctxt->token))
298 return(0);
299 ctxt->token = 0;
300 res++;
301 }
Owen Taylor3473f882001-02-23 17:55:21 +0000302 /*
303 * It's Okay to use CUR/NEXT here since all the blanks are on
304 * the ASCII range.
305 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000306 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
307 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000308 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000309 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000310 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000311 cur = ctxt->input->cur;
312 while (IS_BLANK(*cur)) {
313 if (*cur == '\n') {
314 ctxt->input->line++; ctxt->input->col = 1;
315 }
316 cur++;
317 res++;
318 if (*cur == 0) {
319 ctxt->input->cur = cur;
320 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
321 cur = ctxt->input->cur;
322 }
323 }
324 ctxt->input->cur = cur;
325 } else {
326 int cur;
327 do {
328 cur = CUR;
329 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
330 NEXT;
331 cur = CUR;
332 res++;
333 }
334 while ((cur == 0) && (ctxt->inputNr > 1) &&
335 (ctxt->instate != XML_PARSER_COMMENT)) {
336 xmlPopInput(ctxt);
337 cur = CUR;
338 }
339 /*
340 * Need to handle support of entities branching here
341 */
342 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
343 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
344 }
Owen Taylor3473f882001-02-23 17:55:21 +0000345 return(res);
346}
347
348/************************************************************************
349 * *
350 * Commodity functions to handle entities *
351 * *
352 ************************************************************************/
353
354/**
355 * xmlPopInput:
356 * @ctxt: an XML parser context
357 *
358 * xmlPopInput: the current input pointed by ctxt->input came to an end
359 * pop it and return the next char.
360 *
361 * Returns the current xmlChar in the parser context
362 */
363xmlChar
364xmlPopInput(xmlParserCtxtPtr ctxt) {
365 if (ctxt->inputNr == 1) return(0); /* End of main Input */
366 if (xmlParserDebugEntities)
367 xmlGenericError(xmlGenericErrorContext,
368 "Popping input %d\n", ctxt->inputNr);
369 xmlFreeInputStream(inputPop(ctxt));
370 if ((*ctxt->input->cur == 0) &&
371 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
372 return(xmlPopInput(ctxt));
373 return(CUR);
374}
375
376/**
377 * xmlPushInput:
378 * @ctxt: an XML parser context
379 * @input: an XML parser input fragment (entity, XML fragment ...).
380 *
381 * xmlPushInput: switch to a new input stream which is stacked on top
382 * of the previous one(s).
383 */
384void
385xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
386 if (input == NULL) return;
387
388 if (xmlParserDebugEntities) {
389 if ((ctxt->input != NULL) && (ctxt->input->filename))
390 xmlGenericError(xmlGenericErrorContext,
391 "%s(%d): ", ctxt->input->filename,
392 ctxt->input->line);
393 xmlGenericError(xmlGenericErrorContext,
394 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
395 }
396 inputPush(ctxt, input);
397 GROW;
398}
399
400/**
401 * xmlParseCharRef:
402 * @ctxt: an XML parser context
403 *
404 * parse Reference declarations
405 *
406 * [66] CharRef ::= '&#' [0-9]+ ';' |
407 * '&#x' [0-9a-fA-F]+ ';'
408 *
409 * [ WFC: Legal Character ]
410 * Characters referred to using character references must match the
411 * production for Char.
412 *
413 * Returns the value parsed (as an int), 0 in case of error
414 */
415int
416xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000417 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000418 int count = 0;
419
420 if (ctxt->token != 0) {
421 val = ctxt->token;
422 ctxt->token = 0;
423 return(val);
424 }
425 /*
426 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
427 */
428 if ((RAW == '&') && (NXT(1) == '#') &&
429 (NXT(2) == 'x')) {
430 SKIP(3);
431 GROW;
432 while (RAW != ';') { /* loop blocked by count */
433 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
434 val = val * 16 + (CUR - '0');
435 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
436 val = val * 16 + (CUR - 'a') + 10;
437 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
438 val = val * 16 + (CUR - 'A') + 10;
439 else {
440 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
442 ctxt->sax->error(ctxt->userData,
443 "xmlParseCharRef: invalid hexadecimal value\n");
444 ctxt->wellFormed = 0;
445 ctxt->disableSAX = 1;
446 val = 0;
447 break;
448 }
449 NEXT;
450 count++;
451 }
452 if (RAW == ';') {
453 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
454 ctxt->nbChars ++;
455 ctxt->input->cur++;
456 }
457 } else if ((RAW == '&') && (NXT(1) == '#')) {
458 SKIP(2);
459 GROW;
460 while (RAW != ';') { /* loop blocked by count */
461 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
462 val = val * 10 + (CUR - '0');
463 else {
464 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
466 ctxt->sax->error(ctxt->userData,
467 "xmlParseCharRef: invalid decimal value\n");
468 ctxt->wellFormed = 0;
469 ctxt->disableSAX = 1;
470 val = 0;
471 break;
472 }
473 NEXT;
474 count++;
475 }
476 if (RAW == ';') {
477 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
478 ctxt->nbChars ++;
479 ctxt->input->cur++;
480 }
481 } else {
482 ctxt->errNo = XML_ERR_INVALID_CHARREF;
483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
484 ctxt->sax->error(ctxt->userData,
485 "xmlParseCharRef: invalid value\n");
486 ctxt->wellFormed = 0;
487 ctxt->disableSAX = 1;
488 }
489
490 /*
491 * [ WFC: Legal Character ]
492 * Characters referred to using character references must match the
493 * production for Char.
494 */
495 if (IS_CHAR(val)) {
496 return(val);
497 } else {
498 ctxt->errNo = XML_ERR_INVALID_CHAR;
499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
500 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
501 val);
502 ctxt->wellFormed = 0;
503 ctxt->disableSAX = 1;
504 }
505 return(0);
506}
507
508/**
509 * xmlParseStringCharRef:
510 * @ctxt: an XML parser context
511 * @str: a pointer to an index in the string
512 *
513 * parse Reference declarations, variant parsing from a string rather
514 * than an an input flow.
515 *
516 * [66] CharRef ::= '&#' [0-9]+ ';' |
517 * '&#x' [0-9a-fA-F]+ ';'
518 *
519 * [ WFC: Legal Character ]
520 * Characters referred to using character references must match the
521 * production for Char.
522 *
523 * Returns the value parsed (as an int), 0 in case of error, str will be
524 * updated to the current value of the index
525 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000526static int
Owen Taylor3473f882001-02-23 17:55:21 +0000527xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
528 const xmlChar *ptr;
529 xmlChar cur;
530 int val = 0;
531
532 if ((str == NULL) || (*str == NULL)) return(0);
533 ptr = *str;
534 cur = *ptr;
535 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
536 ptr += 3;
537 cur = *ptr;
538 while (cur != ';') { /* Non input consuming loop */
539 if ((cur >= '0') && (cur <= '9'))
540 val = val * 16 + (cur - '0');
541 else if ((cur >= 'a') && (cur <= 'f'))
542 val = val * 16 + (cur - 'a') + 10;
543 else if ((cur >= 'A') && (cur <= 'F'))
544 val = val * 16 + (cur - 'A') + 10;
545 else {
546 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
548 ctxt->sax->error(ctxt->userData,
549 "xmlParseStringCharRef: invalid hexadecimal value\n");
550 ctxt->wellFormed = 0;
551 ctxt->disableSAX = 1;
552 val = 0;
553 break;
554 }
555 ptr++;
556 cur = *ptr;
557 }
558 if (cur == ';')
559 ptr++;
560 } else if ((cur == '&') && (ptr[1] == '#')){
561 ptr += 2;
562 cur = *ptr;
563 while (cur != ';') { /* Non input consuming loops */
564 if ((cur >= '0') && (cur <= '9'))
565 val = val * 10 + (cur - '0');
566 else {
567 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
569 ctxt->sax->error(ctxt->userData,
570 "xmlParseStringCharRef: invalid decimal value\n");
571 ctxt->wellFormed = 0;
572 ctxt->disableSAX = 1;
573 val = 0;
574 break;
575 }
576 ptr++;
577 cur = *ptr;
578 }
579 if (cur == ';')
580 ptr++;
581 } else {
582 ctxt->errNo = XML_ERR_INVALID_CHARREF;
583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
584 ctxt->sax->error(ctxt->userData,
585 "xmlParseCharRef: invalid value\n");
586 ctxt->wellFormed = 0;
587 ctxt->disableSAX = 1;
588 return(0);
589 }
590 *str = ptr;
591
592 /*
593 * [ WFC: Legal Character ]
594 * Characters referred to using character references must match the
595 * production for Char.
596 */
597 if (IS_CHAR(val)) {
598 return(val);
599 } else {
600 ctxt->errNo = XML_ERR_INVALID_CHAR;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "CharRef: invalid xmlChar value %d\n", val);
604 ctxt->wellFormed = 0;
605 ctxt->disableSAX = 1;
606 }
607 return(0);
608}
609
610/**
611 * xmlParserHandlePEReference:
612 * @ctxt: the parser context
613 *
614 * [69] PEReference ::= '%' Name ';'
615 *
616 * [ WFC: No Recursion ]
617 * A parsed entity must not contain a recursive
618 * reference to itself, either directly or indirectly.
619 *
620 * [ WFC: Entity Declared ]
621 * In a document without any DTD, a document with only an internal DTD
622 * subset which contains no parameter entity references, or a document
623 * with "standalone='yes'", ... ... The declaration of a parameter
624 * entity must precede any reference to it...
625 *
626 * [ VC: Entity Declared ]
627 * In a document with an external subset or external parameter entities
628 * with "standalone='no'", ... ... The declaration of a parameter entity
629 * must precede any reference to it...
630 *
631 * [ WFC: In DTD ]
632 * Parameter-entity references may only appear in the DTD.
633 * NOTE: misleading but this is handled.
634 *
635 * A PEReference may have been detected in the current input stream
636 * the handling is done accordingly to
637 * http://www.w3.org/TR/REC-xml#entproc
638 * i.e.
639 * - Included in literal in entity values
640 * - Included as Paraemeter Entity reference within DTDs
641 */
642void
643xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
644 xmlChar *name;
645 xmlEntityPtr entity = NULL;
646 xmlParserInputPtr input;
647
648 if (ctxt->token != 0) {
649 return;
650 }
651 if (RAW != '%') return;
652 switch(ctxt->instate) {
653 case XML_PARSER_CDATA_SECTION:
654 return;
655 case XML_PARSER_COMMENT:
656 return;
657 case XML_PARSER_START_TAG:
658 return;
659 case XML_PARSER_END_TAG:
660 return;
661 case XML_PARSER_EOF:
662 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
664 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
665 ctxt->wellFormed = 0;
666 ctxt->disableSAX = 1;
667 return;
668 case XML_PARSER_PROLOG:
669 case XML_PARSER_START:
670 case XML_PARSER_MISC:
671 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
673 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
674 ctxt->wellFormed = 0;
675 ctxt->disableSAX = 1;
676 return;
677 case XML_PARSER_ENTITY_DECL:
678 case XML_PARSER_CONTENT:
679 case XML_PARSER_ATTRIBUTE_VALUE:
680 case XML_PARSER_PI:
681 case XML_PARSER_SYSTEM_LITERAL:
682 /* we just ignore it there */
683 return;
684 case XML_PARSER_EPILOG:
685 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
688 ctxt->wellFormed = 0;
689 ctxt->disableSAX = 1;
690 return;
691 case XML_PARSER_ENTITY_VALUE:
692 /*
693 * NOTE: in the case of entity values, we don't do the
694 * substitution here since we need the literal
695 * entity value to be able to save the internal
696 * subset of the document.
697 * This will be handled by xmlStringDecodeEntities
698 */
699 return;
700 case XML_PARSER_DTD:
701 /*
702 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
703 * In the internal DTD subset, parameter-entity references
704 * can occur only where markup declarations can occur, not
705 * within markup declarations.
706 * In that case this is handled in xmlParseMarkupDecl
707 */
708 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
709 return;
710 break;
711 case XML_PARSER_IGNORE:
712 return;
713 }
714
715 NEXT;
716 name = xmlParseName(ctxt);
717 if (xmlParserDebugEntities)
718 xmlGenericError(xmlGenericErrorContext,
719 "PE Reference: %s\n", name);
720 if (name == NULL) {
721 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
724 ctxt->wellFormed = 0;
725 ctxt->disableSAX = 1;
726 } else {
727 if (RAW == ';') {
728 NEXT;
729 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
730 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
731 if (entity == NULL) {
732
733 /*
734 * [ WFC: Entity Declared ]
735 * In a document without any DTD, a document with only an
736 * internal DTD subset which contains no parameter entity
737 * references, or a document with "standalone='yes'", ...
738 * ... The declaration of a parameter entity must precede
739 * any reference to it...
740 */
741 if ((ctxt->standalone == 1) ||
742 ((ctxt->hasExternalSubset == 0) &&
743 (ctxt->hasPErefs == 0))) {
744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
745 ctxt->sax->error(ctxt->userData,
746 "PEReference: %%%s; not found\n", name);
747 ctxt->wellFormed = 0;
748 ctxt->disableSAX = 1;
749 } else {
750 /*
751 * [ VC: Entity Declared ]
752 * In a document with an external subset or external
753 * parameter entities with "standalone='no'", ...
754 * ... The declaration of a parameter entity must precede
755 * any reference to it...
756 */
757 if ((!ctxt->disableSAX) &&
758 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
759 ctxt->vctxt.error(ctxt->vctxt.userData,
760 "PEReference: %%%s; not found\n", name);
761 } else if ((!ctxt->disableSAX) &&
762 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
763 ctxt->sax->warning(ctxt->userData,
764 "PEReference: %%%s; not found\n", name);
765 ctxt->valid = 0;
766 }
767 } else {
768 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
769 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
770 /*
771 * handle the extra spaces added before and after
772 * c.f. http://www.w3.org/TR/REC-xml#as-PE
773 * this is done independantly.
774 */
775 input = xmlNewEntityInputStream(ctxt, entity);
776 xmlPushInput(ctxt, input);
777 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
778 (RAW == '<') && (NXT(1) == '?') &&
779 (NXT(2) == 'x') && (NXT(3) == 'm') &&
780 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
781 xmlParseTextDecl(ctxt);
782 }
783 if (ctxt->token == 0)
784 ctxt->token = ' ';
785 } else {
786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
787 ctxt->sax->error(ctxt->userData,
788 "xmlHandlePEReference: %s is not a parameter entity\n",
789 name);
790 ctxt->wellFormed = 0;
791 ctxt->disableSAX = 1;
792 }
793 }
794 } else {
795 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
797 ctxt->sax->error(ctxt->userData,
798 "xmlHandlePEReference: expecting ';'\n");
799 ctxt->wellFormed = 0;
800 ctxt->disableSAX = 1;
801 }
802 xmlFree(name);
803 }
804}
805
/*
 * Macro used to grow the current buffer: doubles buffer##_size and
 * reallocates buffer accordingly.  It must be used in a function returning
 * a pointer: when the reallocation fails the old block is released, buffer
 * is reset to NULL and the calling function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
818
819/**
820 * xmlStringDecodeEntities:
821 * @ctxt: the parser context
822 * @str: the input string
823 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
824 * @end: an end marker xmlChar, 0 if none
825 * @end2: an end marker xmlChar, 0 if none
826 * @end3: an end marker xmlChar, 0 if none
827 *
828 * Takes a entity string content and process to do the adequate subtitutions.
829 *
830 * [67] Reference ::= EntityRef | CharRef
831 *
832 * [69] PEReference ::= '%' Name ';'
833 *
834 * Returns A newly allocated string with the substitution done. The caller
835 * must deallocate it !
836 */
837xmlChar *
838xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
839 xmlChar end, xmlChar end2, xmlChar end3) {
840 xmlChar *buffer = NULL;
841 int buffer_size = 0;
842
843 xmlChar *current = NULL;
844 xmlEntityPtr ent;
845 int c,l;
846 int nbchars = 0;
847
848 if (str == NULL)
849 return(NULL);
850
851 if (ctxt->depth > 40) {
852 ctxt->errNo = XML_ERR_ENTITY_LOOP;
853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
854 ctxt->sax->error(ctxt->userData,
855 "Detected entity reference loop\n");
856 ctxt->wellFormed = 0;
857 ctxt->disableSAX = 1;
858 return(NULL);
859 }
860
861 /*
862 * allocate a translation buffer.
863 */
864 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
865 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
866 if (buffer == NULL) {
867 perror("xmlDecodeEntities: malloc failed");
868 return(NULL);
869 }
870
871 /*
872 * Ok loop until we reach one of the ending char or a size limit.
873 * we are operating on already parsed values.
874 */
875 c = CUR_SCHAR(str, l);
876 while ((c != 0) && (c != end) && /* non input consuming loop */
877 (c != end2) && (c != end3)) {
878
879 if (c == 0) break;
880 if ((c == '&') && (str[1] == '#')) {
881 int val = xmlParseStringCharRef(ctxt, &str);
882 if (val != 0) {
883 COPY_BUF(0,buffer,nbchars,val);
884 }
885 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
886 if (xmlParserDebugEntities)
887 xmlGenericError(xmlGenericErrorContext,
888 "String decoding Entity Reference: %.30s\n",
889 str);
890 ent = xmlParseStringEntityRef(ctxt, &str);
891 if ((ent != NULL) &&
892 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
893 if (ent->content != NULL) {
894 COPY_BUF(0,buffer,nbchars,ent->content[0]);
895 } else {
896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
897 ctxt->sax->error(ctxt->userData,
898 "internal error entity has no content\n");
899 }
900 } else if ((ent != NULL) && (ent->content != NULL)) {
901 xmlChar *rep;
902
903 ctxt->depth++;
904 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
905 0, 0, 0);
906 ctxt->depth--;
907 if (rep != NULL) {
908 current = rep;
909 while (*current != 0) { /* non input consuming loop */
910 buffer[nbchars++] = *current++;
911 if (nbchars >
912 buffer_size - XML_PARSER_BUFFER_SIZE) {
913 growBuffer(buffer);
914 }
915 }
916 xmlFree(rep);
917 }
918 } else if (ent != NULL) {
919 int i = xmlStrlen(ent->name);
920 const xmlChar *cur = ent->name;
921
922 buffer[nbchars++] = '&';
923 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
924 growBuffer(buffer);
925 }
926 for (;i > 0;i--)
927 buffer[nbchars++] = *cur++;
928 buffer[nbchars++] = ';';
929 }
930 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
931 if (xmlParserDebugEntities)
932 xmlGenericError(xmlGenericErrorContext,
933 "String decoding PE Reference: %.30s\n", str);
934 ent = xmlParseStringPEReference(ctxt, &str);
935 if (ent != NULL) {
936 xmlChar *rep;
937
938 ctxt->depth++;
939 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
940 0, 0, 0);
941 ctxt->depth--;
942 if (rep != NULL) {
943 current = rep;
944 while (*current != 0) { /* non input consuming loop */
945 buffer[nbchars++] = *current++;
946 if (nbchars >
947 buffer_size - XML_PARSER_BUFFER_SIZE) {
948 growBuffer(buffer);
949 }
950 }
951 xmlFree(rep);
952 }
953 }
954 } else {
955 COPY_BUF(l,buffer,nbchars,c);
956 str += l;
957 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
958 growBuffer(buffer);
959 }
960 }
961 c = CUR_SCHAR(str, l);
962 }
963 buffer[nbchars++] = 0;
964 return(buffer);
965}
966
967
968/************************************************************************
969 * *
970 * Commodity functions to handle xmlChars *
971 * *
972 ************************************************************************/
973
974/**
975 * xmlStrndup:
976 * @cur: the input xmlChar *
977 * @len: the len of @cur
978 *
979 * a strndup for array of xmlChar's
980 *
981 * Returns a new xmlChar * or NULL
982 */
983xmlChar *
984xmlStrndup(const xmlChar *cur, int len) {
985 xmlChar *ret;
986
987 if ((cur == NULL) || (len < 0)) return(NULL);
988 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
989 if (ret == NULL) {
990 xmlGenericError(xmlGenericErrorContext,
991 "malloc of %ld byte failed\n",
992 (len + 1) * (long)sizeof(xmlChar));
993 return(NULL);
994 }
995 memcpy(ret, cur, len * sizeof(xmlChar));
996 ret[len] = 0;
997 return(ret);
998}
999
1000/**
1001 * xmlStrdup:
1002 * @cur: the input xmlChar *
1003 *
1004 * a strdup for array of xmlChar's. Since they are supposed to be
1005 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1006 * a termination mark of '0'.
1007 *
1008 * Returns a new xmlChar * or NULL
1009 */
1010xmlChar *
1011xmlStrdup(const xmlChar *cur) {
1012 const xmlChar *p = cur;
1013
1014 if (cur == NULL) return(NULL);
1015 while (*p != 0) p++; /* non input consuming */
1016 return(xmlStrndup(cur, p - cur));
1017}
1018
1019/**
1020 * xmlCharStrndup:
1021 * @cur: the input char *
1022 * @len: the len of @cur
1023 *
1024 * a strndup for char's to xmlChar's
1025 *
1026 * Returns a new xmlChar * or NULL
1027 */
1028
1029xmlChar *
1030xmlCharStrndup(const char *cur, int len) {
1031 int i;
1032 xmlChar *ret;
1033
1034 if ((cur == NULL) || (len < 0)) return(NULL);
1035 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1036 if (ret == NULL) {
1037 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1038 (len + 1) * (long)sizeof(xmlChar));
1039 return(NULL);
1040 }
1041 for (i = 0;i < len;i++)
1042 ret[i] = (xmlChar) cur[i];
1043 ret[len] = 0;
1044 return(ret);
1045}
1046
1047/**
1048 * xmlCharStrdup:
1049 * @cur: the input char *
1050 * @len: the len of @cur
1051 *
1052 * a strdup for char's to xmlChar's
1053 *
1054 * Returns a new xmlChar * or NULL
1055 */
1056
1057xmlChar *
1058xmlCharStrdup(const char *cur) {
1059 const char *p = cur;
1060
1061 if (cur == NULL) return(NULL);
1062 while (*p != '\0') p++; /* non input consuming */
1063 return(xmlCharStrndup(cur, p - cur));
1064}
1065
1066/**
1067 * xmlStrcmp:
1068 * @str1: the first xmlChar *
1069 * @str2: the second xmlChar *
1070 *
1071 * a strcmp for xmlChar's
1072 *
1073 * Returns the integer result of the comparison
1074 */
1075
1076int
1077xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1078 register int tmp;
1079
1080 if (str1 == str2) return(0);
1081 if (str1 == NULL) return(-1);
1082 if (str2 == NULL) return(1);
1083 do {
1084 tmp = *str1++ - *str2;
1085 if (tmp != 0) return(tmp);
1086 } while (*str2++ != 0);
1087 return 0;
1088}
1089
1090/**
1091 * xmlStrEqual:
1092 * @str1: the first xmlChar *
1093 * @str2: the second xmlChar *
1094 *
1095 * Check if both string are equal of have same content
1096 * Should be a bit more readable and faster than xmlStrEqual()
1097 *
1098 * Returns 1 if they are equal, 0 if they are different
1099 */
1100
1101int
1102xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1103 if (str1 == str2) return(1);
1104 if (str1 == NULL) return(0);
1105 if (str2 == NULL) return(0);
1106 do {
1107 if (*str1++ != *str2) return(0);
1108 } while (*str2++);
1109 return(1);
1110}
1111
1112/**
1113 * xmlStrncmp:
1114 * @str1: the first xmlChar *
1115 * @str2: the second xmlChar *
1116 * @len: the max comparison length
1117 *
1118 * a strncmp for xmlChar's
1119 *
1120 * Returns the integer result of the comparison
1121 */
1122
1123int
1124xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1125 register int tmp;
1126
1127 if (len <= 0) return(0);
1128 if (str1 == str2) return(0);
1129 if (str1 == NULL) return(-1);
1130 if (str2 == NULL) return(1);
1131 do {
1132 tmp = *str1++ - *str2;
1133 if (tmp != 0 || --len == 0) return(tmp);
1134 } while (*str2++ != 0);
1135 return 0;
1136}
1137
1138static xmlChar casemap[256] = {
1139 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1140 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1141 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1142 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1143 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1144 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1145 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1146 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1147 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1148 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1149 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1150 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1151 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1152 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1153 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1154 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1155 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1156 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1157 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1158 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1159 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1160 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1161 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1162 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1163 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1164 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1165 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1166 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1167 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1168 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1169 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1170 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1171};
1172
1173/**
1174 * xmlStrcasecmp:
1175 * @str1: the first xmlChar *
1176 * @str2: the second xmlChar *
1177 *
1178 * a strcasecmp for xmlChar's
1179 *
1180 * Returns the integer result of the comparison
1181 */
1182
1183int
1184xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1185 register int tmp;
1186
1187 if (str1 == str2) return(0);
1188 if (str1 == NULL) return(-1);
1189 if (str2 == NULL) return(1);
1190 do {
1191 tmp = casemap[*str1++] - casemap[*str2];
1192 if (tmp != 0) return(tmp);
1193 } while (*str2++ != 0);
1194 return 0;
1195}
1196
1197/**
1198 * xmlStrncasecmp:
1199 * @str1: the first xmlChar *
1200 * @str2: the second xmlChar *
1201 * @len: the max comparison length
1202 *
1203 * a strncasecmp for xmlChar's
1204 *
1205 * Returns the integer result of the comparison
1206 */
1207
1208int
1209xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1210 register int tmp;
1211
1212 if (len <= 0) return(0);
1213 if (str1 == str2) return(0);
1214 if (str1 == NULL) return(-1);
1215 if (str2 == NULL) return(1);
1216 do {
1217 tmp = casemap[*str1++] - casemap[*str2];
1218 if (tmp != 0 || --len == 0) return(tmp);
1219 } while (*str2++ != 0);
1220 return 0;
1221}
1222
1223/**
1224 * xmlStrchr:
1225 * @str: the xmlChar * array
1226 * @val: the xmlChar to search
1227 *
1228 * a strchr for xmlChar's
1229 *
1230 * Returns the xmlChar * for the first occurence or NULL.
1231 */
1232
1233const xmlChar *
1234xmlStrchr(const xmlChar *str, xmlChar val) {
1235 if (str == NULL) return(NULL);
1236 while (*str != 0) { /* non input consuming */
1237 if (*str == val) return((xmlChar *) str);
1238 str++;
1239 }
1240 return(NULL);
1241}
1242
1243/**
1244 * xmlStrstr:
1245 * @str: the xmlChar * array (haystack)
1246 * @val: the xmlChar to search (needle)
1247 *
1248 * a strstr for xmlChar's
1249 *
1250 * Returns the xmlChar * for the first occurence or NULL.
1251 */
1252
1253const xmlChar *
1254xmlStrstr(const xmlChar *str, xmlChar *val) {
1255 int n;
1256
1257 if (str == NULL) return(NULL);
1258 if (val == NULL) return(NULL);
1259 n = xmlStrlen(val);
1260
1261 if (n == 0) return(str);
1262 while (*str != 0) { /* non input consuming */
1263 if (*str == *val) {
1264 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1265 }
1266 str++;
1267 }
1268 return(NULL);
1269}
1270
1271/**
1272 * xmlStrcasestr:
1273 * @str: the xmlChar * array (haystack)
1274 * @val: the xmlChar to search (needle)
1275 *
1276 * a case-ignoring strstr for xmlChar's
1277 *
1278 * Returns the xmlChar * for the first occurence or NULL.
1279 */
1280
1281const xmlChar *
1282xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1283 int n;
1284
1285 if (str == NULL) return(NULL);
1286 if (val == NULL) return(NULL);
1287 n = xmlStrlen(val);
1288
1289 if (n == 0) return(str);
1290 while (*str != 0) { /* non input consuming */
1291 if (casemap[*str] == casemap[*val])
1292 if (!xmlStrncasecmp(str, val, n)) return(str);
1293 str++;
1294 }
1295 return(NULL);
1296}
1297
1298/**
1299 * xmlStrsub:
1300 * @str: the xmlChar * array (haystack)
1301 * @start: the index of the first char (zero based)
1302 * @len: the length of the substring
1303 *
1304 * Extract a substring of a given string
1305 *
1306 * Returns the xmlChar * for the first occurence or NULL.
1307 */
1308
1309xmlChar *
1310xmlStrsub(const xmlChar *str, int start, int len) {
1311 int i;
1312
1313 if (str == NULL) return(NULL);
1314 if (start < 0) return(NULL);
1315 if (len < 0) return(NULL);
1316
1317 for (i = 0;i < start;i++) {
1318 if (*str == 0) return(NULL);
1319 str++;
1320 }
1321 if (*str == 0) return(NULL);
1322 return(xmlStrndup(str, len));
1323}
1324
1325/**
1326 * xmlStrlen:
1327 * @str: the xmlChar * array
1328 *
1329 * length of a xmlChar's string
1330 *
1331 * Returns the number of xmlChar contained in the ARRAY.
1332 */
1333
1334int
1335xmlStrlen(const xmlChar *str) {
1336 int len = 0;
1337
1338 if (str == NULL) return(0);
1339 while (*str != 0) { /* non input consuming */
1340 str++;
1341 len++;
1342 }
1343 return(len);
1344}
1345
1346/**
1347 * xmlStrncat:
1348 * @cur: the original xmlChar * array
1349 * @add: the xmlChar * array added
1350 * @len: the length of @add
1351 *
1352 * a strncat for array of xmlChar's, it will extend cur with the len
1353 * first bytes of @add.
1354 *
1355 * Returns a new xmlChar *, the original @cur is reallocated if needed
1356 * and should not be freed
1357 */
1358
1359xmlChar *
1360xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1361 int size;
1362 xmlChar *ret;
1363
1364 if ((add == NULL) || (len == 0))
1365 return(cur);
1366 if (cur == NULL)
1367 return(xmlStrndup(add, len));
1368
1369 size = xmlStrlen(cur);
1370 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1371 if (ret == NULL) {
1372 xmlGenericError(xmlGenericErrorContext,
1373 "xmlStrncat: realloc of %ld byte failed\n",
1374 (size + len + 1) * (long)sizeof(xmlChar));
1375 return(cur);
1376 }
1377 memcpy(&ret[size], add, len * sizeof(xmlChar));
1378 ret[size + len] = 0;
1379 return(ret);
1380}
1381
1382/**
1383 * xmlStrcat:
1384 * @cur: the original xmlChar * array
1385 * @add: the xmlChar * array added
1386 *
1387 * a strcat for array of xmlChar's. Since they are supposed to be
1388 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1389 * a termination mark of '0'.
1390 *
1391 * Returns a new xmlChar * containing the concatenated string.
1392 */
1393xmlChar *
1394xmlStrcat(xmlChar *cur, const xmlChar *add) {
1395 const xmlChar *p = add;
1396
1397 if (add == NULL) return(cur);
1398 if (cur == NULL)
1399 return(xmlStrdup(add));
1400
1401 while (*p != 0) p++; /* non input consuming */
1402 return(xmlStrncat(cur, add, p - add));
1403}
1404
1405/************************************************************************
1406 * *
1407 * Commodity functions, cleanup needed ? *
1408 * *
1409 ************************************************************************/
1410
1411/**
1412 * areBlanks:
1413 * @ctxt: an XML parser context
1414 * @str: a xmlChar *
1415 * @len: the size of @str
1416 *
1417 * Is this a sequence of blank chars that one can ignore ?
1418 *
1419 * Returns 1 if ignorable 0 otherwise.
1420 */
1421
1422static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1423 int i, ret;
1424 xmlNodePtr lastChild;
1425
Daniel Veillard2f362242001-03-02 17:36:21 +00001426 if (ctxt->keepBlanks)
1427 return(0);
1428
Owen Taylor3473f882001-02-23 17:55:21 +00001429 /*
1430 * Check for xml:space value.
1431 */
1432 if (*(ctxt->space) == 1)
1433 return(0);
1434
1435 /*
1436 * Check that the string is made of blanks
1437 */
1438 for (i = 0;i < len;i++)
1439 if (!(IS_BLANK(str[i]))) return(0);
1440
1441 /*
1442 * Look if the element is mixed content in the Dtd if available
1443 */
1444 if (ctxt->myDoc != NULL) {
1445 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1446 if (ret == 0) return(1);
1447 if (ret == 1) return(0);
1448 }
1449
1450 /*
1451 * Otherwise, heuristic :-\
1452 */
Owen Taylor3473f882001-02-23 17:55:21 +00001453 if (RAW != '<') return(0);
1454 if (ctxt->node == NULL) return(0);
1455 if ((ctxt->node->children == NULL) &&
1456 (RAW == '<') && (NXT(1) == '/')) return(0);
1457
1458 lastChild = xmlGetLastChild(ctxt->node);
1459 if (lastChild == NULL) {
1460 if (ctxt->node->content != NULL) return(0);
1461 } else if (xmlNodeIsText(lastChild))
1462 return(0);
1463 else if ((ctxt->node->children != NULL) &&
1464 (xmlNodeIsText(ctxt->node->children)))
1465 return(0);
1466 return(1);
1467}
1468
1469/*
1470 * Forward definition for recusive behaviour.
1471 */
1472void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1473void xmlParseReference(xmlParserCtxtPtr ctxt);
1474
1475/************************************************************************
1476 * *
1477 * Extra stuff for namespace support *
1478 * Relates to http://www.w3.org/TR/WD-xml-names *
1479 * *
1480 ************************************************************************/
1481
1482/**
1483 * xmlSplitQName:
1484 * @ctxt: an XML parser context
1485 * @name: an XML parser context
1486 * @prefix: a xmlChar **
1487 *
1488 * parse an UTF8 encoded XML qualified name string
1489 *
1490 * [NS 5] QName ::= (Prefix ':')? LocalPart
1491 *
1492 * [NS 6] Prefix ::= NCName
1493 *
1494 * [NS 7] LocalPart ::= NCName
1495 *
1496 * Returns the local part, and prefix is updated
1497 * to get the Prefix if any.
1498 */
1499
1500xmlChar *
1501xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1502 xmlChar buf[XML_MAX_NAMELEN + 5];
1503 xmlChar *buffer = NULL;
1504 int len = 0;
1505 int max = XML_MAX_NAMELEN;
1506 xmlChar *ret = NULL;
1507 const xmlChar *cur = name;
1508 int c;
1509
1510 *prefix = NULL;
1511
1512#ifndef XML_XML_NAMESPACE
1513 /* xml: prefix is not really a namespace */
1514 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1515 (cur[2] == 'l') && (cur[3] == ':'))
1516 return(xmlStrdup(name));
1517#endif
1518
1519 /* nasty but valid */
1520 if (cur[0] == ':')
1521 return(xmlStrdup(name));
1522
1523 c = *cur++;
1524 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1525 buf[len++] = c;
1526 c = *cur++;
1527 }
1528 if (len >= max) {
1529 /*
1530 * Okay someone managed to make a huge name, so he's ready to pay
1531 * for the processing speed.
1532 */
1533 max = len * 2;
1534
1535 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1536 if (buffer == NULL) {
1537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1538 ctxt->sax->error(ctxt->userData,
1539 "xmlSplitQName: out of memory\n");
1540 return(NULL);
1541 }
1542 memcpy(buffer, buf, len);
1543 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1544 if (len + 10 > max) {
1545 max *= 2;
1546 buffer = (xmlChar *) xmlRealloc(buffer,
1547 max * sizeof(xmlChar));
1548 if (buffer == NULL) {
1549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1550 ctxt->sax->error(ctxt->userData,
1551 "xmlSplitQName: out of memory\n");
1552 return(NULL);
1553 }
1554 }
1555 buffer[len++] = c;
1556 c = *cur++;
1557 }
1558 buffer[len] = 0;
1559 }
1560
1561 if (buffer == NULL)
1562 ret = xmlStrndup(buf, len);
1563 else {
1564 ret = buffer;
1565 buffer = NULL;
1566 max = XML_MAX_NAMELEN;
1567 }
1568
1569
1570 if (c == ':') {
1571 c = *cur++;
1572 if (c == 0) return(ret);
1573 *prefix = ret;
1574 len = 0;
1575
1576 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1577 buf[len++] = c;
1578 c = *cur++;
1579 }
1580 if (len >= max) {
1581 /*
1582 * Okay someone managed to make a huge name, so he's ready to pay
1583 * for the processing speed.
1584 */
1585 max = len * 2;
1586
1587 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1588 if (buffer == NULL) {
1589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1590 ctxt->sax->error(ctxt->userData,
1591 "xmlSplitQName: out of memory\n");
1592 return(NULL);
1593 }
1594 memcpy(buffer, buf, len);
1595 while (c != 0) { /* tested bigname2.xml */
1596 if (len + 10 > max) {
1597 max *= 2;
1598 buffer = (xmlChar *) xmlRealloc(buffer,
1599 max * sizeof(xmlChar));
1600 if (buffer == NULL) {
1601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1602 ctxt->sax->error(ctxt->userData,
1603 "xmlSplitQName: out of memory\n");
1604 return(NULL);
1605 }
1606 }
1607 buffer[len++] = c;
1608 c = *cur++;
1609 }
1610 buffer[len] = 0;
1611 }
1612
1613 if (buffer == NULL)
1614 ret = xmlStrndup(buf, len);
1615 else {
1616 ret = buffer;
1617 }
1618 }
1619
1620 return(ret);
1621}
1622
1623/************************************************************************
1624 * *
1625 * The parser itself *
1626 * Relates to http://www.w3.org/TR/REC-xml *
1627 * *
1628 ************************************************************************/
1629
Daniel Veillard21a0f912001-02-25 19:54:14 +00001630xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001631/**
1632 * xmlParseName:
1633 * @ctxt: an XML parser context
1634 *
1635 * parse an XML name.
1636 *
1637 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1638 * CombiningChar | Extender
1639 *
1640 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1641 *
1642 * [6] Names ::= Name (S Name)*
1643 *
1644 * Returns the Name parsed or NULL
1645 */
1646
1647xmlChar *
1648xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001649 const xmlChar *in;
1650 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001651 int count = 0;
1652
1653 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001654
1655 /*
1656 * Accelerator for simple ASCII names
1657 */
1658 in = ctxt->input->cur;
1659 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1660 ((*in >= 0x41) && (*in <= 0x5A)) ||
1661 (*in == '_') || (*in == ':')) {
1662 in++;
1663 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1664 ((*in >= 0x41) && (*in <= 0x5A)) ||
1665 ((*in >= 0x30) && (*in <= 0x39)) ||
1666 (*in == '_') || (*in == ':'))
1667 in++;
1668 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1669 count = in - ctxt->input->cur;
1670 ret = xmlStrndup(ctxt->input->cur, count);
1671 ctxt->input->cur = in;
1672 return(ret);
1673 }
1674 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001675 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001676}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001677
Daniel Veillard21a0f912001-02-25 19:54:14 +00001678xmlChar *
1679xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1680 xmlChar buf[XML_MAX_NAMELEN + 5];
1681 int len = 0, l;
1682 int c;
1683 int count = 0;
1684
1685 /*
1686 * Handler for more complex cases
1687 */
1688 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001689 c = CUR_CHAR(l);
1690 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1691 (!IS_LETTER(c) && (c != '_') &&
1692 (c != ':'))) {
1693 return(NULL);
1694 }
1695
1696 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1697 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1698 (c == '.') || (c == '-') ||
1699 (c == '_') || (c == ':') ||
1700 (IS_COMBINING(c)) ||
1701 (IS_EXTENDER(c)))) {
1702 if (count++ > 100) {
1703 count = 0;
1704 GROW;
1705 }
1706 COPY_BUF(l,buf,len,c);
1707 NEXTL(l);
1708 c = CUR_CHAR(l);
1709 if (len >= XML_MAX_NAMELEN) {
1710 /*
1711 * Okay someone managed to make a huge name, so he's ready to pay
1712 * for the processing speed.
1713 */
1714 xmlChar *buffer;
1715 int max = len * 2;
1716
1717 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1718 if (buffer == NULL) {
1719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1720 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001721 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001722 return(NULL);
1723 }
1724 memcpy(buffer, buf, len);
1725 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1726 (c == '.') || (c == '-') ||
1727 (c == '_') || (c == ':') ||
1728 (IS_COMBINING(c)) ||
1729 (IS_EXTENDER(c))) {
1730 if (count++ > 100) {
1731 count = 0;
1732 GROW;
1733 }
1734 if (len + 10 > max) {
1735 max *= 2;
1736 buffer = (xmlChar *) xmlRealloc(buffer,
1737 max * sizeof(xmlChar));
1738 if (buffer == NULL) {
1739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1740 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001741 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001742 return(NULL);
1743 }
1744 }
1745 COPY_BUF(l,buffer,len,c);
1746 NEXTL(l);
1747 c = CUR_CHAR(l);
1748 }
1749 buffer[len] = 0;
1750 return(buffer);
1751 }
1752 }
1753 return(xmlStrndup(buf, len));
1754}
1755
1756/**
1757 * xmlParseStringName:
1758 * @ctxt: an XML parser context
1759 * @str: a pointer to the string pointer (IN/OUT)
1760 *
1761 * parse an XML name.
1762 *
1763 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1764 * CombiningChar | Extender
1765 *
1766 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1767 *
1768 * [6] Names ::= Name (S Name)*
1769 *
1770 * Returns the Name parsed or NULL. The str pointer
1771 * is updated to the current location in the string.
1772 */
1773
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001774static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001775xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1776 xmlChar buf[XML_MAX_NAMELEN + 5];
1777 const xmlChar *cur = *str;
1778 int len = 0, l;
1779 int c;
1780
1781 c = CUR_SCHAR(cur, l);
1782 if (!IS_LETTER(c) && (c != '_') &&
1783 (c != ':')) {
1784 return(NULL);
1785 }
1786
1787 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1788 (c == '.') || (c == '-') ||
1789 (c == '_') || (c == ':') ||
1790 (IS_COMBINING(c)) ||
1791 (IS_EXTENDER(c))) {
1792 COPY_BUF(l,buf,len,c);
1793 cur += l;
1794 c = CUR_SCHAR(cur, l);
1795 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1796 /*
1797 * Okay someone managed to make a huge name, so he's ready to pay
1798 * for the processing speed.
1799 */
1800 xmlChar *buffer;
1801 int max = len * 2;
1802
1803 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1804 if (buffer == NULL) {
1805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1806 ctxt->sax->error(ctxt->userData,
1807 "xmlParseStringName: out of memory\n");
1808 return(NULL);
1809 }
1810 memcpy(buffer, buf, len);
1811 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1812 (c == '.') || (c == '-') ||
1813 (c == '_') || (c == ':') ||
1814 (IS_COMBINING(c)) ||
1815 (IS_EXTENDER(c))) {
1816 if (len + 10 > max) {
1817 max *= 2;
1818 buffer = (xmlChar *) xmlRealloc(buffer,
1819 max * sizeof(xmlChar));
1820 if (buffer == NULL) {
1821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1822 ctxt->sax->error(ctxt->userData,
1823 "xmlParseStringName: out of memory\n");
1824 return(NULL);
1825 }
1826 }
1827 COPY_BUF(l,buffer,len,c);
1828 cur += l;
1829 c = CUR_SCHAR(cur, l);
1830 }
1831 buffer[len] = 0;
1832 *str = cur;
1833 return(buffer);
1834 }
1835 }
1836 *str = cur;
1837 return(xmlStrndup(buf, len));
1838}
1839
1840/**
1841 * xmlParseNmtoken:
1842 * @ctxt: an XML parser context
1843 *
1844 * parse an XML Nmtoken.
1845 *
1846 * [7] Nmtoken ::= (NameChar)+
1847 *
1848 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1849 *
1850 * Returns the Nmtoken parsed or NULL
1851 */
1852
1853xmlChar *
1854xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1855 xmlChar buf[XML_MAX_NAMELEN + 5];
1856 int len = 0, l;
1857 int c;
1858 int count = 0;
1859
1860 GROW;
1861 c = CUR_CHAR(l);
1862
1863 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1864 (c == '.') || (c == '-') ||
1865 (c == '_') || (c == ':') ||
1866 (IS_COMBINING(c)) ||
1867 (IS_EXTENDER(c))) {
1868 if (count++ > 100) {
1869 count = 0;
1870 GROW;
1871 }
1872 COPY_BUF(l,buf,len,c);
1873 NEXTL(l);
1874 c = CUR_CHAR(l);
1875 if (len >= XML_MAX_NAMELEN) {
1876 /*
1877 * Okay someone managed to make a huge token, so he's ready to pay
1878 * for the processing speed.
1879 */
1880 xmlChar *buffer;
1881 int max = len * 2;
1882
1883 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1884 if (buffer == NULL) {
1885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1886 ctxt->sax->error(ctxt->userData,
1887 "xmlParseNmtoken: out of memory\n");
1888 return(NULL);
1889 }
1890 memcpy(buffer, buf, len);
1891 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1892 (c == '.') || (c == '-') ||
1893 (c == '_') || (c == ':') ||
1894 (IS_COMBINING(c)) ||
1895 (IS_EXTENDER(c))) {
1896 if (count++ > 100) {
1897 count = 0;
1898 GROW;
1899 }
1900 if (len + 10 > max) {
1901 max *= 2;
1902 buffer = (xmlChar *) xmlRealloc(buffer,
1903 max * sizeof(xmlChar));
1904 if (buffer == NULL) {
1905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1906 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001907 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001908 return(NULL);
1909 }
1910 }
1911 COPY_BUF(l,buffer,len,c);
1912 NEXTL(l);
1913 c = CUR_CHAR(l);
1914 }
1915 buffer[len] = 0;
1916 return(buffer);
1917 }
1918 }
1919 if (len == 0)
1920 return(NULL);
1921 return(xmlStrndup(buf, len));
1922}
1923
1924/**
1925 * xmlParseEntityValue:
1926 * @ctxt: an XML parser context
1927 * @orig: if non-NULL store a copy of the original entity value
1928 *
1929 * parse a value for ENTITY declarations
1930 *
1931 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1932 * "'" ([^%&'] | PEReference | Reference)* "'"
1933 *
1934 * Returns the EntityValue parsed with reference substitued or NULL
1935 */
1936
1937xmlChar *
1938xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1939 xmlChar *buf = NULL;
1940 int len = 0;
1941 int size = XML_PARSER_BUFFER_SIZE;
1942 int c, l;
1943 xmlChar stop;
1944 xmlChar *ret = NULL;
1945 const xmlChar *cur = NULL;
1946 xmlParserInputPtr input;
1947
1948 if (RAW == '"') stop = '"';
1949 else if (RAW == '\'') stop = '\'';
1950 else {
1951 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1953 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1954 ctxt->wellFormed = 0;
1955 ctxt->disableSAX = 1;
1956 return(NULL);
1957 }
1958 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1959 if (buf == NULL) {
1960 xmlGenericError(xmlGenericErrorContext,
1961 "malloc of %d byte failed\n", size);
1962 return(NULL);
1963 }
1964
1965 /*
1966 * The content of the entity definition is copied in a buffer.
1967 */
1968
1969 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1970 input = ctxt->input;
1971 GROW;
1972 NEXT;
1973 c = CUR_CHAR(l);
1974 /*
1975 * NOTE: 4.4.5 Included in Literal
1976 * When a parameter entity reference appears in a literal entity
1977 * value, ... a single or double quote character in the replacement
1978 * text is always treated as a normal data character and will not
1979 * terminate the literal.
1980 * In practice it means we stop the loop only when back at parsing
1981 * the initial entity and the quote is found
1982 */
1983 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1984 (ctxt->input != input))) {
1985 if (len + 5 >= size) {
1986 size *= 2;
1987 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1988 if (buf == NULL) {
1989 xmlGenericError(xmlGenericErrorContext,
1990 "realloc of %d byte failed\n", size);
1991 return(NULL);
1992 }
1993 }
1994 COPY_BUF(l,buf,len,c);
1995 NEXTL(l);
1996 /*
1997 * Pop-up of finished entities.
1998 */
1999 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2000 xmlPopInput(ctxt);
2001
2002 GROW;
2003 c = CUR_CHAR(l);
2004 if (c == 0) {
2005 GROW;
2006 c = CUR_CHAR(l);
2007 }
2008 }
2009 buf[len] = 0;
2010
2011 /*
2012 * Raise problem w.r.t. '&' and '%' being used in non-entities
2013 * reference constructs. Note Charref will be handled in
2014 * xmlStringDecodeEntities()
2015 */
2016 cur = buf;
2017 while (*cur != 0) { /* non input consuming */
2018 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2019 xmlChar *name;
2020 xmlChar tmp = *cur;
2021
2022 cur++;
2023 name = xmlParseStringName(ctxt, &cur);
2024 if ((name == NULL) || (*cur != ';')) {
2025 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2027 ctxt->sax->error(ctxt->userData,
2028 "EntityValue: '%c' forbidden except for entities references\n",
2029 tmp);
2030 ctxt->wellFormed = 0;
2031 ctxt->disableSAX = 1;
2032 }
2033 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2034 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2036 ctxt->sax->error(ctxt->userData,
2037 "EntityValue: PEReferences forbidden in internal subset\n",
2038 tmp);
2039 ctxt->wellFormed = 0;
2040 ctxt->disableSAX = 1;
2041 }
2042 if (name != NULL)
2043 xmlFree(name);
2044 }
2045 cur++;
2046 }
2047
2048 /*
2049 * Then PEReference entities are substituted.
2050 */
2051 if (c != stop) {
2052 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2054 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2055 ctxt->wellFormed = 0;
2056 ctxt->disableSAX = 1;
2057 xmlFree(buf);
2058 } else {
2059 NEXT;
2060 /*
2061 * NOTE: 4.4.7 Bypassed
2062 * When a general entity reference appears in the EntityValue in
2063 * an entity declaration, it is bypassed and left as is.
2064 * so XML_SUBSTITUTE_REF is not set here.
2065 */
2066 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2067 0, 0, 0);
2068 if (orig != NULL)
2069 *orig = buf;
2070 else
2071 xmlFree(buf);
2072 }
2073
2074 return(ret);
2075}
2076
2077/**
2078 * xmlParseAttValue:
2079 * @ctxt: an XML parser context
2080 *
2081 * parse a value for an attribute
2082 * Note: the parser won't do substitution of entities here, this
2083 * will be handled later in xmlStringGetNodeList
2084 *
2085 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2086 * "'" ([^<&'] | Reference)* "'"
2087 *
2088 * 3.3.3 Attribute-Value Normalization:
2089 * Before the value of an attribute is passed to the application or
2090 * checked for validity, the XML processor must normalize it as follows:
2091 * - a character reference is processed by appending the referenced
2092 * character to the attribute value
2093 * - an entity reference is processed by recursively processing the
2094 * replacement text of the entity
2095 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2096 * appending #x20 to the normalized value, except that only a single
2097 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2098 * parsed entity or the literal entity value of an internal parsed entity
2099 * - other characters are processed by appending them to the normalized value
2100 * If the declared value is not CDATA, then the XML processor must further
2101 * process the normalized attribute value by discarding any leading and
2102 * trailing space (#x20) characters, and by replacing sequences of space
2103 * (#x20) characters by a single space (#x20) character.
2104 * All attributes for which no declaration has been read should be treated
2105 * by a non-validating parser as if declared CDATA.
2106 *
2107 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2108 */
2109
2110xmlChar *
2111xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2112 xmlChar limit = 0;
2113 xmlChar *buf = NULL;
2114 int len = 0;
2115 int buf_size = 0;
2116 int c, l;
2117 xmlChar *current = NULL;
2118 xmlEntityPtr ent;
2119
2120
2121 SHRINK;
2122 if (NXT(0) == '"') {
2123 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2124 limit = '"';
2125 NEXT;
2126 } else if (NXT(0) == '\'') {
2127 limit = '\'';
2128 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2129 NEXT;
2130 } else {
2131 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2133 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2134 ctxt->wellFormed = 0;
2135 ctxt->disableSAX = 1;
2136 return(NULL);
2137 }
2138
2139 /*
2140 * allocate a translation buffer.
2141 */
2142 buf_size = XML_PARSER_BUFFER_SIZE;
2143 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2144 if (buf == NULL) {
2145 perror("xmlParseAttValue: malloc failed");
2146 return(NULL);
2147 }
2148
2149 /*
2150 * Ok loop until we reach one of the ending char or a size limit.
2151 */
2152 c = CUR_CHAR(l);
2153 while (((NXT(0) != limit) && /* checked */
2154 (c != '<')) || (ctxt->token != 0)) {
2155 if (c == 0) break;
2156 if (ctxt->token == '&') {
2157 /*
2158 * The reparsing will be done in xmlStringGetNodeList()
2159 * called by the attribute() function in SAX.c
2160 */
2161 static xmlChar buffer[6] = "&#38;";
2162
2163 if (len > buf_size - 10) {
2164 growBuffer(buf);
2165 }
2166 current = &buffer[0];
2167 while (*current != 0) { /* non input consuming */
2168 buf[len++] = *current++;
2169 }
2170 ctxt->token = 0;
2171 } else if (c == '&') {
2172 if (NXT(1) == '#') {
2173 int val = xmlParseCharRef(ctxt);
2174 if (val == '&') {
2175 /*
2176 * The reparsing will be done in xmlStringGetNodeList()
2177 * called by the attribute() function in SAX.c
2178 */
2179 static xmlChar buffer[6] = "&#38;";
2180
2181 if (len > buf_size - 10) {
2182 growBuffer(buf);
2183 }
2184 current = &buffer[0];
2185 while (*current != 0) { /* non input consuming */
2186 buf[len++] = *current++;
2187 }
2188 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002189 if (len > buf_size - 10) {
2190 growBuffer(buf);
2191 }
Owen Taylor3473f882001-02-23 17:55:21 +00002192 len += xmlCopyChar(0, &buf[len], val);
2193 }
2194 } else {
2195 ent = xmlParseEntityRef(ctxt);
2196 if ((ent != NULL) &&
2197 (ctxt->replaceEntities != 0)) {
2198 xmlChar *rep;
2199
2200 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2201 rep = xmlStringDecodeEntities(ctxt, ent->content,
2202 XML_SUBSTITUTE_REF, 0, 0, 0);
2203 if (rep != NULL) {
2204 current = rep;
2205 while (*current != 0) { /* non input consuming */
2206 buf[len++] = *current++;
2207 if (len > buf_size - 10) {
2208 growBuffer(buf);
2209 }
2210 }
2211 xmlFree(rep);
2212 }
2213 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002214 if (len > buf_size - 10) {
2215 growBuffer(buf);
2216 }
Owen Taylor3473f882001-02-23 17:55:21 +00002217 if (ent->content != NULL)
2218 buf[len++] = ent->content[0];
2219 }
2220 } else if (ent != NULL) {
2221 int i = xmlStrlen(ent->name);
2222 const xmlChar *cur = ent->name;
2223
2224 /*
2225 * This may look absurd but is needed to detect
2226 * entities problems
2227 */
2228 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2229 (ent->content != NULL)) {
2230 xmlChar *rep;
2231 rep = xmlStringDecodeEntities(ctxt, ent->content,
2232 XML_SUBSTITUTE_REF, 0, 0, 0);
2233 if (rep != NULL)
2234 xmlFree(rep);
2235 }
2236
2237 /*
2238 * Just output the reference
2239 */
2240 buf[len++] = '&';
2241 if (len > buf_size - i - 10) {
2242 growBuffer(buf);
2243 }
2244 for (;i > 0;i--)
2245 buf[len++] = *cur++;
2246 buf[len++] = ';';
2247 }
2248 }
2249 } else {
2250 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2251 COPY_BUF(l,buf,len,0x20);
2252 if (len > buf_size - 10) {
2253 growBuffer(buf);
2254 }
2255 } else {
2256 COPY_BUF(l,buf,len,c);
2257 if (len > buf_size - 10) {
2258 growBuffer(buf);
2259 }
2260 }
2261 NEXTL(l);
2262 }
2263 GROW;
2264 c = CUR_CHAR(l);
2265 }
2266 buf[len++] = 0;
2267 if (RAW == '<') {
2268 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2270 ctxt->sax->error(ctxt->userData,
2271 "Unescaped '<' not allowed in attributes values\n");
2272 ctxt->wellFormed = 0;
2273 ctxt->disableSAX = 1;
2274 } else if (RAW != limit) {
2275 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2277 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2278 ctxt->wellFormed = 0;
2279 ctxt->disableSAX = 1;
2280 } else
2281 NEXT;
2282 return(buf);
2283}
2284
2285/**
2286 * xmlParseSystemLiteral:
2287 * @ctxt: an XML parser context
2288 *
2289 * parse an XML Literal
2290 *
2291 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2292 *
2293 * Returns the SystemLiteral parsed or NULL
2294 */
2295
2296xmlChar *
2297xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2298 xmlChar *buf = NULL;
2299 int len = 0;
2300 int size = XML_PARSER_BUFFER_SIZE;
2301 int cur, l;
2302 xmlChar stop;
2303 int state = ctxt->instate;
2304 int count = 0;
2305
2306 SHRINK;
2307 if (RAW == '"') {
2308 NEXT;
2309 stop = '"';
2310 } else if (RAW == '\'') {
2311 NEXT;
2312 stop = '\'';
2313 } else {
2314 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2316 ctxt->sax->error(ctxt->userData,
2317 "SystemLiteral \" or ' expected\n");
2318 ctxt->wellFormed = 0;
2319 ctxt->disableSAX = 1;
2320 return(NULL);
2321 }
2322
2323 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2324 if (buf == NULL) {
2325 xmlGenericError(xmlGenericErrorContext,
2326 "malloc of %d byte failed\n", size);
2327 return(NULL);
2328 }
2329 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2330 cur = CUR_CHAR(l);
2331 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2332 if (len + 5 >= size) {
2333 size *= 2;
2334 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2335 if (buf == NULL) {
2336 xmlGenericError(xmlGenericErrorContext,
2337 "realloc of %d byte failed\n", size);
2338 ctxt->instate = (xmlParserInputState) state;
2339 return(NULL);
2340 }
2341 }
2342 count++;
2343 if (count > 50) {
2344 GROW;
2345 count = 0;
2346 }
2347 COPY_BUF(l,buf,len,cur);
2348 NEXTL(l);
2349 cur = CUR_CHAR(l);
2350 if (cur == 0) {
2351 GROW;
2352 SHRINK;
2353 cur = CUR_CHAR(l);
2354 }
2355 }
2356 buf[len] = 0;
2357 ctxt->instate = (xmlParserInputState) state;
2358 if (!IS_CHAR(cur)) {
2359 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2361 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2362 ctxt->wellFormed = 0;
2363 ctxt->disableSAX = 1;
2364 } else {
2365 NEXT;
2366 }
2367 return(buf);
2368}
2369
2370/**
2371 * xmlParsePubidLiteral:
2372 * @ctxt: an XML parser context
2373 *
2374 * parse an XML public literal
2375 *
2376 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2377 *
2378 * Returns the PubidLiteral parsed or NULL.
2379 */
2380
2381xmlChar *
2382xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2383 xmlChar *buf = NULL;
2384 int len = 0;
2385 int size = XML_PARSER_BUFFER_SIZE;
2386 xmlChar cur;
2387 xmlChar stop;
2388 int count = 0;
2389
2390 SHRINK;
2391 if (RAW == '"') {
2392 NEXT;
2393 stop = '"';
2394 } else if (RAW == '\'') {
2395 NEXT;
2396 stop = '\'';
2397 } else {
2398 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2400 ctxt->sax->error(ctxt->userData,
2401 "SystemLiteral \" or ' expected\n");
2402 ctxt->wellFormed = 0;
2403 ctxt->disableSAX = 1;
2404 return(NULL);
2405 }
2406 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2407 if (buf == NULL) {
2408 xmlGenericError(xmlGenericErrorContext,
2409 "malloc of %d byte failed\n", size);
2410 return(NULL);
2411 }
2412 cur = CUR;
2413 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2414 if (len + 1 >= size) {
2415 size *= 2;
2416 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2417 if (buf == NULL) {
2418 xmlGenericError(xmlGenericErrorContext,
2419 "realloc of %d byte failed\n", size);
2420 return(NULL);
2421 }
2422 }
2423 buf[len++] = cur;
2424 count++;
2425 if (count > 50) {
2426 GROW;
2427 count = 0;
2428 }
2429 NEXT;
2430 cur = CUR;
2431 if (cur == 0) {
2432 GROW;
2433 SHRINK;
2434 cur = CUR;
2435 }
2436 }
2437 buf[len] = 0;
2438 if (cur != stop) {
2439 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2441 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2442 ctxt->wellFormed = 0;
2443 ctxt->disableSAX = 1;
2444 } else {
2445 NEXT;
2446 }
2447 return(buf);
2448}
2449
Daniel Veillard48b2f892001-02-25 16:11:03 +00002450void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002451/**
2452 * xmlParseCharData:
2453 * @ctxt: an XML parser context
2454 * @cdata: int indicating whether we are within a CDATA section
2455 *
2456 * parse a CharData section.
2457 * if we are within a CDATA section ']]>' marks an end of section.
2458 *
2459 * The right angle bracket (>) may be represented using the string "&gt;",
2460 * and must, for compatibility, be escaped using "&gt;" or a character
2461 * reference when it appears in the string "]]>" in content, when that
2462 * string is not marking the end of a CDATA section.
2463 *
2464 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2465 */
2466
2467void
2468xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002469 const xmlChar *in;
2470 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002471 int line = ctxt->input->line;
2472 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002473
2474 SHRINK;
2475 GROW;
2476 /*
2477 * Accelerated common case where input don't need to be
2478 * modified before passing it to the handler.
2479 */
2480 if ((ctxt->token == 0) && (!cdata)) {
2481 in = ctxt->input->cur;
2482 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002483get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002484 while (((*in >= 0x20) && (*in != '<') &&
2485 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2486 in++;
2487 if (*in == 0xA) {
2488 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002489 in++;
2490 while (*in == 0xA) {
2491 ctxt->input->line++;
2492 in++;
2493 }
2494 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002495 }
2496 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002497 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002498 if (IS_BLANK(*ctxt->input->cur)) {
2499 const xmlChar *tmp = ctxt->input->cur;
2500 ctxt->input->cur = in;
2501 if (areBlanks(ctxt, tmp, nbchar)) {
2502 if (ctxt->sax->ignorableWhitespace != NULL)
2503 ctxt->sax->ignorableWhitespace(ctxt->userData,
2504 tmp, nbchar);
2505 } else {
2506 if (ctxt->sax->characters != NULL)
2507 ctxt->sax->characters(ctxt->userData,
2508 tmp, nbchar);
2509 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002510 } else {
2511 if (ctxt->sax->characters != NULL)
2512 ctxt->sax->characters(ctxt->userData,
2513 ctxt->input->cur, nbchar);
2514 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002515 }
2516 ctxt->input->cur = in;
2517 if (*in == 0xD) {
2518 in++;
2519 if (*in == 0xA) {
2520 ctxt->input->cur = in;
2521 in++;
2522 ctxt->input->line++;
2523 continue; /* while */
2524 }
2525 in--;
2526 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002527 if (*in == '<') {
2528 return;
2529 }
2530 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002531 return;
2532 }
2533 SHRINK;
2534 GROW;
2535 in = ctxt->input->cur;
2536 } while ((*in >= 0x20) && (*in <= 0x7F));
2537 nbchar = 0;
2538 }
Daniel Veillard50582112001-03-26 22:52:16 +00002539 ctxt->input->line = line;
2540 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002541 xmlParseCharDataComplex(ctxt, cdata);
2542}
2543
2544void
2545xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002546 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2547 int nbchar = 0;
2548 int cur, l;
2549 int count = 0;
2550
2551 SHRINK;
2552 GROW;
2553 cur = CUR_CHAR(l);
2554 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2555 ((cur != '&') || (ctxt->token == '&')) &&
2556 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2557 if ((cur == ']') && (NXT(1) == ']') &&
2558 (NXT(2) == '>')) {
2559 if (cdata) break;
2560 else {
2561 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2563 ctxt->sax->error(ctxt->userData,
2564 "Sequence ']]>' not allowed in content\n");
2565 /* Should this be relaxed ??? I see a "must here */
2566 ctxt->wellFormed = 0;
2567 ctxt->disableSAX = 1;
2568 }
2569 }
2570 COPY_BUF(l,buf,nbchar,cur);
2571 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2572 /*
2573 * Ok the segment is to be consumed as chars.
2574 */
2575 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2576 if (areBlanks(ctxt, buf, nbchar)) {
2577 if (ctxt->sax->ignorableWhitespace != NULL)
2578 ctxt->sax->ignorableWhitespace(ctxt->userData,
2579 buf, nbchar);
2580 } else {
2581 if (ctxt->sax->characters != NULL)
2582 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2583 }
2584 }
2585 nbchar = 0;
2586 }
2587 count++;
2588 if (count > 50) {
2589 GROW;
2590 count = 0;
2591 }
2592 NEXTL(l);
2593 cur = CUR_CHAR(l);
2594 }
2595 if (nbchar != 0) {
2596 /*
2597 * Ok the segment is to be consumed as chars.
2598 */
2599 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2600 if (areBlanks(ctxt, buf, nbchar)) {
2601 if (ctxt->sax->ignorableWhitespace != NULL)
2602 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2603 } else {
2604 if (ctxt->sax->characters != NULL)
2605 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2606 }
2607 }
2608 }
2609}
2610
2611/**
2612 * xmlParseExternalID:
2613 * @ctxt: an XML parser context
2614 * @publicID: a xmlChar** receiving PubidLiteral
2615 * @strict: indicate whether we should restrict parsing to only
2616 * production [75], see NOTE below
2617 *
2618 * Parse an External ID or a Public ID
2619 *
2620 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2621 * 'PUBLIC' S PubidLiteral S SystemLiteral
2622 *
2623 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2624 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2625 *
2626 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2627 *
2628 * Returns the function returns SystemLiteral and in the second
2629 * case publicID receives PubidLiteral, is strict is off
2630 * it is possible to return NULL and have publicID set.
2631 */
2632
2633xmlChar *
2634xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2635 xmlChar *URI = NULL;
2636
2637 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002638
2639 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002640 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2641 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2642 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2643 SKIP(6);
2644 if (!IS_BLANK(CUR)) {
2645 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2647 ctxt->sax->error(ctxt->userData,
2648 "Space required after 'SYSTEM'\n");
2649 ctxt->wellFormed = 0;
2650 ctxt->disableSAX = 1;
2651 }
2652 SKIP_BLANKS;
2653 URI = xmlParseSystemLiteral(ctxt);
2654 if (URI == NULL) {
2655 ctxt->errNo = XML_ERR_URI_REQUIRED;
2656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2657 ctxt->sax->error(ctxt->userData,
2658 "xmlParseExternalID: SYSTEM, no URI\n");
2659 ctxt->wellFormed = 0;
2660 ctxt->disableSAX = 1;
2661 }
2662 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2663 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2664 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2665 SKIP(6);
2666 if (!IS_BLANK(CUR)) {
2667 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669 ctxt->sax->error(ctxt->userData,
2670 "Space required after 'PUBLIC'\n");
2671 ctxt->wellFormed = 0;
2672 ctxt->disableSAX = 1;
2673 }
2674 SKIP_BLANKS;
2675 *publicID = xmlParsePubidLiteral(ctxt);
2676 if (*publicID == NULL) {
2677 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2679 ctxt->sax->error(ctxt->userData,
2680 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2681 ctxt->wellFormed = 0;
2682 ctxt->disableSAX = 1;
2683 }
2684 if (strict) {
2685 /*
2686 * We don't handle [83] so "S SystemLiteral" is required.
2687 */
2688 if (!IS_BLANK(CUR)) {
2689 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2691 ctxt->sax->error(ctxt->userData,
2692 "Space required after the Public Identifier\n");
2693 ctxt->wellFormed = 0;
2694 ctxt->disableSAX = 1;
2695 }
2696 } else {
2697 /*
2698 * We handle [83] so we return immediately, if
2699 * "S SystemLiteral" is not detected. From a purely parsing
2700 * point of view that's a nice mess.
2701 */
2702 const xmlChar *ptr;
2703 GROW;
2704
2705 ptr = CUR_PTR;
2706 if (!IS_BLANK(*ptr)) return(NULL);
2707
2708 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2709 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2710 }
2711 SKIP_BLANKS;
2712 URI = xmlParseSystemLiteral(ctxt);
2713 if (URI == NULL) {
2714 ctxt->errNo = XML_ERR_URI_REQUIRED;
2715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2716 ctxt->sax->error(ctxt->userData,
2717 "xmlParseExternalID: PUBLIC, no URI\n");
2718 ctxt->wellFormed = 0;
2719 ctxt->disableSAX = 1;
2720 }
2721 }
2722 return(URI);
2723}
2724
2725/**
2726 * xmlParseComment:
2727 * @ctxt: an XML parser context
2728 *
2729 * Skip an XML (SGML) comment <!-- .... -->
2730 * The spec says that "For compatibility, the string "--" (double-hyphen)
2731 * must not occur within comments. "
2732 *
2733 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2734 */
2735void
2736xmlParseComment(xmlParserCtxtPtr ctxt) {
2737 xmlChar *buf = NULL;
2738 int len;
2739 int size = XML_PARSER_BUFFER_SIZE;
2740 int q, ql;
2741 int r, rl;
2742 int cur, l;
2743 xmlParserInputState state;
2744 xmlParserInputPtr input = ctxt->input;
2745 int count = 0;
2746
2747 /*
2748 * Check that there is a comment right here.
2749 */
2750 if ((RAW != '<') || (NXT(1) != '!') ||
2751 (NXT(2) != '-') || (NXT(3) != '-')) return;
2752
2753 state = ctxt->instate;
2754 ctxt->instate = XML_PARSER_COMMENT;
2755 SHRINK;
2756 SKIP(4);
2757 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2758 if (buf == NULL) {
2759 xmlGenericError(xmlGenericErrorContext,
2760 "malloc of %d byte failed\n", size);
2761 ctxt->instate = state;
2762 return;
2763 }
2764 q = CUR_CHAR(ql);
2765 NEXTL(ql);
2766 r = CUR_CHAR(rl);
2767 NEXTL(rl);
2768 cur = CUR_CHAR(l);
2769 len = 0;
2770 while (IS_CHAR(cur) && /* checked */
2771 ((cur != '>') ||
2772 (r != '-') || (q != '-'))) {
2773 if ((r == '-') && (q == '-') && (len > 1)) {
2774 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData,
2777 "Comment must not contain '--' (double-hyphen)`\n");
2778 ctxt->wellFormed = 0;
2779 ctxt->disableSAX = 1;
2780 }
2781 if (len + 5 >= size) {
2782 size *= 2;
2783 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2784 if (buf == NULL) {
2785 xmlGenericError(xmlGenericErrorContext,
2786 "realloc of %d byte failed\n", size);
2787 ctxt->instate = state;
2788 return;
2789 }
2790 }
2791 COPY_BUF(ql,buf,len,q);
2792 q = r;
2793 ql = rl;
2794 r = cur;
2795 rl = l;
2796
2797 count++;
2798 if (count > 50) {
2799 GROW;
2800 count = 0;
2801 }
2802 NEXTL(l);
2803 cur = CUR_CHAR(l);
2804 if (cur == 0) {
2805 SHRINK;
2806 GROW;
2807 cur = CUR_CHAR(l);
2808 }
2809 }
2810 buf[len] = 0;
2811 if (!IS_CHAR(cur)) {
2812 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2814 ctxt->sax->error(ctxt->userData,
2815 "Comment not terminated \n<!--%.50s\n", buf);
2816 ctxt->wellFormed = 0;
2817 ctxt->disableSAX = 1;
2818 xmlFree(buf);
2819 } else {
2820 if (input != ctxt->input) {
2821 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2823 ctxt->sax->error(ctxt->userData,
2824"Comment doesn't start and stop in the same entity\n");
2825 ctxt->wellFormed = 0;
2826 ctxt->disableSAX = 1;
2827 }
2828 NEXT;
2829 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2830 (!ctxt->disableSAX))
2831 ctxt->sax->comment(ctxt->userData, buf);
2832 xmlFree(buf);
2833 }
2834 ctxt->instate = state;
2835}
2836
2837/**
2838 * xmlParsePITarget:
2839 * @ctxt: an XML parser context
2840 *
2841 * parse the name of a PI
2842 *
2843 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2844 *
2845 * Returns the PITarget name or NULL
2846 */
2847
2848xmlChar *
2849xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2850 xmlChar *name;
2851
2852 name = xmlParseName(ctxt);
2853 if ((name != NULL) &&
2854 ((name[0] == 'x') || (name[0] == 'X')) &&
2855 ((name[1] == 'm') || (name[1] == 'M')) &&
2856 ((name[2] == 'l') || (name[2] == 'L'))) {
2857 int i;
2858 if ((name[0] == 'x') && (name[1] == 'm') &&
2859 (name[2] == 'l') && (name[3] == 0)) {
2860 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2862 ctxt->sax->error(ctxt->userData,
2863 "XML declaration allowed only at the start of the document\n");
2864 ctxt->wellFormed = 0;
2865 ctxt->disableSAX = 1;
2866 return(name);
2867 } else if (name[3] == 0) {
2868 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2870 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2871 ctxt->wellFormed = 0;
2872 ctxt->disableSAX = 1;
2873 return(name);
2874 }
2875 for (i = 0;;i++) {
2876 if (xmlW3CPIs[i] == NULL) break;
2877 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2878 return(name);
2879 }
2880 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2881 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2882 ctxt->sax->warning(ctxt->userData,
2883 "xmlParsePItarget: invalid name prefix 'xml'\n");
2884 }
2885 }
2886 return(name);
2887}
2888
2889/**
2890 * xmlParsePI:
2891 * @ctxt: an XML parser context
2892 *
2893 * parse an XML Processing Instruction.
2894 *
2895 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2896 *
2897 * The processing is transfered to SAX once parsed.
2898 */
2899
2900void
2901xmlParsePI(xmlParserCtxtPtr ctxt) {
2902 xmlChar *buf = NULL;
2903 int len = 0;
2904 int size = XML_PARSER_BUFFER_SIZE;
2905 int cur, l;
2906 xmlChar *target;
2907 xmlParserInputState state;
2908 int count = 0;
2909
2910 if ((RAW == '<') && (NXT(1) == '?')) {
2911 xmlParserInputPtr input = ctxt->input;
2912 state = ctxt->instate;
2913 ctxt->instate = XML_PARSER_PI;
2914 /*
2915 * this is a Processing Instruction.
2916 */
2917 SKIP(2);
2918 SHRINK;
2919
2920 /*
2921 * Parse the target name and check for special support like
2922 * namespace.
2923 */
2924 target = xmlParsePITarget(ctxt);
2925 if (target != NULL) {
2926 if ((RAW == '?') && (NXT(1) == '>')) {
2927 if (input != ctxt->input) {
2928 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2930 ctxt->sax->error(ctxt->userData,
2931 "PI declaration doesn't start and stop in the same entity\n");
2932 ctxt->wellFormed = 0;
2933 ctxt->disableSAX = 1;
2934 }
2935 SKIP(2);
2936
2937 /*
2938 * SAX: PI detected.
2939 */
2940 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2941 (ctxt->sax->processingInstruction != NULL))
2942 ctxt->sax->processingInstruction(ctxt->userData,
2943 target, NULL);
2944 ctxt->instate = state;
2945 xmlFree(target);
2946 return;
2947 }
2948 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2949 if (buf == NULL) {
2950 xmlGenericError(xmlGenericErrorContext,
2951 "malloc of %d byte failed\n", size);
2952 ctxt->instate = state;
2953 return;
2954 }
2955 cur = CUR;
2956 if (!IS_BLANK(cur)) {
2957 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2959 ctxt->sax->error(ctxt->userData,
2960 "xmlParsePI: PI %s space expected\n", target);
2961 ctxt->wellFormed = 0;
2962 ctxt->disableSAX = 1;
2963 }
2964 SKIP_BLANKS;
2965 cur = CUR_CHAR(l);
2966 while (IS_CHAR(cur) && /* checked */
2967 ((cur != '?') || (NXT(1) != '>'))) {
2968 if (len + 5 >= size) {
2969 size *= 2;
2970 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2971 if (buf == NULL) {
2972 xmlGenericError(xmlGenericErrorContext,
2973 "realloc of %d byte failed\n", size);
2974 ctxt->instate = state;
2975 return;
2976 }
2977 }
2978 count++;
2979 if (count > 50) {
2980 GROW;
2981 count = 0;
2982 }
2983 COPY_BUF(l,buf,len,cur);
2984 NEXTL(l);
2985 cur = CUR_CHAR(l);
2986 if (cur == 0) {
2987 SHRINK;
2988 GROW;
2989 cur = CUR_CHAR(l);
2990 }
2991 }
2992 buf[len] = 0;
2993 if (cur != '?') {
2994 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2996 ctxt->sax->error(ctxt->userData,
2997 "xmlParsePI: PI %s never end ...\n", target);
2998 ctxt->wellFormed = 0;
2999 ctxt->disableSAX = 1;
3000 } else {
3001 if (input != ctxt->input) {
3002 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3004 ctxt->sax->error(ctxt->userData,
3005 "PI declaration doesn't start and stop in the same entity\n");
3006 ctxt->wellFormed = 0;
3007 ctxt->disableSAX = 1;
3008 }
3009 SKIP(2);
3010
3011 /*
3012 * SAX: PI detected.
3013 */
3014 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3015 (ctxt->sax->processingInstruction != NULL))
3016 ctxt->sax->processingInstruction(ctxt->userData,
3017 target, buf);
3018 }
3019 xmlFree(buf);
3020 xmlFree(target);
3021 } else {
3022 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3024 ctxt->sax->error(ctxt->userData,
3025 "xmlParsePI : no target name\n");
3026 ctxt->wellFormed = 0;
3027 ctxt->disableSAX = 1;
3028 }
3029 ctxt->instate = state;
3030 }
3031}
3032
3033/**
3034 * xmlParseNotationDecl:
3035 * @ctxt: an XML parser context
3036 *
3037 * parse a notation declaration
3038 *
3039 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3040 *
3041 * Hence there is actually 3 choices:
3042 * 'PUBLIC' S PubidLiteral
3043 * 'PUBLIC' S PubidLiteral S SystemLiteral
3044 * and 'SYSTEM' S SystemLiteral
3045 *
3046 * See the NOTE on xmlParseExternalID().
3047 */
3048
3049void
3050xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3051 xmlChar *name;
3052 xmlChar *Pubid;
3053 xmlChar *Systemid;
3054
3055 if ((RAW == '<') && (NXT(1) == '!') &&
3056 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3057 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3058 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3059 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3060 xmlParserInputPtr input = ctxt->input;
3061 SHRINK;
3062 SKIP(10);
3063 if (!IS_BLANK(CUR)) {
3064 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3066 ctxt->sax->error(ctxt->userData,
3067 "Space required after '<!NOTATION'\n");
3068 ctxt->wellFormed = 0;
3069 ctxt->disableSAX = 1;
3070 return;
3071 }
3072 SKIP_BLANKS;
3073
Daniel Veillard29631a82001-03-05 09:49:20 +00003074 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003075 if (name == NULL) {
3076 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3078 ctxt->sax->error(ctxt->userData,
3079 "NOTATION: Name expected here\n");
3080 ctxt->wellFormed = 0;
3081 ctxt->disableSAX = 1;
3082 return;
3083 }
3084 if (!IS_BLANK(CUR)) {
3085 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3086 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3087 ctxt->sax->error(ctxt->userData,
3088 "Space required after the NOTATION name'\n");
3089 ctxt->wellFormed = 0;
3090 ctxt->disableSAX = 1;
3091 return;
3092 }
3093 SKIP_BLANKS;
3094
3095 /*
3096 * Parse the IDs.
3097 */
3098 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3099 SKIP_BLANKS;
3100
3101 if (RAW == '>') {
3102 if (input != ctxt->input) {
3103 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3105 ctxt->sax->error(ctxt->userData,
3106"Notation declaration doesn't start and stop in the same entity\n");
3107 ctxt->wellFormed = 0;
3108 ctxt->disableSAX = 1;
3109 }
3110 NEXT;
3111 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3112 (ctxt->sax->notationDecl != NULL))
3113 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3114 } else {
3115 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3117 ctxt->sax->error(ctxt->userData,
3118 "'>' required to close NOTATION declaration\n");
3119 ctxt->wellFormed = 0;
3120 ctxt->disableSAX = 1;
3121 }
3122 xmlFree(name);
3123 if (Systemid != NULL) xmlFree(Systemid);
3124 if (Pubid != NULL) xmlFree(Pubid);
3125 }
3126}
3127
3128/**
3129 * xmlParseEntityDecl:
3130 * @ctxt: an XML parser context
3131 *
3132 * parse <!ENTITY declarations
3133 *
3134 * [70] EntityDecl ::= GEDecl | PEDecl
3135 *
3136 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3137 *
3138 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3139 *
3140 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3141 *
3142 * [74] PEDef ::= EntityValue | ExternalID
3143 *
3144 * [76] NDataDecl ::= S 'NDATA' S Name
3145 *
3146 * [ VC: Notation Declared ]
3147 * The Name must match the declared name of a notation.
3148 */
3149
3150void
3151xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3152 xmlChar *name = NULL;
3153 xmlChar *value = NULL;
3154 xmlChar *URI = NULL, *literal = NULL;
3155 xmlChar *ndata = NULL;
3156 int isParameter = 0;
3157 xmlChar *orig = NULL;
3158
3159 GROW;
3160 if ((RAW == '<') && (NXT(1) == '!') &&
3161 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3162 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3163 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3164 xmlParserInputPtr input = ctxt->input;
3165 ctxt->instate = XML_PARSER_ENTITY_DECL;
3166 SHRINK;
3167 SKIP(8);
3168 if (!IS_BLANK(CUR)) {
3169 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3171 ctxt->sax->error(ctxt->userData,
3172 "Space required after '<!ENTITY'\n");
3173 ctxt->wellFormed = 0;
3174 ctxt->disableSAX = 1;
3175 }
3176 SKIP_BLANKS;
3177
3178 if (RAW == '%') {
3179 NEXT;
3180 if (!IS_BLANK(CUR)) {
3181 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3183 ctxt->sax->error(ctxt->userData,
3184 "Space required after '%'\n");
3185 ctxt->wellFormed = 0;
3186 ctxt->disableSAX = 1;
3187 }
3188 SKIP_BLANKS;
3189 isParameter = 1;
3190 }
3191
Daniel Veillard29631a82001-03-05 09:49:20 +00003192 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003193 if (name == NULL) {
3194 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3196 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3197 ctxt->wellFormed = 0;
3198 ctxt->disableSAX = 1;
3199 return;
3200 }
3201 if (!IS_BLANK(CUR)) {
3202 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3204 ctxt->sax->error(ctxt->userData,
3205 "Space required after the entity name\n");
3206 ctxt->wellFormed = 0;
3207 ctxt->disableSAX = 1;
3208 }
3209 SKIP_BLANKS;
3210
3211 /*
3212 * handle the various case of definitions...
3213 */
3214 if (isParameter) {
3215 if ((RAW == '"') || (RAW == '\'')) {
3216 value = xmlParseEntityValue(ctxt, &orig);
3217 if (value) {
3218 if ((ctxt->sax != NULL) &&
3219 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3220 ctxt->sax->entityDecl(ctxt->userData, name,
3221 XML_INTERNAL_PARAMETER_ENTITY,
3222 NULL, NULL, value);
3223 }
3224 } else {
3225 URI = xmlParseExternalID(ctxt, &literal, 1);
3226 if ((URI == NULL) && (literal == NULL)) {
3227 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3229 ctxt->sax->error(ctxt->userData,
3230 "Entity value required\n");
3231 ctxt->wellFormed = 0;
3232 ctxt->disableSAX = 1;
3233 }
3234 if (URI) {
3235 xmlURIPtr uri;
3236
3237 uri = xmlParseURI((const char *) URI);
3238 if (uri == NULL) {
3239 ctxt->errNo = XML_ERR_INVALID_URI;
3240 if ((ctxt->sax != NULL) &&
3241 (!ctxt->disableSAX) &&
3242 (ctxt->sax->error != NULL))
3243 ctxt->sax->error(ctxt->userData,
3244 "Invalid URI: %s\n", URI);
3245 ctxt->wellFormed = 0;
3246 } else {
3247 if (uri->fragment != NULL) {
3248 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3249 if ((ctxt->sax != NULL) &&
3250 (!ctxt->disableSAX) &&
3251 (ctxt->sax->error != NULL))
3252 ctxt->sax->error(ctxt->userData,
3253 "Fragment not allowed: %s\n", URI);
3254 ctxt->wellFormed = 0;
3255 } else {
3256 if ((ctxt->sax != NULL) &&
3257 (!ctxt->disableSAX) &&
3258 (ctxt->sax->entityDecl != NULL))
3259 ctxt->sax->entityDecl(ctxt->userData, name,
3260 XML_EXTERNAL_PARAMETER_ENTITY,
3261 literal, URI, NULL);
3262 }
3263 xmlFreeURI(uri);
3264 }
3265 }
3266 }
3267 } else {
3268 if ((RAW == '"') || (RAW == '\'')) {
3269 value = xmlParseEntityValue(ctxt, &orig);
3270 if ((ctxt->sax != NULL) &&
3271 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3272 ctxt->sax->entityDecl(ctxt->userData, name,
3273 XML_INTERNAL_GENERAL_ENTITY,
3274 NULL, NULL, value);
3275 } else {
3276 URI = xmlParseExternalID(ctxt, &literal, 1);
3277 if ((URI == NULL) && (literal == NULL)) {
3278 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3280 ctxt->sax->error(ctxt->userData,
3281 "Entity value required\n");
3282 ctxt->wellFormed = 0;
3283 ctxt->disableSAX = 1;
3284 }
3285 if (URI) {
3286 xmlURIPtr uri;
3287
3288 uri = xmlParseURI((const char *)URI);
3289 if (uri == NULL) {
3290 ctxt->errNo = XML_ERR_INVALID_URI;
3291 if ((ctxt->sax != NULL) &&
3292 (!ctxt->disableSAX) &&
3293 (ctxt->sax->error != NULL))
3294 ctxt->sax->error(ctxt->userData,
3295 "Invalid URI: %s\n", URI);
3296 ctxt->wellFormed = 0;
3297 } else {
3298 if (uri->fragment != NULL) {
3299 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3300 if ((ctxt->sax != NULL) &&
3301 (!ctxt->disableSAX) &&
3302 (ctxt->sax->error != NULL))
3303 ctxt->sax->error(ctxt->userData,
3304 "Fragment not allowed: %s\n", URI);
3305 ctxt->wellFormed = 0;
3306 }
3307 xmlFreeURI(uri);
3308 }
3309 }
3310 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3311 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3313 ctxt->sax->error(ctxt->userData,
3314 "Space required before 'NDATA'\n");
3315 ctxt->wellFormed = 0;
3316 ctxt->disableSAX = 1;
3317 }
3318 SKIP_BLANKS;
3319 if ((RAW == 'N') && (NXT(1) == 'D') &&
3320 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3321 (NXT(4) == 'A')) {
3322 SKIP(5);
3323 if (!IS_BLANK(CUR)) {
3324 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3326 ctxt->sax->error(ctxt->userData,
3327 "Space required after 'NDATA'\n");
3328 ctxt->wellFormed = 0;
3329 ctxt->disableSAX = 1;
3330 }
3331 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003332 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003333 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3334 (ctxt->sax->unparsedEntityDecl != NULL))
3335 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3336 literal, URI, ndata);
3337 } else {
3338 if ((ctxt->sax != NULL) &&
3339 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3340 ctxt->sax->entityDecl(ctxt->userData, name,
3341 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3342 literal, URI, NULL);
3343 }
3344 }
3345 }
3346 SKIP_BLANKS;
3347 if (RAW != '>') {
3348 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData,
3351 "xmlParseEntityDecl: entity %s not terminated\n", name);
3352 ctxt->wellFormed = 0;
3353 ctxt->disableSAX = 1;
3354 } else {
3355 if (input != ctxt->input) {
3356 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3358 ctxt->sax->error(ctxt->userData,
3359"Entity declaration doesn't start and stop in the same entity\n");
3360 ctxt->wellFormed = 0;
3361 ctxt->disableSAX = 1;
3362 }
3363 NEXT;
3364 }
3365 if (orig != NULL) {
3366 /*
3367 * Ugly mechanism to save the raw entity value.
3368 */
3369 xmlEntityPtr cur = NULL;
3370
3371 if (isParameter) {
3372 if ((ctxt->sax != NULL) &&
3373 (ctxt->sax->getParameterEntity != NULL))
3374 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3375 } else {
3376 if ((ctxt->sax != NULL) &&
3377 (ctxt->sax->getEntity != NULL))
3378 cur = ctxt->sax->getEntity(ctxt->userData, name);
3379 }
3380 if (cur != NULL) {
3381 if (cur->orig != NULL)
3382 xmlFree(orig);
3383 else
3384 cur->orig = orig;
3385 } else
3386 xmlFree(orig);
3387 }
3388 if (name != NULL) xmlFree(name);
3389 if (value != NULL) xmlFree(value);
3390 if (URI != NULL) xmlFree(URI);
3391 if (literal != NULL) xmlFree(literal);
3392 if (ndata != NULL) xmlFree(ndata);
3393 }
3394}
3395
3396/**
3397 * xmlParseDefaultDecl:
3398 * @ctxt: an XML parser context
3399 * @value: Receive a possible fixed default value for the attribute
3400 *
3401 * Parse an attribute default declaration
3402 *
3403 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3404 *
3405 * [ VC: Required Attribute ]
3406 * if the default declaration is the keyword #REQUIRED, then the
3407 * attribute must be specified for all elements of the type in the
3408 * attribute-list declaration.
3409 *
3410 * [ VC: Attribute Default Legal ]
3411 * The declared default value must meet the lexical constraints of
3412 * the declared attribute type c.f. xmlValidateAttributeDecl()
3413 *
3414 * [ VC: Fixed Attribute Default ]
3415 * if an attribute has a default value declared with the #FIXED
3416 * keyword, instances of that attribute must match the default value.
3417 *
3418 * [ WFC: No < in Attribute Values ]
3419 * handled in xmlParseAttValue()
3420 *
3421 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3422 * or XML_ATTRIBUTE_FIXED.
3423 */
3424
3425int
3426xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3427 int val;
3428 xmlChar *ret;
3429
3430 *value = NULL;
3431 if ((RAW == '#') && (NXT(1) == 'R') &&
3432 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3433 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3434 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3435 (NXT(8) == 'D')) {
3436 SKIP(9);
3437 return(XML_ATTRIBUTE_REQUIRED);
3438 }
3439 if ((RAW == '#') && (NXT(1) == 'I') &&
3440 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3441 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3442 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3443 SKIP(8);
3444 return(XML_ATTRIBUTE_IMPLIED);
3445 }
3446 val = XML_ATTRIBUTE_NONE;
3447 if ((RAW == '#') && (NXT(1) == 'F') &&
3448 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3449 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3450 SKIP(6);
3451 val = XML_ATTRIBUTE_FIXED;
3452 if (!IS_BLANK(CUR)) {
3453 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3455 ctxt->sax->error(ctxt->userData,
3456 "Space required after '#FIXED'\n");
3457 ctxt->wellFormed = 0;
3458 ctxt->disableSAX = 1;
3459 }
3460 SKIP_BLANKS;
3461 }
3462 ret = xmlParseAttValue(ctxt);
3463 ctxt->instate = XML_PARSER_DTD;
3464 if (ret == NULL) {
3465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3466 ctxt->sax->error(ctxt->userData,
3467 "Attribute default value declaration error\n");
3468 ctxt->wellFormed = 0;
3469 ctxt->disableSAX = 1;
3470 } else
3471 *value = ret;
3472 return(val);
3473}
3474
3475/**
3476 * xmlParseNotationType:
3477 * @ctxt: an XML parser context
3478 *
3479 * parse an Notation attribute type.
3480 *
3481 * Note: the leading 'NOTATION' S part has already being parsed...
3482 *
3483 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3484 *
3485 * [ VC: Notation Attributes ]
3486 * Values of this type must match one of the notation names included
3487 * in the declaration; all notation names in the declaration must be declared.
3488 *
3489 * Returns: the notation attribute tree built while parsing
3490 */
3491
3492xmlEnumerationPtr
3493xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3494 xmlChar *name;
3495 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3496
3497 if (RAW != '(') {
3498 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3500 ctxt->sax->error(ctxt->userData,
3501 "'(' required to start 'NOTATION'\n");
3502 ctxt->wellFormed = 0;
3503 ctxt->disableSAX = 1;
3504 return(NULL);
3505 }
3506 SHRINK;
3507 do {
3508 NEXT;
3509 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003510 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003511 if (name == NULL) {
3512 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3514 ctxt->sax->error(ctxt->userData,
3515 "Name expected in NOTATION declaration\n");
3516 ctxt->wellFormed = 0;
3517 ctxt->disableSAX = 1;
3518 return(ret);
3519 }
3520 cur = xmlCreateEnumeration(name);
3521 xmlFree(name);
3522 if (cur == NULL) return(ret);
3523 if (last == NULL) ret = last = cur;
3524 else {
3525 last->next = cur;
3526 last = cur;
3527 }
3528 SKIP_BLANKS;
3529 } while (RAW == '|');
3530 if (RAW != ')') {
3531 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3533 ctxt->sax->error(ctxt->userData,
3534 "')' required to finish NOTATION declaration\n");
3535 ctxt->wellFormed = 0;
3536 ctxt->disableSAX = 1;
3537 if ((last != NULL) && (last != ret))
3538 xmlFreeEnumeration(last);
3539 return(ret);
3540 }
3541 NEXT;
3542 return(ret);
3543}
3544
3545/**
3546 * xmlParseEnumerationType:
3547 * @ctxt: an XML parser context
3548 *
3549 * parse an Enumeration attribute type.
3550 *
3551 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3552 *
3553 * [ VC: Enumeration ]
3554 * Values of this type must match one of the Nmtoken tokens in
3555 * the declaration
3556 *
3557 * Returns: the enumeration attribute tree built while parsing
3558 */
3559
3560xmlEnumerationPtr
3561xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3562 xmlChar *name;
3563 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3564
3565 if (RAW != '(') {
3566 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3568 ctxt->sax->error(ctxt->userData,
3569 "'(' required to start ATTLIST enumeration\n");
3570 ctxt->wellFormed = 0;
3571 ctxt->disableSAX = 1;
3572 return(NULL);
3573 }
3574 SHRINK;
3575 do {
3576 NEXT;
3577 SKIP_BLANKS;
3578 name = xmlParseNmtoken(ctxt);
3579 if (name == NULL) {
3580 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3582 ctxt->sax->error(ctxt->userData,
3583 "NmToken expected in ATTLIST enumeration\n");
3584 ctxt->wellFormed = 0;
3585 ctxt->disableSAX = 1;
3586 return(ret);
3587 }
3588 cur = xmlCreateEnumeration(name);
3589 xmlFree(name);
3590 if (cur == NULL) return(ret);
3591 if (last == NULL) ret = last = cur;
3592 else {
3593 last->next = cur;
3594 last = cur;
3595 }
3596 SKIP_BLANKS;
3597 } while (RAW == '|');
3598 if (RAW != ')') {
3599 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3601 ctxt->sax->error(ctxt->userData,
3602 "')' required to finish ATTLIST enumeration\n");
3603 ctxt->wellFormed = 0;
3604 ctxt->disableSAX = 1;
3605 return(ret);
3606 }
3607 NEXT;
3608 return(ret);
3609}
3610
3611/**
3612 * xmlParseEnumeratedType:
3613 * @ctxt: an XML parser context
3614 * @tree: the enumeration tree built while parsing
3615 *
3616 * parse an Enumerated attribute type.
3617 *
3618 * [57] EnumeratedType ::= NotationType | Enumeration
3619 *
3620 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3621 *
3622 *
3623 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3624 */
3625
3626int
3627xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3628 if ((RAW == 'N') && (NXT(1) == 'O') &&
3629 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3630 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3631 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3632 SKIP(8);
3633 if (!IS_BLANK(CUR)) {
3634 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3636 ctxt->sax->error(ctxt->userData,
3637 "Space required after 'NOTATION'\n");
3638 ctxt->wellFormed = 0;
3639 ctxt->disableSAX = 1;
3640 return(0);
3641 }
3642 SKIP_BLANKS;
3643 *tree = xmlParseNotationType(ctxt);
3644 if (*tree == NULL) return(0);
3645 return(XML_ATTRIBUTE_NOTATION);
3646 }
3647 *tree = xmlParseEnumerationType(ctxt);
3648 if (*tree == NULL) return(0);
3649 return(XML_ATTRIBUTE_ENUMERATION);
3650}
3651
3652/**
3653 * xmlParseAttributeType:
3654 * @ctxt: an XML parser context
3655 * @tree: the enumeration tree built while parsing
3656 *
3657 * parse the Attribute list def for an element
3658 *
3659 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3660 *
3661 * [55] StringType ::= 'CDATA'
3662 *
3663 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3664 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3665 *
3666 * Validity constraints for attribute values syntax are checked in
3667 * xmlValidateAttributeValue()
3668 *
3669 * [ VC: ID ]
3670 * Values of type ID must match the Name production. A name must not
3671 * appear more than once in an XML document as a value of this type;
3672 * i.e., ID values must uniquely identify the elements which bear them.
3673 *
3674 * [ VC: One ID per Element Type ]
3675 * No element type may have more than one ID attribute specified.
3676 *
3677 * [ VC: ID Attribute Default ]
3678 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3679 *
3680 * [ VC: IDREF ]
3681 * Values of type IDREF must match the Name production, and values
3682 * of type IDREFS must match Names; each IDREF Name must match the value
3683 * of an ID attribute on some element in the XML document; i.e. IDREF
3684 * values must match the value of some ID attribute.
3685 *
3686 * [ VC: Entity Name ]
3687 * Values of type ENTITY must match the Name production, values
3688 * of type ENTITIES must match Names; each Entity Name must match the
3689 * name of an unparsed entity declared in the DTD.
3690 *
3691 * [ VC: Name Token ]
3692 * Values of type NMTOKEN must match the Nmtoken production; values
3693 * of type NMTOKENS must match Nmtokens.
3694 *
3695 * Returns the attribute type
3696 */
3697int
3698xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3699 SHRINK;
3700 if ((RAW == 'C') && (NXT(1) == 'D') &&
3701 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3702 (NXT(4) == 'A')) {
3703 SKIP(5);
3704 return(XML_ATTRIBUTE_CDATA);
3705 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3706 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3707 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3708 SKIP(6);
3709 return(XML_ATTRIBUTE_IDREFS);
3710 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3711 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3712 (NXT(4) == 'F')) {
3713 SKIP(5);
3714 return(XML_ATTRIBUTE_IDREF);
3715 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3716 SKIP(2);
3717 return(XML_ATTRIBUTE_ID);
3718 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3719 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3720 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3721 SKIP(6);
3722 return(XML_ATTRIBUTE_ENTITY);
3723 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3724 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3725 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3726 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3727 SKIP(8);
3728 return(XML_ATTRIBUTE_ENTITIES);
3729 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3730 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3731 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3732 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3733 SKIP(8);
3734 return(XML_ATTRIBUTE_NMTOKENS);
3735 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3736 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3737 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3738 (NXT(6) == 'N')) {
3739 SKIP(7);
3740 return(XML_ATTRIBUTE_NMTOKEN);
3741 }
3742 return(xmlParseEnumeratedType(ctxt, tree));
3743}
3744
3745/**
3746 * xmlParseAttributeListDecl:
3747 * @ctxt: an XML parser context
3748 *
3749 * : parse the Attribute list def for an element
3750 *
3751 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3752 *
3753 * [53] AttDef ::= S Name S AttType S DefaultDecl
3754 *
3755 */
3756void
3757xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3758 xmlChar *elemName;
3759 xmlChar *attrName;
3760 xmlEnumerationPtr tree;
3761
3762 if ((RAW == '<') && (NXT(1) == '!') &&
3763 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3764 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3765 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3766 (NXT(8) == 'T')) {
3767 xmlParserInputPtr input = ctxt->input;
3768
3769 SKIP(9);
3770 if (!IS_BLANK(CUR)) {
3771 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3773 ctxt->sax->error(ctxt->userData,
3774 "Space required after '<!ATTLIST'\n");
3775 ctxt->wellFormed = 0;
3776 ctxt->disableSAX = 1;
3777 }
3778 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003779 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003780 if (elemName == NULL) {
3781 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3783 ctxt->sax->error(ctxt->userData,
3784 "ATTLIST: no name for Element\n");
3785 ctxt->wellFormed = 0;
3786 ctxt->disableSAX = 1;
3787 return;
3788 }
3789 SKIP_BLANKS;
3790 GROW;
3791 while (RAW != '>') {
3792 const xmlChar *check = CUR_PTR;
3793 int type;
3794 int def;
3795 xmlChar *defaultValue = NULL;
3796
3797 GROW;
3798 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003799 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003800 if (attrName == NULL) {
3801 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3803 ctxt->sax->error(ctxt->userData,
3804 "ATTLIST: no name for Attribute\n");
3805 ctxt->wellFormed = 0;
3806 ctxt->disableSAX = 1;
3807 break;
3808 }
3809 GROW;
3810 if (!IS_BLANK(CUR)) {
3811 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3813 ctxt->sax->error(ctxt->userData,
3814 "Space required after the attribute name\n");
3815 ctxt->wellFormed = 0;
3816 ctxt->disableSAX = 1;
3817 if (attrName != NULL)
3818 xmlFree(attrName);
3819 if (defaultValue != NULL)
3820 xmlFree(defaultValue);
3821 break;
3822 }
3823 SKIP_BLANKS;
3824
3825 type = xmlParseAttributeType(ctxt, &tree);
3826 if (type <= 0) {
3827 if (attrName != NULL)
3828 xmlFree(attrName);
3829 if (defaultValue != NULL)
3830 xmlFree(defaultValue);
3831 break;
3832 }
3833
3834 GROW;
3835 if (!IS_BLANK(CUR)) {
3836 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3838 ctxt->sax->error(ctxt->userData,
3839 "Space required after the attribute type\n");
3840 ctxt->wellFormed = 0;
3841 ctxt->disableSAX = 1;
3842 if (attrName != NULL)
3843 xmlFree(attrName);
3844 if (defaultValue != NULL)
3845 xmlFree(defaultValue);
3846 if (tree != NULL)
3847 xmlFreeEnumeration(tree);
3848 break;
3849 }
3850 SKIP_BLANKS;
3851
3852 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3853 if (def <= 0) {
3854 if (attrName != NULL)
3855 xmlFree(attrName);
3856 if (defaultValue != NULL)
3857 xmlFree(defaultValue);
3858 if (tree != NULL)
3859 xmlFreeEnumeration(tree);
3860 break;
3861 }
3862
3863 GROW;
3864 if (RAW != '>') {
3865 if (!IS_BLANK(CUR)) {
3866 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3868 ctxt->sax->error(ctxt->userData,
3869 "Space required after the attribute default value\n");
3870 ctxt->wellFormed = 0;
3871 ctxt->disableSAX = 1;
3872 if (attrName != NULL)
3873 xmlFree(attrName);
3874 if (defaultValue != NULL)
3875 xmlFree(defaultValue);
3876 if (tree != NULL)
3877 xmlFreeEnumeration(tree);
3878 break;
3879 }
3880 SKIP_BLANKS;
3881 }
3882 if (check == CUR_PTR) {
3883 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3885 ctxt->sax->error(ctxt->userData,
3886 "xmlParseAttributeListDecl: detected internal error\n");
3887 if (attrName != NULL)
3888 xmlFree(attrName);
3889 if (defaultValue != NULL)
3890 xmlFree(defaultValue);
3891 if (tree != NULL)
3892 xmlFreeEnumeration(tree);
3893 break;
3894 }
3895 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3896 (ctxt->sax->attributeDecl != NULL))
3897 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3898 type, def, defaultValue, tree);
3899 if (attrName != NULL)
3900 xmlFree(attrName);
3901 if (defaultValue != NULL)
3902 xmlFree(defaultValue);
3903 GROW;
3904 }
3905 if (RAW == '>') {
3906 if (input != ctxt->input) {
3907 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3909 ctxt->sax->error(ctxt->userData,
3910"Attribute list declaration doesn't start and stop in the same entity\n");
3911 ctxt->wellFormed = 0;
3912 ctxt->disableSAX = 1;
3913 }
3914 NEXT;
3915 }
3916
3917 xmlFree(elemName);
3918 }
3919}
3920
3921/**
3922 * xmlParseElementMixedContentDecl:
3923 * @ctxt: an XML parser context
3924 *
3925 * parse the declaration for a Mixed Element content
3926 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3927 *
3928 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3929 * '(' S? '#PCDATA' S? ')'
3930 *
3931 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3932 *
3933 * [ VC: No Duplicate Types ]
3934 * The same name must not appear more than once in a single
3935 * mixed-content declaration.
3936 *
3937 * returns: the list of the xmlElementContentPtr describing the element choices
3938 */
3939xmlElementContentPtr
3940xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3941 xmlElementContentPtr ret = NULL, cur = NULL, n;
3942 xmlChar *elem = NULL;
3943
3944 GROW;
3945 if ((RAW == '#') && (NXT(1) == 'P') &&
3946 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3947 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3948 (NXT(6) == 'A')) {
3949 SKIP(7);
3950 SKIP_BLANKS;
3951 SHRINK;
3952 if (RAW == ')') {
3953 ctxt->entity = ctxt->input;
3954 NEXT;
3955 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3956 if (RAW == '*') {
3957 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3958 NEXT;
3959 }
3960 return(ret);
3961 }
3962 if ((RAW == '(') || (RAW == '|')) {
3963 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3964 if (ret == NULL) return(NULL);
3965 }
3966 while (RAW == '|') {
3967 NEXT;
3968 if (elem == NULL) {
3969 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3970 if (ret == NULL) return(NULL);
3971 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003972 if (cur != NULL)
3973 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003974 cur = ret;
3975 } else {
3976 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3977 if (n == NULL) return(NULL);
3978 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003979 if (n->c1 != NULL)
3980 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00003981 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003982 if (n != NULL)
3983 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003984 cur = n;
3985 xmlFree(elem);
3986 }
3987 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003988 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003989 if (elem == NULL) {
3990 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3992 ctxt->sax->error(ctxt->userData,
3993 "xmlParseElementMixedContentDecl : Name expected\n");
3994 ctxt->wellFormed = 0;
3995 ctxt->disableSAX = 1;
3996 xmlFreeElementContent(cur);
3997 return(NULL);
3998 }
3999 SKIP_BLANKS;
4000 GROW;
4001 }
4002 if ((RAW == ')') && (NXT(1) == '*')) {
4003 if (elem != NULL) {
4004 cur->c2 = xmlNewElementContent(elem,
4005 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004006 if (cur->c2 != NULL)
4007 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004008 xmlFree(elem);
4009 }
4010 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4011 ctxt->entity = ctxt->input;
4012 SKIP(2);
4013 } else {
4014 if (elem != NULL) xmlFree(elem);
4015 xmlFreeElementContent(ret);
4016 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4018 ctxt->sax->error(ctxt->userData,
4019 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4020 ctxt->wellFormed = 0;
4021 ctxt->disableSAX = 1;
4022 return(NULL);
4023 }
4024
4025 } else {
4026 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4028 ctxt->sax->error(ctxt->userData,
4029 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4030 ctxt->wellFormed = 0;
4031 ctxt->disableSAX = 1;
4032 }
4033 return(ret);
4034}
4035
4036/**
4037 * xmlParseElementChildrenContentDecl:
4038 * @ctxt: an XML parser context
4039 *
4040 * parse the declaration for a Mixed Element content
4041 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4042 *
4043 *
4044 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4045 *
4046 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4047 *
4048 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4049 *
4050 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4051 *
4052 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4053 * TODO Parameter-entity replacement text must be properly nested
4054 * with parenthetized groups. That is to say, if either of the
4055 * opening or closing parentheses in a choice, seq, or Mixed
4056 * construct is contained in the replacement text for a parameter
4057 * entity, both must be contained in the same replacement text. For
4058 * interoperability, if a parameter-entity reference appears in a
4059 * choice, seq, or Mixed construct, its replacement text should not
4060 * be empty, and neither the first nor last non-blank character of
4061 * the replacement text should be a connector (| or ,).
4062 *
4063 * returns: the tree of xmlElementContentPtr describing the element
4064 * hierarchy.
4065 */
4066xmlElementContentPtr
4067#ifdef VMS
4068xmlParseElementChildrenContentD
4069#else
4070xmlParseElementChildrenContentDecl
4071#endif
4072(xmlParserCtxtPtr ctxt) {
4073 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4074 xmlChar *elem;
4075 xmlChar type = 0;
4076
4077 SKIP_BLANKS;
4078 GROW;
4079 if (RAW == '(') {
4080 /* Recurse on first child */
4081 NEXT;
4082 SKIP_BLANKS;
4083 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4084 SKIP_BLANKS;
4085 GROW;
4086 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004087 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004088 if (elem == NULL) {
4089 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4091 ctxt->sax->error(ctxt->userData,
4092 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4093 ctxt->wellFormed = 0;
4094 ctxt->disableSAX = 1;
4095 return(NULL);
4096 }
4097 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4098 GROW;
4099 if (RAW == '?') {
4100 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4101 NEXT;
4102 } else if (RAW == '*') {
4103 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4104 NEXT;
4105 } else if (RAW == '+') {
4106 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4107 NEXT;
4108 } else {
4109 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4110 }
4111 xmlFree(elem);
4112 GROW;
4113 }
4114 SKIP_BLANKS;
4115 SHRINK;
4116 while (RAW != ')') {
4117 /*
4118 * Each loop we parse one separator and one element.
4119 */
4120 if (RAW == ',') {
4121 if (type == 0) type = CUR;
4122
4123 /*
4124 * Detect "Name | Name , Name" error
4125 */
4126 else if (type != CUR) {
4127 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4129 ctxt->sax->error(ctxt->userData,
4130 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4131 type);
4132 ctxt->wellFormed = 0;
4133 ctxt->disableSAX = 1;
4134 if ((op != NULL) && (op != ret))
4135 xmlFreeElementContent(op);
4136 if ((last != NULL) && (last != ret) &&
4137 (last != ret->c1) && (last != ret->c2))
4138 xmlFreeElementContent(last);
4139 if (ret != NULL)
4140 xmlFreeElementContent(ret);
4141 return(NULL);
4142 }
4143 NEXT;
4144
4145 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4146 if (op == NULL) {
4147 xmlFreeElementContent(ret);
4148 return(NULL);
4149 }
4150 if (last == NULL) {
4151 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004152 if (ret != NULL)
4153 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004154 ret = cur = op;
4155 } else {
4156 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004157 if (op != NULL)
4158 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004159 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004160 if (last != NULL)
4161 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004162 cur =op;
4163 last = NULL;
4164 }
4165 } else if (RAW == '|') {
4166 if (type == 0) type = CUR;
4167
4168 /*
4169 * Detect "Name , Name | Name" error
4170 */
4171 else if (type != CUR) {
4172 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4174 ctxt->sax->error(ctxt->userData,
4175 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4176 type);
4177 ctxt->wellFormed = 0;
4178 ctxt->disableSAX = 1;
4179 if ((op != NULL) && (op != ret) && (op != last))
4180 xmlFreeElementContent(op);
4181 if ((last != NULL) && (last != ret) &&
4182 (last != ret->c1) && (last != ret->c2))
4183 xmlFreeElementContent(last);
4184 if (ret != NULL)
4185 xmlFreeElementContent(ret);
4186 return(NULL);
4187 }
4188 NEXT;
4189
4190 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4191 if (op == NULL) {
4192 if ((op != NULL) && (op != ret))
4193 xmlFreeElementContent(op);
4194 if ((last != NULL) && (last != ret) &&
4195 (last != ret->c1) && (last != ret->c2))
4196 xmlFreeElementContent(last);
4197 if (ret != NULL)
4198 xmlFreeElementContent(ret);
4199 return(NULL);
4200 }
4201 if (last == NULL) {
4202 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004203 if (ret != NULL)
4204 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004205 ret = cur = op;
4206 } else {
4207 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004208 if (op != NULL)
4209 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004210 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004211 if (last != NULL)
4212 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004213 cur =op;
4214 last = NULL;
4215 }
4216 } else {
4217 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4219 ctxt->sax->error(ctxt->userData,
4220 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4221 ctxt->wellFormed = 0;
4222 ctxt->disableSAX = 1;
4223 if ((op != NULL) && (op != ret))
4224 xmlFreeElementContent(op);
4225 if ((last != NULL) && (last != ret) &&
4226 (last != ret->c1) && (last != ret->c2))
4227 xmlFreeElementContent(last);
4228 if (ret != NULL)
4229 xmlFreeElementContent(ret);
4230 return(NULL);
4231 }
4232 GROW;
4233 SKIP_BLANKS;
4234 GROW;
4235 if (RAW == '(') {
4236 /* Recurse on second child */
4237 NEXT;
4238 SKIP_BLANKS;
4239 last = xmlParseElementChildrenContentDecl(ctxt);
4240 SKIP_BLANKS;
4241 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004242 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004243 if (elem == NULL) {
4244 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4246 ctxt->sax->error(ctxt->userData,
4247 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4248 ctxt->wellFormed = 0;
4249 ctxt->disableSAX = 1;
4250 if ((op != NULL) && (op != ret))
4251 xmlFreeElementContent(op);
4252 if ((last != NULL) && (last != ret) &&
4253 (last != ret->c1) && (last != ret->c2))
4254 xmlFreeElementContent(last);
4255 if (ret != NULL)
4256 xmlFreeElementContent(ret);
4257 return(NULL);
4258 }
4259 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4260 xmlFree(elem);
4261 if (RAW == '?') {
4262 last->ocur = XML_ELEMENT_CONTENT_OPT;
4263 NEXT;
4264 } else if (RAW == '*') {
4265 last->ocur = XML_ELEMENT_CONTENT_MULT;
4266 NEXT;
4267 } else if (RAW == '+') {
4268 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4269 NEXT;
4270 } else {
4271 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4272 }
4273 }
4274 SKIP_BLANKS;
4275 GROW;
4276 }
4277 if ((cur != NULL) && (last != NULL)) {
4278 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004279 if (last != NULL)
4280 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004281 }
4282 ctxt->entity = ctxt->input;
4283 NEXT;
4284 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004285 if (ret != NULL)
4286 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004287 NEXT;
4288 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004289 if (ret != NULL)
4290 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004291 NEXT;
4292 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004293 if (ret != NULL)
4294 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004295 NEXT;
4296 }
4297 return(ret);
4298}
4299
4300/**
4301 * xmlParseElementContentDecl:
4302 * @ctxt: an XML parser context
4303 * @name: the name of the element being defined.
4304 * @result: the Element Content pointer will be stored here if any
4305 *
4306 * parse the declaration for an Element content either Mixed or Children,
4307 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4308 *
4309 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4310 *
4311 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4312 */
4313
4314int
4315xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4316 xmlElementContentPtr *result) {
4317
4318 xmlElementContentPtr tree = NULL;
4319 xmlParserInputPtr input = ctxt->input;
4320 int res;
4321
4322 *result = NULL;
4323
4324 if (RAW != '(') {
4325 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4327 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004328 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004329 ctxt->wellFormed = 0;
4330 ctxt->disableSAX = 1;
4331 return(-1);
4332 }
4333 NEXT;
4334 GROW;
4335 SKIP_BLANKS;
4336 if ((RAW == '#') && (NXT(1) == 'P') &&
4337 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4338 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4339 (NXT(6) == 'A')) {
4340 tree = xmlParseElementMixedContentDecl(ctxt);
4341 res = XML_ELEMENT_TYPE_MIXED;
4342 } else {
4343 tree = xmlParseElementChildrenContentDecl(ctxt);
4344 res = XML_ELEMENT_TYPE_ELEMENT;
4345 }
4346 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4347 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4349 ctxt->sax->error(ctxt->userData,
4350"Element content declaration doesn't start and stop in the same entity\n");
4351 ctxt->wellFormed = 0;
4352 ctxt->disableSAX = 1;
4353 }
4354 SKIP_BLANKS;
4355 *result = tree;
4356 return(res);
4357}
4358
4359/**
4360 * xmlParseElementDecl:
4361 * @ctxt: an XML parser context
4362 *
4363 * parse an Element declaration.
4364 *
4365 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4366 *
4367 * [ VC: Unique Element Type Declaration ]
4368 * No element type may be declared more than once
4369 *
4370 * Returns the type of the element, or -1 in case of error
4371 */
4372int
4373xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4374 xmlChar *name;
4375 int ret = -1;
4376 xmlElementContentPtr content = NULL;
4377
4378 GROW;
4379 if ((RAW == '<') && (NXT(1) == '!') &&
4380 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4381 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4382 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4383 (NXT(8) == 'T')) {
4384 xmlParserInputPtr input = ctxt->input;
4385
4386 SKIP(9);
4387 if (!IS_BLANK(CUR)) {
4388 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4390 ctxt->sax->error(ctxt->userData,
4391 "Space required after 'ELEMENT'\n");
4392 ctxt->wellFormed = 0;
4393 ctxt->disableSAX = 1;
4394 }
4395 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004396 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004397 if (name == NULL) {
4398 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4400 ctxt->sax->error(ctxt->userData,
4401 "xmlParseElementDecl: no name for Element\n");
4402 ctxt->wellFormed = 0;
4403 ctxt->disableSAX = 1;
4404 return(-1);
4405 }
4406 while ((RAW == 0) && (ctxt->inputNr > 1))
4407 xmlPopInput(ctxt);
4408 if (!IS_BLANK(CUR)) {
4409 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4411 ctxt->sax->error(ctxt->userData,
4412 "Space required after the element name\n");
4413 ctxt->wellFormed = 0;
4414 ctxt->disableSAX = 1;
4415 }
4416 SKIP_BLANKS;
4417 if ((RAW == 'E') && (NXT(1) == 'M') &&
4418 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4419 (NXT(4) == 'Y')) {
4420 SKIP(5);
4421 /*
4422 * Element must always be empty.
4423 */
4424 ret = XML_ELEMENT_TYPE_EMPTY;
4425 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4426 (NXT(2) == 'Y')) {
4427 SKIP(3);
4428 /*
4429 * Element is a generic container.
4430 */
4431 ret = XML_ELEMENT_TYPE_ANY;
4432 } else if (RAW == '(') {
4433 ret = xmlParseElementContentDecl(ctxt, name, &content);
4434 } else {
4435 /*
4436 * [ WFC: PEs in Internal Subset ] error handling.
4437 */
4438 if ((RAW == '%') && (ctxt->external == 0) &&
4439 (ctxt->inputNr == 1)) {
4440 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4442 ctxt->sax->error(ctxt->userData,
4443 "PEReference: forbidden within markup decl in internal subset\n");
4444 } else {
4445 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4447 ctxt->sax->error(ctxt->userData,
4448 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4449 }
4450 ctxt->wellFormed = 0;
4451 ctxt->disableSAX = 1;
4452 if (name != NULL) xmlFree(name);
4453 return(-1);
4454 }
4455
4456 SKIP_BLANKS;
4457 /*
4458 * Pop-up of finished entities.
4459 */
4460 while ((RAW == 0) && (ctxt->inputNr > 1))
4461 xmlPopInput(ctxt);
4462 SKIP_BLANKS;
4463
4464 if (RAW != '>') {
4465 ctxt->errNo = XML_ERR_GT_REQUIRED;
4466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4467 ctxt->sax->error(ctxt->userData,
4468 "xmlParseElementDecl: expected '>' at the end\n");
4469 ctxt->wellFormed = 0;
4470 ctxt->disableSAX = 1;
4471 } else {
4472 if (input != ctxt->input) {
4473 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4475 ctxt->sax->error(ctxt->userData,
4476"Element declaration doesn't start and stop in the same entity\n");
4477 ctxt->wellFormed = 0;
4478 ctxt->disableSAX = 1;
4479 }
4480
4481 NEXT;
4482 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4483 (ctxt->sax->elementDecl != NULL))
4484 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4485 content);
4486 }
4487 if (content != NULL) {
4488 xmlFreeElementContent(content);
4489 }
4490 if (name != NULL) {
4491 xmlFree(name);
4492 }
4493 }
4494 return(ret);
4495}
4496
4497/**
4498 * xmlParseMarkupDecl:
4499 * @ctxt: an XML parser context
4500 *
4501 * parse Markup declarations
4502 *
4503 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4504 * NotationDecl | PI | Comment
4505 *
4506 * [ VC: Proper Declaration/PE Nesting ]
4507 * Parameter-entity replacement text must be properly nested with
4508 * markup declarations. That is to say, if either the first character
4509 * or the last character of a markup declaration (markupdecl above) is
4510 * contained in the replacement text for a parameter-entity reference,
4511 * both must be contained in the same replacement text.
4512 *
4513 * [ WFC: PEs in Internal Subset ]
4514 * In the internal DTD subset, parameter-entity references can occur
4515 * only where markup declarations can occur, not within markup declarations.
4516 * (This does not apply to references that occur in external parameter
4517 * entities or to the external subset.)
4518 */
4519void
4520xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4521 GROW;
4522 xmlParseElementDecl(ctxt);
4523 xmlParseAttributeListDecl(ctxt);
4524 xmlParseEntityDecl(ctxt);
4525 xmlParseNotationDecl(ctxt);
4526 xmlParsePI(ctxt);
4527 xmlParseComment(ctxt);
4528 /*
4529 * This is only for internal subset. On external entities,
4530 * the replacement is done before parsing stage
4531 */
4532 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4533 xmlParsePEReference(ctxt);
4534 ctxt->instate = XML_PARSER_DTD;
4535}
4536
4537/**
4538 * xmlParseTextDecl:
4539 * @ctxt: an XML parser context
4540 *
4541 * parse an XML declaration header for external entities
4542 *
4543 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4544 *
4545 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4546 */
4547
4548void
4549xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4550 xmlChar *version;
4551
4552 /*
4553 * We know that '<?xml' is here.
4554 */
4555 if ((RAW == '<') && (NXT(1) == '?') &&
4556 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4557 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4558 SKIP(5);
4559 } else {
4560 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4562 ctxt->sax->error(ctxt->userData,
4563 "Text declaration '<?xml' required\n");
4564 ctxt->wellFormed = 0;
4565 ctxt->disableSAX = 1;
4566
4567 return;
4568 }
4569
4570 if (!IS_BLANK(CUR)) {
4571 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4573 ctxt->sax->error(ctxt->userData,
4574 "Space needed after '<?xml'\n");
4575 ctxt->wellFormed = 0;
4576 ctxt->disableSAX = 1;
4577 }
4578 SKIP_BLANKS;
4579
4580 /*
4581 * We may have the VersionInfo here.
4582 */
4583 version = xmlParseVersionInfo(ctxt);
4584 if (version == NULL)
4585 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4586 ctxt->input->version = version;
4587
4588 /*
4589 * We must have the encoding declaration
4590 */
4591 if (!IS_BLANK(CUR)) {
4592 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4594 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4595 ctxt->wellFormed = 0;
4596 ctxt->disableSAX = 1;
4597 }
4598 xmlParseEncodingDecl(ctxt);
4599 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4600 /*
4601 * The XML REC instructs us to stop parsing right here
4602 */
4603 return;
4604 }
4605
4606 SKIP_BLANKS;
4607 if ((RAW == '?') && (NXT(1) == '>')) {
4608 SKIP(2);
4609 } else if (RAW == '>') {
4610 /* Deprecated old WD ... */
4611 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4613 ctxt->sax->error(ctxt->userData,
4614 "XML declaration must end-up with '?>'\n");
4615 ctxt->wellFormed = 0;
4616 ctxt->disableSAX = 1;
4617 NEXT;
4618 } else {
4619 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4621 ctxt->sax->error(ctxt->userData,
4622 "parsing XML declaration: '?>' expected\n");
4623 ctxt->wellFormed = 0;
4624 ctxt->disableSAX = 1;
4625 MOVETO_ENDTAG(CUR_PTR);
4626 NEXT;
4627 }
4628}
4629
4630/*
4631 * xmlParseConditionalSections
4632 * @ctxt: an XML parser context
4633 *
4634 * [61] conditionalSect ::= includeSect | ignoreSect
4635 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4636 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4637 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4638 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4639 */
4640
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004641static void
Owen Taylor3473f882001-02-23 17:55:21 +00004642xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4643 SKIP(3);
4644 SKIP_BLANKS;
4645 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4646 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4647 (NXT(6) == 'E')) {
4648 SKIP(7);
4649 SKIP_BLANKS;
4650 if (RAW != '[') {
4651 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4653 ctxt->sax->error(ctxt->userData,
4654 "XML conditional section '[' expected\n");
4655 ctxt->wellFormed = 0;
4656 ctxt->disableSAX = 1;
4657 } else {
4658 NEXT;
4659 }
4660 if (xmlParserDebugEntities) {
4661 if ((ctxt->input != NULL) && (ctxt->input->filename))
4662 xmlGenericError(xmlGenericErrorContext,
4663 "%s(%d): ", ctxt->input->filename,
4664 ctxt->input->line);
4665 xmlGenericError(xmlGenericErrorContext,
4666 "Entering INCLUDE Conditional Section\n");
4667 }
4668
4669 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4670 (NXT(2) != '>'))) {
4671 const xmlChar *check = CUR_PTR;
4672 int cons = ctxt->input->consumed;
4673 int tok = ctxt->token;
4674
4675 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4676 xmlParseConditionalSections(ctxt);
4677 } else if (IS_BLANK(CUR)) {
4678 NEXT;
4679 } else if (RAW == '%') {
4680 xmlParsePEReference(ctxt);
4681 } else
4682 xmlParseMarkupDecl(ctxt);
4683
4684 /*
4685 * Pop-up of finished entities.
4686 */
4687 while ((RAW == 0) && (ctxt->inputNr > 1))
4688 xmlPopInput(ctxt);
4689
4690 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4691 (tok == ctxt->token)) {
4692 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4694 ctxt->sax->error(ctxt->userData,
4695 "Content error in the external subset\n");
4696 ctxt->wellFormed = 0;
4697 ctxt->disableSAX = 1;
4698 break;
4699 }
4700 }
4701 if (xmlParserDebugEntities) {
4702 if ((ctxt->input != NULL) && (ctxt->input->filename))
4703 xmlGenericError(xmlGenericErrorContext,
4704 "%s(%d): ", ctxt->input->filename,
4705 ctxt->input->line);
4706 xmlGenericError(xmlGenericErrorContext,
4707 "Leaving INCLUDE Conditional Section\n");
4708 }
4709
4710 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4711 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4712 int state;
4713 int instate;
4714 int depth = 0;
4715
4716 SKIP(6);
4717 SKIP_BLANKS;
4718 if (RAW != '[') {
4719 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4721 ctxt->sax->error(ctxt->userData,
4722 "XML conditional section '[' expected\n");
4723 ctxt->wellFormed = 0;
4724 ctxt->disableSAX = 1;
4725 } else {
4726 NEXT;
4727 }
4728 if (xmlParserDebugEntities) {
4729 if ((ctxt->input != NULL) && (ctxt->input->filename))
4730 xmlGenericError(xmlGenericErrorContext,
4731 "%s(%d): ", ctxt->input->filename,
4732 ctxt->input->line);
4733 xmlGenericError(xmlGenericErrorContext,
4734 "Entering IGNORE Conditional Section\n");
4735 }
4736
4737 /*
4738 * Parse up to the end of the conditionnal section
4739 * But disable SAX event generating DTD building in the meantime
4740 */
4741 state = ctxt->disableSAX;
4742 instate = ctxt->instate;
4743 ctxt->disableSAX = 1;
4744 ctxt->instate = XML_PARSER_IGNORE;
4745
4746 while (depth >= 0) {
4747 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4748 depth++;
4749 SKIP(3);
4750 continue;
4751 }
4752 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4753 if (--depth >= 0) SKIP(3);
4754 continue;
4755 }
4756 NEXT;
4757 continue;
4758 }
4759
4760 ctxt->disableSAX = state;
4761 ctxt->instate = instate;
4762
4763 if (xmlParserDebugEntities) {
4764 if ((ctxt->input != NULL) && (ctxt->input->filename))
4765 xmlGenericError(xmlGenericErrorContext,
4766 "%s(%d): ", ctxt->input->filename,
4767 ctxt->input->line);
4768 xmlGenericError(xmlGenericErrorContext,
4769 "Leaving IGNORE Conditional Section\n");
4770 }
4771
4772 } else {
4773 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4775 ctxt->sax->error(ctxt->userData,
4776 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4777 ctxt->wellFormed = 0;
4778 ctxt->disableSAX = 1;
4779 }
4780
4781 if (RAW == 0)
4782 SHRINK;
4783
4784 if (RAW == 0) {
4785 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4787 ctxt->sax->error(ctxt->userData,
4788 "XML conditional section not closed\n");
4789 ctxt->wellFormed = 0;
4790 ctxt->disableSAX = 1;
4791 } else {
4792 SKIP(3);
4793 }
4794}
4795
4796/**
4797 * xmlParseExternalSubset:
4798 * @ctxt: an XML parser context
4799 * @ExternalID: the external identifier
4800 * @SystemID: the system identifier (or URL)
4801 *
4802 * parse Markup declarations from an external subset
4803 *
4804 * [30] extSubset ::= textDecl? extSubsetDecl
4805 *
4806 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4807 */
4808void
4809xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4810 const xmlChar *SystemID) {
4811 GROW;
4812 if ((RAW == '<') && (NXT(1) == '?') &&
4813 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4814 (NXT(4) == 'l')) {
4815 xmlParseTextDecl(ctxt);
4816 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4817 /*
4818 * The XML REC instructs us to stop parsing right here
4819 */
4820 ctxt->instate = XML_PARSER_EOF;
4821 return;
4822 }
4823 }
4824 if (ctxt->myDoc == NULL) {
4825 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4826 }
4827 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4828 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4829
4830 ctxt->instate = XML_PARSER_DTD;
4831 ctxt->external = 1;
4832 while (((RAW == '<') && (NXT(1) == '?')) ||
4833 ((RAW == '<') && (NXT(1) == '!')) ||
4834 IS_BLANK(CUR)) {
4835 const xmlChar *check = CUR_PTR;
4836 int cons = ctxt->input->consumed;
4837 int tok = ctxt->token;
4838
4839 GROW;
4840 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4841 xmlParseConditionalSections(ctxt);
4842 } else if (IS_BLANK(CUR)) {
4843 NEXT;
4844 } else if (RAW == '%') {
4845 xmlParsePEReference(ctxt);
4846 } else
4847 xmlParseMarkupDecl(ctxt);
4848
4849 /*
4850 * Pop-up of finished entities.
4851 */
4852 while ((RAW == 0) && (ctxt->inputNr > 1))
4853 xmlPopInput(ctxt);
4854
4855 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4856 (tok == ctxt->token)) {
4857 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4859 ctxt->sax->error(ctxt->userData,
4860 "Content error in the external subset\n");
4861 ctxt->wellFormed = 0;
4862 ctxt->disableSAX = 1;
4863 break;
4864 }
4865 }
4866
4867 if (RAW != 0) {
4868 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4870 ctxt->sax->error(ctxt->userData,
4871 "Extra content at the end of the document\n");
4872 ctxt->wellFormed = 0;
4873 ctxt->disableSAX = 1;
4874 }
4875
4876}
4877
4878/**
4879 * xmlParseReference:
4880 * @ctxt: an XML parser context
4881 *
4882 * parse and handle entity references in content, depending on the SAX
4883 * interface, this may end-up in a call to character() if this is a
4884 * CharRef, a predefined entity, if there is no reference() callback.
4885 * or if the parser was asked to switch to that mode.
4886 *
4887 * [67] Reference ::= EntityRef | CharRef
4888 */
4889void
4890xmlParseReference(xmlParserCtxtPtr ctxt) {
4891 xmlEntityPtr ent;
4892 xmlChar *val;
4893 if (RAW != '&') return;
4894
4895 if (NXT(1) == '#') {
4896 int i = 0;
4897 xmlChar out[10];
4898 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004899 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004900
4901 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4902 /*
4903 * So we are using non-UTF-8 buffers
4904 * Check that the char fit on 8bits, if not
4905 * generate a CharRef.
4906 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004907 if (value <= 0xFF) {
4908 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004909 out[1] = 0;
4910 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4911 (!ctxt->disableSAX))
4912 ctxt->sax->characters(ctxt->userData, out, 1);
4913 } else {
4914 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004915 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004916 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004917 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004918 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4919 (!ctxt->disableSAX))
4920 ctxt->sax->reference(ctxt->userData, out);
4921 }
4922 } else {
4923 /*
4924 * Just encode the value in UTF-8
4925 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004926 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004927 out[i] = 0;
4928 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4929 (!ctxt->disableSAX))
4930 ctxt->sax->characters(ctxt->userData, out, i);
4931 }
4932 } else {
4933 ent = xmlParseEntityRef(ctxt);
4934 if (ent == NULL) return;
4935 if ((ent->name != NULL) &&
4936 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4937 xmlNodePtr list = NULL;
4938 int ret;
4939
4940
4941 /*
4942 * The first reference to the entity trigger a parsing phase
4943 * where the ent->children is filled with the result from
4944 * the parsing.
4945 */
4946 if (ent->children == NULL) {
4947 xmlChar *value;
4948 value = ent->content;
4949
4950 /*
4951 * Check that this entity is well formed
4952 */
4953 if ((value != NULL) &&
4954 (value[1] == 0) && (value[0] == '<') &&
4955 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4956 /*
4957 * DONE: get definite answer on this !!!
4958 * Lots of entity decls are used to declare a single
4959 * char
4960 * <!ENTITY lt "<">
4961 * Which seems to be valid since
4962 * 2.4: The ampersand character (&) and the left angle
4963 * bracket (<) may appear in their literal form only
4964 * when used ... They are also legal within the literal
4965 * entity value of an internal entity declaration;i
4966 * see "4.3.2 Well-Formed Parsed Entities".
4967 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4968 * Looking at the OASIS test suite and James Clark
4969 * tests, this is broken. However the XML REC uses
4970 * it. Is the XML REC not well-formed ????
4971 * This is a hack to avoid this problem
4972 *
4973 * ANSWER: since lt gt amp .. are already defined,
4974 * this is a redefinition and hence the fact that the
4975 * contentis not well balanced is not a Wf error, this
4976 * is lousy but acceptable.
4977 */
4978 list = xmlNewDocText(ctxt->myDoc, value);
4979 if (list != NULL) {
4980 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4981 (ent->children == NULL)) {
4982 ent->children = list;
4983 ent->last = list;
4984 list->parent = (xmlNodePtr) ent;
4985 } else {
4986 xmlFreeNodeList(list);
4987 }
4988 } else if (list != NULL) {
4989 xmlFreeNodeList(list);
4990 }
4991 } else {
4992 /*
4993 * 4.3.2: An internal general parsed entity is well-formed
4994 * if its replacement text matches the production labeled
4995 * content.
4996 */
4997 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4998 ctxt->depth++;
4999 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5000 ctxt->sax, NULL, ctxt->depth,
5001 value, &list);
5002 ctxt->depth--;
5003 } else if (ent->etype ==
5004 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5005 ctxt->depth++;
Daniel Veillard257d9102001-05-08 10:41:44 +00005006 ret = xmlParseExternalEntityPrivate(ctxt->myDoc,
Owen Taylor3473f882001-02-23 17:55:21 +00005007 ctxt->sax, NULL, ctxt->depth,
Daniel Veillard257d9102001-05-08 10:41:44 +00005008 ent->URI, ent->ExternalID, &list,
5009 ctxt->_private);
Owen Taylor3473f882001-02-23 17:55:21 +00005010 ctxt->depth--;
5011 } else {
5012 ret = -1;
5013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5014 ctxt->sax->error(ctxt->userData,
5015 "Internal: invalid entity type\n");
5016 }
5017 if (ret == XML_ERR_ENTITY_LOOP) {
5018 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5020 ctxt->sax->error(ctxt->userData,
5021 "Detected entity reference loop\n");
5022 ctxt->wellFormed = 0;
5023 ctxt->disableSAX = 1;
5024 } else if ((ret == 0) && (list != NULL)) {
5025 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5026 (ent->children == NULL)) {
5027 ent->children = list;
5028 while (list != NULL) {
5029 list->parent = (xmlNodePtr) ent;
5030 if (list->next == NULL)
5031 ent->last = list;
5032 list = list->next;
5033 }
5034 } else {
5035 xmlFreeNodeList(list);
5036 }
5037 } else if (ret > 0) {
5038 ctxt->errNo = ret;
5039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5040 ctxt->sax->error(ctxt->userData,
5041 "Entity value required\n");
5042 ctxt->wellFormed = 0;
5043 ctxt->disableSAX = 1;
5044 } else if (list != NULL) {
5045 xmlFreeNodeList(list);
5046 }
5047 }
5048 }
5049 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5050 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5051 /*
5052 * Create a node.
5053 */
5054 ctxt->sax->reference(ctxt->userData, ent->name);
5055 return;
5056 } else if (ctxt->replaceEntities) {
5057 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5058 /*
5059 * Seems we are generating the DOM content, do
5060 * a simple tree copy
5061 */
5062 xmlNodePtr new;
5063 new = xmlCopyNodeList(ent->children);
5064
5065 xmlAddChildList(ctxt->node, new);
5066 /*
5067 * This is to avoid a nasty side effect, see
5068 * characters() in SAX.c
5069 */
5070 ctxt->nodemem = 0;
5071 ctxt->nodelen = 0;
5072 return;
5073 } else {
5074 /*
5075 * Probably running in SAX mode
5076 */
5077 xmlParserInputPtr input;
5078
5079 input = xmlNewEntityInputStream(ctxt, ent);
5080 xmlPushInput(ctxt, input);
5081 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5082 (RAW == '<') && (NXT(1) == '?') &&
5083 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5084 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5085 xmlParseTextDecl(ctxt);
5086 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5087 /*
5088 * The XML REC instructs us to stop parsing right here
5089 */
5090 ctxt->instate = XML_PARSER_EOF;
5091 return;
5092 }
5093 if (input->standalone == 1) {
5094 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5096 ctxt->sax->error(ctxt->userData,
5097 "external parsed entities cannot be standalone\n");
5098 ctxt->wellFormed = 0;
5099 ctxt->disableSAX = 1;
5100 }
5101 }
5102 return;
5103 }
5104 }
5105 } else {
5106 val = ent->content;
5107 if (val == NULL) return;
5108 /*
5109 * inline the entity.
5110 */
5111 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5112 (!ctxt->disableSAX))
5113 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5114 }
5115 }
5116}
5117
5118/**
5119 * xmlParseEntityRef:
5120 * @ctxt: an XML parser context
5121 *
5122 * parse ENTITY references declarations
5123 *
5124 * [68] EntityRef ::= '&' Name ';'
5125 *
5126 * [ WFC: Entity Declared ]
5127 * In a document without any DTD, a document with only an internal DTD
5128 * subset which contains no parameter entity references, or a document
5129 * with "standalone='yes'", the Name given in the entity reference
5130 * must match that in an entity declaration, except that well-formed
5131 * documents need not declare any of the following entities: amp, lt,
5132 * gt, apos, quot. The declaration of a parameter entity must precede
5133 * any reference to it. Similarly, the declaration of a general entity
5134 * must precede any reference to it which appears in a default value in an
5135 * attribute-list declaration. Note that if entities are declared in the
5136 * external subset or in external parameter entities, a non-validating
5137 * processor is not obligated to read and process their declarations;
5138 * for such documents, the rule that an entity must be declared is a
5139 * well-formedness constraint only if standalone='yes'.
5140 *
5141 * [ WFC: Parsed Entity ]
5142 * An entity reference must not contain the name of an unparsed entity
5143 *
5144 * Returns the xmlEntityPtr if found, or NULL otherwise.
5145 */
5146xmlEntityPtr
5147xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5148 xmlChar *name;
5149 xmlEntityPtr ent = NULL;
5150
5151 GROW;
5152
5153 if (RAW == '&') {
5154 NEXT;
5155 name = xmlParseName(ctxt);
5156 if (name == NULL) {
5157 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5159 ctxt->sax->error(ctxt->userData,
5160 "xmlParseEntityRef: no name\n");
5161 ctxt->wellFormed = 0;
5162 ctxt->disableSAX = 1;
5163 } else {
5164 if (RAW == ';') {
5165 NEXT;
5166 /*
5167 * Ask first SAX for entity resolution, otherwise try the
5168 * predefined set.
5169 */
5170 if (ctxt->sax != NULL) {
5171 if (ctxt->sax->getEntity != NULL)
5172 ent = ctxt->sax->getEntity(ctxt->userData, name);
5173 if (ent == NULL)
5174 ent = xmlGetPredefinedEntity(name);
5175 }
5176 /*
5177 * [ WFC: Entity Declared ]
5178 * In a document without any DTD, a document with only an
5179 * internal DTD subset which contains no parameter entity
5180 * references, or a document with "standalone='yes'", the
5181 * Name given in the entity reference must match that in an
5182 * entity declaration, except that well-formed documents
5183 * need not declare any of the following entities: amp, lt,
5184 * gt, apos, quot.
5185 * The declaration of a parameter entity must precede any
5186 * reference to it.
5187 * Similarly, the declaration of a general entity must
5188 * precede any reference to it which appears in a default
5189 * value in an attribute-list declaration. Note that if
5190 * entities are declared in the external subset or in
5191 * external parameter entities, a non-validating processor
5192 * is not obligated to read and process their declarations;
5193 * for such documents, the rule that an entity must be
5194 * declared is a well-formedness constraint only if
5195 * standalone='yes'.
5196 */
5197 if (ent == NULL) {
5198 if ((ctxt->standalone == 1) ||
5199 ((ctxt->hasExternalSubset == 0) &&
5200 (ctxt->hasPErefs == 0))) {
5201 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5203 ctxt->sax->error(ctxt->userData,
5204 "Entity '%s' not defined\n", name);
5205 ctxt->wellFormed = 0;
5206 ctxt->disableSAX = 1;
5207 } else {
5208 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5209 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5210 ctxt->sax->warning(ctxt->userData,
5211 "Entity '%s' not defined\n", name);
5212 }
5213 }
5214
5215 /*
5216 * [ WFC: Parsed Entity ]
5217 * An entity reference must not contain the name of an
5218 * unparsed entity
5219 */
5220 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5221 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5223 ctxt->sax->error(ctxt->userData,
5224 "Entity reference to unparsed entity %s\n", name);
5225 ctxt->wellFormed = 0;
5226 ctxt->disableSAX = 1;
5227 }
5228
5229 /*
5230 * [ WFC: No External Entity References ]
5231 * Attribute values cannot contain direct or indirect
5232 * entity references to external entities.
5233 */
5234 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5235 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5236 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5238 ctxt->sax->error(ctxt->userData,
5239 "Attribute references external entity '%s'\n", name);
5240 ctxt->wellFormed = 0;
5241 ctxt->disableSAX = 1;
5242 }
5243 /*
5244 * [ WFC: No < in Attribute Values ]
5245 * The replacement text of any entity referred to directly or
5246 * indirectly in an attribute value (other than "&lt;") must
5247 * not contain a <.
5248 */
5249 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5250 (ent != NULL) &&
5251 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5252 (ent->content != NULL) &&
5253 (xmlStrchr(ent->content, '<'))) {
5254 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5256 ctxt->sax->error(ctxt->userData,
5257 "'<' in entity '%s' is not allowed in attributes values\n", name);
5258 ctxt->wellFormed = 0;
5259 ctxt->disableSAX = 1;
5260 }
5261
5262 /*
5263 * Internal check, no parameter entities here ...
5264 */
5265 else {
5266 switch (ent->etype) {
5267 case XML_INTERNAL_PARAMETER_ENTITY:
5268 case XML_EXTERNAL_PARAMETER_ENTITY:
5269 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5271 ctxt->sax->error(ctxt->userData,
5272 "Attempt to reference the parameter entity '%s'\n", name);
5273 ctxt->wellFormed = 0;
5274 ctxt->disableSAX = 1;
5275 break;
5276 default:
5277 break;
5278 }
5279 }
5280
5281 /*
5282 * [ WFC: No Recursion ]
5283 * A parsed entity must not contain a recursive reference
5284 * to itself, either directly or indirectly.
5285 * Done somewhere else
5286 */
5287
5288 } else {
5289 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5291 ctxt->sax->error(ctxt->userData,
5292 "xmlParseEntityRef: expecting ';'\n");
5293 ctxt->wellFormed = 0;
5294 ctxt->disableSAX = 1;
5295 }
5296 xmlFree(name);
5297 }
5298 }
5299 return(ent);
5300}
5301
5302/**
5303 * xmlParseStringEntityRef:
5304 * @ctxt: an XML parser context
5305 * @str: a pointer to an index in the string
5306 *
 * parse an entity reference; unlike xmlParseEntityRef, this version
 * parses it from a string value.
5309 *
5310 * [68] EntityRef ::= '&' Name ';'
5311 *
5312 * [ WFC: Entity Declared ]
5313 * In a document without any DTD, a document with only an internal DTD
5314 * subset which contains no parameter entity references, or a document
5315 * with "standalone='yes'", the Name given in the entity reference
5316 * must match that in an entity declaration, except that well-formed
5317 * documents need not declare any of the following entities: amp, lt,
5318 * gt, apos, quot. The declaration of a parameter entity must precede
5319 * any reference to it. Similarly, the declaration of a general entity
5320 * must precede any reference to it which appears in a default value in an
5321 * attribute-list declaration. Note that if entities are declared in the
5322 * external subset or in external parameter entities, a non-validating
5323 * processor is not obligated to read and process their declarations;
5324 * for such documents, the rule that an entity must be declared is a
5325 * well-formedness constraint only if standalone='yes'.
5326 *
5327 * [ WFC: Parsed Entity ]
5328 * An entity reference must not contain the name of an unparsed entity
5329 *
5330 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5331 * is updated to the current location in the string.
5332 */
5333xmlEntityPtr
5334xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5335 xmlChar *name;
5336 const xmlChar *ptr;
5337 xmlChar cur;
5338 xmlEntityPtr ent = NULL;
5339
5340 if ((str == NULL) || (*str == NULL))
5341 return(NULL);
5342 ptr = *str;
5343 cur = *ptr;
5344 if (cur == '&') {
5345 ptr++;
5346 cur = *ptr;
5347 name = xmlParseStringName(ctxt, &ptr);
5348 if (name == NULL) {
5349 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5351 ctxt->sax->error(ctxt->userData,
5352 "xmlParseEntityRef: no name\n");
5353 ctxt->wellFormed = 0;
5354 ctxt->disableSAX = 1;
5355 } else {
5356 if (*ptr == ';') {
5357 ptr++;
5358 /*
5359 * Ask first SAX for entity resolution, otherwise try the
5360 * predefined set.
5361 */
5362 if (ctxt->sax != NULL) {
5363 if (ctxt->sax->getEntity != NULL)
5364 ent = ctxt->sax->getEntity(ctxt->userData, name);
5365 if (ent == NULL)
5366 ent = xmlGetPredefinedEntity(name);
5367 }
5368 /*
5369 * [ WFC: Entity Declared ]
5370 * In a document without any DTD, a document with only an
5371 * internal DTD subset which contains no parameter entity
5372 * references, or a document with "standalone='yes'", the
5373 * Name given in the entity reference must match that in an
5374 * entity declaration, except that well-formed documents
5375 * need not declare any of the following entities: amp, lt,
5376 * gt, apos, quot.
5377 * The declaration of a parameter entity must precede any
5378 * reference to it.
5379 * Similarly, the declaration of a general entity must
5380 * precede any reference to it which appears in a default
5381 * value in an attribute-list declaration. Note that if
5382 * entities are declared in the external subset or in
5383 * external parameter entities, a non-validating processor
5384 * is not obligated to read and process their declarations;
5385 * for such documents, the rule that an entity must be
5386 * declared is a well-formedness constraint only if
5387 * standalone='yes'.
5388 */
5389 if (ent == NULL) {
5390 if ((ctxt->standalone == 1) ||
5391 ((ctxt->hasExternalSubset == 0) &&
5392 (ctxt->hasPErefs == 0))) {
5393 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5395 ctxt->sax->error(ctxt->userData,
5396 "Entity '%s' not defined\n", name);
5397 ctxt->wellFormed = 0;
5398 ctxt->disableSAX = 1;
5399 } else {
5400 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5401 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5402 ctxt->sax->warning(ctxt->userData,
5403 "Entity '%s' not defined\n", name);
5404 }
5405 }
5406
5407 /*
5408 * [ WFC: Parsed Entity ]
5409 * An entity reference must not contain the name of an
5410 * unparsed entity
5411 */
5412 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5413 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5415 ctxt->sax->error(ctxt->userData,
5416 "Entity reference to unparsed entity %s\n", name);
5417 ctxt->wellFormed = 0;
5418 ctxt->disableSAX = 1;
5419 }
5420
5421 /*
5422 * [ WFC: No External Entity References ]
5423 * Attribute values cannot contain direct or indirect
5424 * entity references to external entities.
5425 */
5426 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5427 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5428 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5430 ctxt->sax->error(ctxt->userData,
5431 "Attribute references external entity '%s'\n", name);
5432 ctxt->wellFormed = 0;
5433 ctxt->disableSAX = 1;
5434 }
5435 /*
5436 * [ WFC: No < in Attribute Values ]
5437 * The replacement text of any entity referred to directly or
5438 * indirectly in an attribute value (other than "&lt;") must
5439 * not contain a <.
5440 */
5441 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5442 (ent != NULL) &&
5443 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5444 (ent->content != NULL) &&
5445 (xmlStrchr(ent->content, '<'))) {
5446 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5448 ctxt->sax->error(ctxt->userData,
5449 "'<' in entity '%s' is not allowed in attributes values\n", name);
5450 ctxt->wellFormed = 0;
5451 ctxt->disableSAX = 1;
5452 }
5453
5454 /*
5455 * Internal check, no parameter entities here ...
5456 */
5457 else {
5458 switch (ent->etype) {
5459 case XML_INTERNAL_PARAMETER_ENTITY:
5460 case XML_EXTERNAL_PARAMETER_ENTITY:
5461 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5463 ctxt->sax->error(ctxt->userData,
5464 "Attempt to reference the parameter entity '%s'\n", name);
5465 ctxt->wellFormed = 0;
5466 ctxt->disableSAX = 1;
5467 break;
5468 default:
5469 break;
5470 }
5471 }
5472
5473 /*
5474 * [ WFC: No Recursion ]
5475 * A parsed entity must not contain a recursive reference
5476 * to itself, either directly or indirectly.
5477 * Done somewhwere else
5478 */
5479
5480 } else {
5481 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5483 ctxt->sax->error(ctxt->userData,
5484 "xmlParseEntityRef: expecting ';'\n");
5485 ctxt->wellFormed = 0;
5486 ctxt->disableSAX = 1;
5487 }
5488 xmlFree(name);
5489 }
5490 }
5491 *str = ptr;
5492 return(ent);
5493}
5494
5495/**
5496 * xmlParsePEReference:
5497 * @ctxt: an XML parser context
5498 *
5499 * parse PEReference declarations
 * The entity content is handled directly by pushing its content as
5501 * a new input stream.
5502 *
5503 * [69] PEReference ::= '%' Name ';'
5504 *
5505 * [ WFC: No Recursion ]
5506 * A parsed entity must not contain a recursive
5507 * reference to itself, either directly or indirectly.
5508 *
5509 * [ WFC: Entity Declared ]
5510 * In a document without any DTD, a document with only an internal DTD
5511 * subset which contains no parameter entity references, or a document
5512 * with "standalone='yes'", ... ... The declaration of a parameter
5513 * entity must precede any reference to it...
5514 *
5515 * [ VC: Entity Declared ]
5516 * In a document with an external subset or external parameter entities
5517 * with "standalone='no'", ... ... The declaration of a parameter entity
5518 * must precede any reference to it...
5519 *
5520 * [ WFC: In DTD ]
5521 * Parameter-entity references may only appear in the DTD.
5522 * NOTE: misleading but this is handled.
5523 */
5524void
5525xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5526 xmlChar *name;
5527 xmlEntityPtr entity = NULL;
5528 xmlParserInputPtr input;
5529
5530 if (RAW == '%') {
5531 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005532 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005533 if (name == NULL) {
5534 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5536 ctxt->sax->error(ctxt->userData,
5537 "xmlParsePEReference: no name\n");
5538 ctxt->wellFormed = 0;
5539 ctxt->disableSAX = 1;
5540 } else {
5541 if (RAW == ';') {
5542 NEXT;
5543 if ((ctxt->sax != NULL) &&
5544 (ctxt->sax->getParameterEntity != NULL))
5545 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5546 name);
5547 if (entity == NULL) {
5548 /*
5549 * [ WFC: Entity Declared ]
5550 * In a document without any DTD, a document with only an
5551 * internal DTD subset which contains no parameter entity
5552 * references, or a document with "standalone='yes'", ...
5553 * ... The declaration of a parameter entity must precede
5554 * any reference to it...
5555 */
5556 if ((ctxt->standalone == 1) ||
5557 ((ctxt->hasExternalSubset == 0) &&
5558 (ctxt->hasPErefs == 0))) {
5559 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5560 if ((!ctxt->disableSAX) &&
5561 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5562 ctxt->sax->error(ctxt->userData,
5563 "PEReference: %%%s; not found\n", name);
5564 ctxt->wellFormed = 0;
5565 ctxt->disableSAX = 1;
5566 } else {
5567 /*
5568 * [ VC: Entity Declared ]
5569 * In a document with an external subset or external
5570 * parameter entities with "standalone='no'", ...
5571 * ... The declaration of a parameter entity must precede
5572 * any reference to it...
5573 */
5574 if ((!ctxt->disableSAX) &&
5575 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5576 ctxt->sax->warning(ctxt->userData,
5577 "PEReference: %%%s; not found\n", name);
5578 ctxt->valid = 0;
5579 }
5580 } else {
5581 /*
5582 * Internal checking in case the entity quest barfed
5583 */
5584 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5585 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5586 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5587 ctxt->sax->warning(ctxt->userData,
5588 "Internal: %%%s; is not a parameter entity\n", name);
5589 } else {
5590 /*
5591 * TODO !!!
5592 * handle the extra spaces added before and after
5593 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5594 */
5595 input = xmlNewEntityInputStream(ctxt, entity);
5596 xmlPushInput(ctxt, input);
5597 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5598 (RAW == '<') && (NXT(1) == '?') &&
5599 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5600 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5601 xmlParseTextDecl(ctxt);
5602 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5603 /*
5604 * The XML REC instructs us to stop parsing
5605 * right here
5606 */
5607 ctxt->instate = XML_PARSER_EOF;
5608 xmlFree(name);
5609 return;
5610 }
5611 }
5612 if (ctxt->token == 0)
5613 ctxt->token = ' ';
5614 }
5615 }
5616 ctxt->hasPErefs = 1;
5617 } else {
5618 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5620 ctxt->sax->error(ctxt->userData,
5621 "xmlParsePEReference: expecting ';'\n");
5622 ctxt->wellFormed = 0;
5623 ctxt->disableSAX = 1;
5624 }
5625 xmlFree(name);
5626 }
5627 }
5628}
5629
5630/**
5631 * xmlParseStringPEReference:
5632 * @ctxt: an XML parser context
5633 * @str: a pointer to an index in the string
5634 *
5635 * parse PEReference declarations
5636 *
5637 * [69] PEReference ::= '%' Name ';'
5638 *
5639 * [ WFC: No Recursion ]
5640 * A parsed entity must not contain a recursive
5641 * reference to itself, either directly or indirectly.
5642 *
5643 * [ WFC: Entity Declared ]
5644 * In a document without any DTD, a document with only an internal DTD
5645 * subset which contains no parameter entity references, or a document
5646 * with "standalone='yes'", ... ... The declaration of a parameter
5647 * entity must precede any reference to it...
5648 *
5649 * [ VC: Entity Declared ]
5650 * In a document with an external subset or external parameter entities
5651 * with "standalone='no'", ... ... The declaration of a parameter entity
5652 * must precede any reference to it...
5653 *
5654 * [ WFC: In DTD ]
5655 * Parameter-entity references may only appear in the DTD.
5656 * NOTE: misleading but this is handled.
5657 *
 * Returns the xmlEntityPtr if found, or NULL otherwise.
5659 * str is updated to the current value of the index
5660 */
5661xmlEntityPtr
5662xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5663 const xmlChar *ptr;
5664 xmlChar cur;
5665 xmlChar *name;
5666 xmlEntityPtr entity = NULL;
5667
5668 if ((str == NULL) || (*str == NULL)) return(NULL);
5669 ptr = *str;
5670 cur = *ptr;
5671 if (cur == '%') {
5672 ptr++;
5673 cur = *ptr;
5674 name = xmlParseStringName(ctxt, &ptr);
5675 if (name == NULL) {
5676 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5678 ctxt->sax->error(ctxt->userData,
5679 "xmlParseStringPEReference: no name\n");
5680 ctxt->wellFormed = 0;
5681 ctxt->disableSAX = 1;
5682 } else {
5683 cur = *ptr;
5684 if (cur == ';') {
5685 ptr++;
5686 cur = *ptr;
5687 if ((ctxt->sax != NULL) &&
5688 (ctxt->sax->getParameterEntity != NULL))
5689 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5690 name);
5691 if (entity == NULL) {
5692 /*
5693 * [ WFC: Entity Declared ]
5694 * In a document without any DTD, a document with only an
5695 * internal DTD subset which contains no parameter entity
5696 * references, or a document with "standalone='yes'", ...
5697 * ... The declaration of a parameter entity must precede
5698 * any reference to it...
5699 */
5700 if ((ctxt->standalone == 1) ||
5701 ((ctxt->hasExternalSubset == 0) &&
5702 (ctxt->hasPErefs == 0))) {
5703 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5705 ctxt->sax->error(ctxt->userData,
5706 "PEReference: %%%s; not found\n", name);
5707 ctxt->wellFormed = 0;
5708 ctxt->disableSAX = 1;
5709 } else {
5710 /*
5711 * [ VC: Entity Declared ]
5712 * In a document with an external subset or external
5713 * parameter entities with "standalone='no'", ...
5714 * ... The declaration of a parameter entity must
5715 * precede any reference to it...
5716 */
5717 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5718 ctxt->sax->warning(ctxt->userData,
5719 "PEReference: %%%s; not found\n", name);
5720 ctxt->valid = 0;
5721 }
5722 } else {
5723 /*
5724 * Internal checking in case the entity quest barfed
5725 */
5726 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5727 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5728 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5729 ctxt->sax->warning(ctxt->userData,
5730 "Internal: %%%s; is not a parameter entity\n", name);
5731 }
5732 }
5733 ctxt->hasPErefs = 1;
5734 } else {
5735 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5737 ctxt->sax->error(ctxt->userData,
5738 "xmlParseStringPEReference: expecting ';'\n");
5739 ctxt->wellFormed = 0;
5740 ctxt->disableSAX = 1;
5741 }
5742 xmlFree(name);
5743 }
5744 }
5745 *str = ptr;
5746 return(entity);
5747}
5748
5749/**
5750 * xmlParseDocTypeDecl:
5751 * @ctxt: an XML parser context
5752 *
5753 * parse a DOCTYPE declaration
5754 *
5755 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5756 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5757 *
5758 * [ VC: Root Element Type ]
5759 * The Name in the document type declaration must match the element
5760 * type of the root element.
5761 */
5762
5763void
5764xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5765 xmlChar *name = NULL;
5766 xmlChar *ExternalID = NULL;
5767 xmlChar *URI = NULL;
5768
5769 /*
5770 * We know that '<!DOCTYPE' has been detected.
5771 */
5772 SKIP(9);
5773
5774 SKIP_BLANKS;
5775
5776 /*
5777 * Parse the DOCTYPE name.
5778 */
5779 name = xmlParseName(ctxt);
5780 if (name == NULL) {
5781 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5783 ctxt->sax->error(ctxt->userData,
5784 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5785 ctxt->wellFormed = 0;
5786 ctxt->disableSAX = 1;
5787 }
5788 ctxt->intSubName = name;
5789
5790 SKIP_BLANKS;
5791
5792 /*
5793 * Check for SystemID and ExternalID
5794 */
5795 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5796
5797 if ((URI != NULL) || (ExternalID != NULL)) {
5798 ctxt->hasExternalSubset = 1;
5799 }
5800 ctxt->extSubURI = URI;
5801 ctxt->extSubSystem = ExternalID;
5802
5803 SKIP_BLANKS;
5804
5805 /*
5806 * Create and update the internal subset.
5807 */
5808 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5809 (!ctxt->disableSAX))
5810 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5811
5812 /*
5813 * Is there any internal subset declarations ?
5814 * they are handled separately in xmlParseInternalSubset()
5815 */
5816 if (RAW == '[')
5817 return;
5818
5819 /*
5820 * We should be at the end of the DOCTYPE declaration.
5821 */
5822 if (RAW != '>') {
5823 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5825 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5826 ctxt->wellFormed = 0;
5827 ctxt->disableSAX = 1;
5828 }
5829 NEXT;
5830}
5831
5832/**
 * xmlParseInternalSubset:
5834 * @ctxt: an XML parser context
5835 *
5836 * parse the internal subset declaration
5837 *
5838 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5839 */
5840
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005841static void
Owen Taylor3473f882001-02-23 17:55:21 +00005842xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5843 /*
5844 * Is there any DTD definition ?
5845 */
5846 if (RAW == '[') {
5847 ctxt->instate = XML_PARSER_DTD;
5848 NEXT;
5849 /*
5850 * Parse the succession of Markup declarations and
5851 * PEReferences.
5852 * Subsequence (markupdecl | PEReference | S)*
5853 */
5854 while (RAW != ']') {
5855 const xmlChar *check = CUR_PTR;
5856 int cons = ctxt->input->consumed;
5857
5858 SKIP_BLANKS;
5859 xmlParseMarkupDecl(ctxt);
5860 xmlParsePEReference(ctxt);
5861
5862 /*
5863 * Pop-up of finished entities.
5864 */
5865 while ((RAW == 0) && (ctxt->inputNr > 1))
5866 xmlPopInput(ctxt);
5867
5868 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5869 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5871 ctxt->sax->error(ctxt->userData,
5872 "xmlParseInternalSubset: error detected in Markup declaration\n");
5873 ctxt->wellFormed = 0;
5874 ctxt->disableSAX = 1;
5875 break;
5876 }
5877 }
5878 if (RAW == ']') {
5879 NEXT;
5880 SKIP_BLANKS;
5881 }
5882 }
5883
5884 /*
5885 * We should be at the end of the DOCTYPE declaration.
5886 */
5887 if (RAW != '>') {
5888 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5890 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5891 ctxt->wellFormed = 0;
5892 ctxt->disableSAX = 1;
5893 }
5894 NEXT;
5895}
5896
5897/**
5898 * xmlParseAttribute:
5899 * @ctxt: an XML parser context
5900 * @value: a xmlChar ** used to store the value of the attribute
5901 *
5902 * parse an attribute
5903 *
5904 * [41] Attribute ::= Name Eq AttValue
5905 *
5906 * [ WFC: No External Entity References ]
5907 * Attribute values cannot contain direct or indirect entity references
5908 * to external entities.
5909 *
5910 * [ WFC: No < in Attribute Values ]
5911 * The replacement text of any entity referred to directly or indirectly in
5912 * an attribute value (other than "&lt;") must not contain a <.
5913 *
5914 * [ VC: Attribute Value Type ]
5915 * The attribute must have been declared; the value must be of the type
5916 * declared for it.
5917 *
5918 * [25] Eq ::= S? '=' S?
5919 *
5920 * With namespace:
5921 *
5922 * [NS 11] Attribute ::= QName Eq AttValue
5923 *
5924 * Also the case QName == xmlns:??? is handled independently as a namespace
5925 * definition.
5926 *
5927 * Returns the attribute name, and the value in *value.
5928 */
5929
5930xmlChar *
5931xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5932 xmlChar *name, *val;
5933
5934 *value = NULL;
5935 name = xmlParseName(ctxt);
5936 if (name == NULL) {
5937 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5939 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5940 ctxt->wellFormed = 0;
5941 ctxt->disableSAX = 1;
5942 return(NULL);
5943 }
5944
5945 /*
5946 * read the value
5947 */
5948 SKIP_BLANKS;
5949 if (RAW == '=') {
5950 NEXT;
5951 SKIP_BLANKS;
5952 val = xmlParseAttValue(ctxt);
5953 ctxt->instate = XML_PARSER_CONTENT;
5954 } else {
5955 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5957 ctxt->sax->error(ctxt->userData,
5958 "Specification mandate value for attribute %s\n", name);
5959 ctxt->wellFormed = 0;
5960 ctxt->disableSAX = 1;
5961 xmlFree(name);
5962 return(NULL);
5963 }
5964
5965 /*
5966 * Check that xml:lang conforms to the specification
5967 * No more registered as an error, just generate a warning now
5968 * since this was deprecated in XML second edition
5969 */
5970 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5971 if (!xmlCheckLanguageID(val)) {
5972 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5973 ctxt->sax->warning(ctxt->userData,
5974 "Malformed value for xml:lang : %s\n", val);
5975 }
5976 }
5977
5978 /*
5979 * Check that xml:space conforms to the specification
5980 */
5981 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5982 if (xmlStrEqual(val, BAD_CAST "default"))
5983 *(ctxt->space) = 0;
5984 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5985 *(ctxt->space) = 1;
5986 else {
5987 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5989 ctxt->sax->error(ctxt->userData,
5990"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5991 val);
5992 ctxt->wellFormed = 0;
5993 ctxt->disableSAX = 1;
5994 }
5995 }
5996
5997 *value = val;
5998 return(name);
5999}
6000
6001/**
6002 * xmlParseStartTag:
6003 * @ctxt: an XML parser context
6004 *
6005 * parse a start of tag either for rule element or
 * EmptyElement. In both cases we don't parse the tag closing chars.
6007 *
6008 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6009 *
6010 * [ WFC: Unique Att Spec ]
6011 * No attribute name may appear more than once in the same start-tag or
6012 * empty-element tag.
6013 *
6014 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6015 *
6016 * [ WFC: Unique Att Spec ]
6017 * No attribute name may appear more than once in the same start-tag or
6018 * empty-element tag.
6019 *
6020 * With namespace:
6021 *
6022 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6023 *
6024 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6025 *
6026 * Returns the element name parsed
6027 */
6028
6029xmlChar *
6030xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6031 xmlChar *name;
6032 xmlChar *attname;
6033 xmlChar *attvalue;
6034 const xmlChar **atts = NULL;
6035 int nbatts = 0;
6036 int maxatts = 0;
6037 int i;
6038
6039 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006040 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006041
6042 name = xmlParseName(ctxt);
6043 if (name == NULL) {
6044 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6046 ctxt->sax->error(ctxt->userData,
6047 "xmlParseStartTag: invalid element name\n");
6048 ctxt->wellFormed = 0;
6049 ctxt->disableSAX = 1;
6050 return(NULL);
6051 }
6052
6053 /*
6054 * Now parse the attributes, it ends up with the ending
6055 *
6056 * (S Attribute)* S?
6057 */
6058 SKIP_BLANKS;
6059 GROW;
6060
Daniel Veillard21a0f912001-02-25 19:54:14 +00006061 while ((RAW != '>') &&
6062 ((RAW != '/') || (NXT(1) != '>')) &&
6063 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006064 const xmlChar *q = CUR_PTR;
6065 int cons = ctxt->input->consumed;
6066
6067 attname = xmlParseAttribute(ctxt, &attvalue);
6068 if ((attname != NULL) && (attvalue != NULL)) {
6069 /*
6070 * [ WFC: Unique Att Spec ]
6071 * No attribute name may appear more than once in the same
6072 * start-tag or empty-element tag.
6073 */
6074 for (i = 0; i < nbatts;i += 2) {
6075 if (xmlStrEqual(atts[i], attname)) {
6076 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6078 ctxt->sax->error(ctxt->userData,
6079 "Attribute %s redefined\n",
6080 attname);
6081 ctxt->wellFormed = 0;
6082 ctxt->disableSAX = 1;
6083 xmlFree(attname);
6084 xmlFree(attvalue);
6085 goto failed;
6086 }
6087 }
6088
6089 /*
6090 * Add the pair to atts
6091 */
6092 if (atts == NULL) {
6093 maxatts = 10;
6094 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6095 if (atts == NULL) {
6096 xmlGenericError(xmlGenericErrorContext,
6097 "malloc of %ld byte failed\n",
6098 maxatts * (long)sizeof(xmlChar *));
6099 return(NULL);
6100 }
6101 } else if (nbatts + 4 > maxatts) {
6102 maxatts *= 2;
6103 atts = (const xmlChar **) xmlRealloc((void *) atts,
6104 maxatts * sizeof(xmlChar *));
6105 if (atts == NULL) {
6106 xmlGenericError(xmlGenericErrorContext,
6107 "realloc of %ld byte failed\n",
6108 maxatts * (long)sizeof(xmlChar *));
6109 return(NULL);
6110 }
6111 }
6112 atts[nbatts++] = attname;
6113 atts[nbatts++] = attvalue;
6114 atts[nbatts] = NULL;
6115 atts[nbatts + 1] = NULL;
6116 } else {
6117 if (attname != NULL)
6118 xmlFree(attname);
6119 if (attvalue != NULL)
6120 xmlFree(attvalue);
6121 }
6122
6123failed:
6124
6125 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6126 break;
6127 if (!IS_BLANK(RAW)) {
6128 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6130 ctxt->sax->error(ctxt->userData,
6131 "attributes construct error\n");
6132 ctxt->wellFormed = 0;
6133 ctxt->disableSAX = 1;
6134 }
6135 SKIP_BLANKS;
6136 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6137 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6139 ctxt->sax->error(ctxt->userData,
6140 "xmlParseStartTag: problem parsing attributes\n");
6141 ctxt->wellFormed = 0;
6142 ctxt->disableSAX = 1;
6143 break;
6144 }
6145 GROW;
6146 }
6147
6148 /*
6149 * SAX: Start of Element !
6150 */
6151 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6152 (!ctxt->disableSAX))
6153 ctxt->sax->startElement(ctxt->userData, name, atts);
6154
6155 if (atts != NULL) {
6156 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6157 xmlFree((void *) atts);
6158 }
6159 return(name);
6160}
6161
6162/**
6163 * xmlParseEndTag:
6164 * @ctxt: an XML parser context
6165 *
6166 * parse an end of tag
6167 *
6168 * [42] ETag ::= '</' Name S? '>'
6169 *
6170 * With namespace
6171 *
6172 * [NS 9] ETag ::= '</' QName S? '>'
6173 */
6174
6175void
6176xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6177 xmlChar *name;
6178 xmlChar *oldname;
6179
6180 GROW;
6181 if ((RAW != '<') || (NXT(1) != '/')) {
6182 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6184 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6185 ctxt->wellFormed = 0;
6186 ctxt->disableSAX = 1;
6187 return;
6188 }
6189 SKIP(2);
6190
6191 name = xmlParseName(ctxt);
6192
6193 /*
6194 * We should definitely be at the ending "S? '>'" part
6195 */
6196 GROW;
6197 SKIP_BLANKS;
6198 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6199 ctxt->errNo = XML_ERR_GT_REQUIRED;
6200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6201 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6202 ctxt->wellFormed = 0;
6203 ctxt->disableSAX = 1;
6204 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006205 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006206
6207 /*
6208 * [ WFC: Element Type Match ]
6209 * The Name in an element's end-tag must match the element type in the
6210 * start-tag.
6211 *
6212 */
6213 if ((name == NULL) || (ctxt->name == NULL) ||
6214 (!xmlStrEqual(name, ctxt->name))) {
6215 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6217 if ((name != NULL) && (ctxt->name != NULL)) {
6218 ctxt->sax->error(ctxt->userData,
6219 "Opening and ending tag mismatch: %s and %s\n",
6220 ctxt->name, name);
6221 } else if (ctxt->name != NULL) {
6222 ctxt->sax->error(ctxt->userData,
6223 "Ending tag eror for: %s\n", ctxt->name);
6224 } else {
6225 ctxt->sax->error(ctxt->userData,
6226 "Ending tag error: internal error ???\n");
6227 }
6228
6229 }
6230 ctxt->wellFormed = 0;
6231 ctxt->disableSAX = 1;
6232 }
6233
6234 /*
6235 * SAX: End of Tag
6236 */
6237 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6238 (!ctxt->disableSAX))
6239 ctxt->sax->endElement(ctxt->userData, name);
6240
6241 if (name != NULL)
6242 xmlFree(name);
6243 oldname = namePop(ctxt);
6244 spacePop(ctxt);
6245 if (oldname != NULL) {
6246#ifdef DEBUG_STACK
6247 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6248#endif
6249 xmlFree(oldname);
6250 }
6251 return;
6252}
6253
6254/**
6255 * xmlParseCDSect:
6256 * @ctxt: an XML parser context
6257 *
6258 * Parse escaped pure raw content.
6259 *
6260 * [18] CDSect ::= CDStart CData CDEnd
6261 *
6262 * [19] CDStart ::= '<![CDATA['
6263 *
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
6265 *
6266 * [21] CDEnd ::= ']]>'
6267 */
6268void
6269xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6270 xmlChar *buf = NULL;
6271 int len = 0;
6272 int size = XML_PARSER_BUFFER_SIZE;
6273 int r, rl;
6274 int s, sl;
6275 int cur, l;
6276 int count = 0;
6277
6278 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6279 (NXT(2) == '[') && (NXT(3) == 'C') &&
6280 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6281 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6282 (NXT(8) == '[')) {
6283 SKIP(9);
6284 } else
6285 return;
6286
6287 ctxt->instate = XML_PARSER_CDATA_SECTION;
6288 r = CUR_CHAR(rl);
6289 if (!IS_CHAR(r)) {
6290 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6292 ctxt->sax->error(ctxt->userData,
6293 "CData section not finished\n");
6294 ctxt->wellFormed = 0;
6295 ctxt->disableSAX = 1;
6296 ctxt->instate = XML_PARSER_CONTENT;
6297 return;
6298 }
6299 NEXTL(rl);
6300 s = CUR_CHAR(sl);
6301 if (!IS_CHAR(s)) {
6302 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6304 ctxt->sax->error(ctxt->userData,
6305 "CData section not finished\n");
6306 ctxt->wellFormed = 0;
6307 ctxt->disableSAX = 1;
6308 ctxt->instate = XML_PARSER_CONTENT;
6309 return;
6310 }
6311 NEXTL(sl);
6312 cur = CUR_CHAR(l);
6313 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6314 if (buf == NULL) {
6315 xmlGenericError(xmlGenericErrorContext,
6316 "malloc of %d byte failed\n", size);
6317 return;
6318 }
6319 while (IS_CHAR(cur) &&
6320 ((r != ']') || (s != ']') || (cur != '>'))) {
6321 if (len + 5 >= size) {
6322 size *= 2;
6323 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6324 if (buf == NULL) {
6325 xmlGenericError(xmlGenericErrorContext,
6326 "realloc of %d byte failed\n", size);
6327 return;
6328 }
6329 }
6330 COPY_BUF(rl,buf,len,r);
6331 r = s;
6332 rl = sl;
6333 s = cur;
6334 sl = l;
6335 count++;
6336 if (count > 50) {
6337 GROW;
6338 count = 0;
6339 }
6340 NEXTL(l);
6341 cur = CUR_CHAR(l);
6342 }
6343 buf[len] = 0;
6344 ctxt->instate = XML_PARSER_CONTENT;
6345 if (cur != '>') {
6346 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6348 ctxt->sax->error(ctxt->userData,
6349 "CData section not finished\n%.50s\n", buf);
6350 ctxt->wellFormed = 0;
6351 ctxt->disableSAX = 1;
6352 xmlFree(buf);
6353 return;
6354 }
6355 NEXTL(l);
6356
6357 /*
6358 * Ok the buffer is to be consumed as cdata.
6359 */
6360 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6361 if (ctxt->sax->cdataBlock != NULL)
6362 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6363 }
6364 xmlFree(buf);
6365}
6366
6367/**
6368 * xmlParseContent:
6369 * @ctxt: an XML parser context
6370 *
6371 * Parse a content:
6372 *
6373 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6374 */
6375
6376void
6377xmlParseContent(xmlParserCtxtPtr ctxt) {
6378 GROW;
6379 while (((RAW != 0) || (ctxt->token != 0)) &&
6380 ((RAW != '<') || (NXT(1) != '/'))) {
6381 const xmlChar *test = CUR_PTR;
6382 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006383 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006384 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006385
6386 /*
6387 * Handle possible processed charrefs.
6388 */
6389 if (ctxt->token != 0) {
6390 xmlParseCharData(ctxt, 0);
6391 }
6392 /*
6393 * First case : a Processing Instruction.
6394 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006395 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006396 xmlParsePI(ctxt);
6397 }
6398
6399 /*
6400 * Second case : a CDSection
6401 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006402 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006403 (NXT(2) == '[') && (NXT(3) == 'C') &&
6404 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6405 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6406 (NXT(8) == '[')) {
6407 xmlParseCDSect(ctxt);
6408 }
6409
6410 /*
6411 * Third case : a comment
6412 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006413 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006414 (NXT(2) == '-') && (NXT(3) == '-')) {
6415 xmlParseComment(ctxt);
6416 ctxt->instate = XML_PARSER_CONTENT;
6417 }
6418
6419 /*
6420 * Fourth case : a sub-element.
6421 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006422 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006423 xmlParseElement(ctxt);
6424 }
6425
6426 /*
6427 * Fifth case : a reference. If if has not been resolved,
6428 * parsing returns it's Name, create the node
6429 */
6430
Daniel Veillard21a0f912001-02-25 19:54:14 +00006431 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006432 xmlParseReference(ctxt);
6433 }
6434
6435 /*
6436 * Last case, text. Note that References are handled directly.
6437 */
6438 else {
6439 xmlParseCharData(ctxt, 0);
6440 }
6441
6442 GROW;
6443 /*
6444 * Pop-up of finished entities.
6445 */
6446 while ((RAW == 0) && (ctxt->inputNr > 1))
6447 xmlPopInput(ctxt);
6448 SHRINK;
6449
6450 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6451 (tok == ctxt->token)) {
6452 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6454 ctxt->sax->error(ctxt->userData,
6455 "detected an error in element content\n");
6456 ctxt->wellFormed = 0;
6457 ctxt->disableSAX = 1;
6458 ctxt->instate = XML_PARSER_EOF;
6459 break;
6460 }
6461 }
6462}
6463
6464/**
6465 * xmlParseElement:
6466 * @ctxt: an XML parser context
6467 *
6468 * parse an XML element, this is highly recursive
6469 *
6470 * [39] element ::= EmptyElemTag | STag content ETag
6471 *
6472 * [ WFC: Element Type Match ]
6473 * The Name in an element's end-tag must match the element type in the
6474 * start-tag.
6475 *
6476 * [ VC: Element Valid ]
6477 * An element is valid if there is a declaration matching elementdecl
6478 * where the Name matches the element type and one of the following holds:
6479 * - The declaration matches EMPTY and the element has no content.
6480 * - The declaration matches children and the sequence of child elements
6481 * belongs to the language generated by the regular expression in the
6482 * content model, with optional white space (characters matching the
6483 * nonterminal S) between each pair of child elements.
6484 * - The declaration matches Mixed and the content consists of character
6485 * data and child elements whose types match names in the content model.
6486 * - The declaration matches ANY, and the types of any child elements have
6487 * been declared.
6488 */
6489
6490void
6491xmlParseElement(xmlParserCtxtPtr ctxt) {
6492 const xmlChar *openTag = CUR_PTR;
6493 xmlChar *name;
6494 xmlChar *oldname;
6495 xmlParserNodeInfo node_info;
6496 xmlNodePtr ret;
6497
6498 /* Capture start position */
6499 if (ctxt->record_info) {
6500 node_info.begin_pos = ctxt->input->consumed +
6501 (CUR_PTR - ctxt->input->base);
6502 node_info.begin_line = ctxt->input->line;
6503 }
6504
6505 if (ctxt->spaceNr == 0)
6506 spacePush(ctxt, -1);
6507 else
6508 spacePush(ctxt, *ctxt->space);
6509
6510 name = xmlParseStartTag(ctxt);
6511 if (name == NULL) {
6512 spacePop(ctxt);
6513 return;
6514 }
6515 namePush(ctxt, name);
6516 ret = ctxt->node;
6517
6518 /*
6519 * [ VC: Root Element Type ]
6520 * The Name in the document type declaration must match the element
6521 * type of the root element.
6522 */
6523 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6524 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6525 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6526
6527 /*
6528 * Check for an Empty Element.
6529 */
6530 if ((RAW == '/') && (NXT(1) == '>')) {
6531 SKIP(2);
6532 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6533 (!ctxt->disableSAX))
6534 ctxt->sax->endElement(ctxt->userData, name);
6535 oldname = namePop(ctxt);
6536 spacePop(ctxt);
6537 if (oldname != NULL) {
6538#ifdef DEBUG_STACK
6539 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6540#endif
6541 xmlFree(oldname);
6542 }
6543 if ( ret != NULL && ctxt->record_info ) {
6544 node_info.end_pos = ctxt->input->consumed +
6545 (CUR_PTR - ctxt->input->base);
6546 node_info.end_line = ctxt->input->line;
6547 node_info.node = ret;
6548 xmlParserAddNodeInfo(ctxt, &node_info);
6549 }
6550 return;
6551 }
6552 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006553 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006554 } else {
6555 ctxt->errNo = XML_ERR_GT_REQUIRED;
6556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6557 ctxt->sax->error(ctxt->userData,
6558 "Couldn't find end of Start Tag\n%.30s\n",
6559 openTag);
6560 ctxt->wellFormed = 0;
6561 ctxt->disableSAX = 1;
6562
6563 /*
6564 * end of parsing of this node.
6565 */
6566 nodePop(ctxt);
6567 oldname = namePop(ctxt);
6568 spacePop(ctxt);
6569 if (oldname != NULL) {
6570#ifdef DEBUG_STACK
6571 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6572#endif
6573 xmlFree(oldname);
6574 }
6575
6576 /*
6577 * Capture end position and add node
6578 */
6579 if ( ret != NULL && ctxt->record_info ) {
6580 node_info.end_pos = ctxt->input->consumed +
6581 (CUR_PTR - ctxt->input->base);
6582 node_info.end_line = ctxt->input->line;
6583 node_info.node = ret;
6584 xmlParserAddNodeInfo(ctxt, &node_info);
6585 }
6586 return;
6587 }
6588
6589 /*
6590 * Parse the content of the element:
6591 */
6592 xmlParseContent(ctxt);
6593 if (!IS_CHAR(RAW)) {
6594 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6596 ctxt->sax->error(ctxt->userData,
6597 "Premature end of data in tag %.30s\n", openTag);
6598 ctxt->wellFormed = 0;
6599 ctxt->disableSAX = 1;
6600
6601 /*
6602 * end of parsing of this node.
6603 */
6604 nodePop(ctxt);
6605 oldname = namePop(ctxt);
6606 spacePop(ctxt);
6607 if (oldname != NULL) {
6608#ifdef DEBUG_STACK
6609 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6610#endif
6611 xmlFree(oldname);
6612 }
6613 return;
6614 }
6615
6616 /*
6617 * parse the end of tag: '</' should be here.
6618 */
6619 xmlParseEndTag(ctxt);
6620
6621 /*
6622 * Capture end position and add node
6623 */
6624 if ( ret != NULL && ctxt->record_info ) {
6625 node_info.end_pos = ctxt->input->consumed +
6626 (CUR_PTR - ctxt->input->base);
6627 node_info.end_line = ctxt->input->line;
6628 node_info.node = ret;
6629 xmlParserAddNodeInfo(ctxt, &node_info);
6630 }
6631}
6632
6633/**
6634 * xmlParseVersionNum:
6635 * @ctxt: an XML parser context
6636 *
6637 * parse the XML version value.
6638 *
6639 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6640 *
6641 * Returns the string giving the XML version number, or NULL
6642 */
6643xmlChar *
6644xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6645 xmlChar *buf = NULL;
6646 int len = 0;
6647 int size = 10;
6648 xmlChar cur;
6649
6650 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6651 if (buf == NULL) {
6652 xmlGenericError(xmlGenericErrorContext,
6653 "malloc of %d byte failed\n", size);
6654 return(NULL);
6655 }
6656 cur = CUR;
6657 while (((cur >= 'a') && (cur <= 'z')) ||
6658 ((cur >= 'A') && (cur <= 'Z')) ||
6659 ((cur >= '0') && (cur <= '9')) ||
6660 (cur == '_') || (cur == '.') ||
6661 (cur == ':') || (cur == '-')) {
6662 if (len + 1 >= size) {
6663 size *= 2;
6664 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6665 if (buf == NULL) {
6666 xmlGenericError(xmlGenericErrorContext,
6667 "realloc of %d byte failed\n", size);
6668 return(NULL);
6669 }
6670 }
6671 buf[len++] = cur;
6672 NEXT;
6673 cur=CUR;
6674 }
6675 buf[len] = 0;
6676 return(buf);
6677}
6678
6679/**
6680 * xmlParseVersionInfo:
6681 * @ctxt: an XML parser context
6682 *
6683 * parse the XML version.
6684 *
6685 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6686 *
6687 * [25] Eq ::= S? '=' S?
6688 *
6689 * Returns the version string, e.g. "1.0"
6690 */
6691
6692xmlChar *
6693xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6694 xmlChar *version = NULL;
6695 const xmlChar *q;
6696
6697 if ((RAW == 'v') && (NXT(1) == 'e') &&
6698 (NXT(2) == 'r') && (NXT(3) == 's') &&
6699 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6700 (NXT(6) == 'n')) {
6701 SKIP(7);
6702 SKIP_BLANKS;
6703 if (RAW != '=') {
6704 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6706 ctxt->sax->error(ctxt->userData,
6707 "xmlParseVersionInfo : expected '='\n");
6708 ctxt->wellFormed = 0;
6709 ctxt->disableSAX = 1;
6710 return(NULL);
6711 }
6712 NEXT;
6713 SKIP_BLANKS;
6714 if (RAW == '"') {
6715 NEXT;
6716 q = CUR_PTR;
6717 version = xmlParseVersionNum(ctxt);
6718 if (RAW != '"') {
6719 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6721 ctxt->sax->error(ctxt->userData,
6722 "String not closed\n%.50s\n", q);
6723 ctxt->wellFormed = 0;
6724 ctxt->disableSAX = 1;
6725 } else
6726 NEXT;
6727 } else if (RAW == '\''){
6728 NEXT;
6729 q = CUR_PTR;
6730 version = xmlParseVersionNum(ctxt);
6731 if (RAW != '\'') {
6732 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6734 ctxt->sax->error(ctxt->userData,
6735 "String not closed\n%.50s\n", q);
6736 ctxt->wellFormed = 0;
6737 ctxt->disableSAX = 1;
6738 } else
6739 NEXT;
6740 } else {
6741 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6743 ctxt->sax->error(ctxt->userData,
6744 "xmlParseVersionInfo : expected ' or \"\n");
6745 ctxt->wellFormed = 0;
6746 ctxt->disableSAX = 1;
6747 }
6748 }
6749 return(version);
6750}
6751
6752/**
6753 * xmlParseEncName:
6754 * @ctxt: an XML parser context
6755 *
6756 * parse the XML encoding name
6757 *
6758 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6759 *
6760 * Returns the encoding name value or NULL
6761 */
6762xmlChar *
6763xmlParseEncName(xmlParserCtxtPtr ctxt) {
6764 xmlChar *buf = NULL;
6765 int len = 0;
6766 int size = 10;
6767 xmlChar cur;
6768
6769 cur = CUR;
6770 if (((cur >= 'a') && (cur <= 'z')) ||
6771 ((cur >= 'A') && (cur <= 'Z'))) {
6772 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6773 if (buf == NULL) {
6774 xmlGenericError(xmlGenericErrorContext,
6775 "malloc of %d byte failed\n", size);
6776 return(NULL);
6777 }
6778
6779 buf[len++] = cur;
6780 NEXT;
6781 cur = CUR;
6782 while (((cur >= 'a') && (cur <= 'z')) ||
6783 ((cur >= 'A') && (cur <= 'Z')) ||
6784 ((cur >= '0') && (cur <= '9')) ||
6785 (cur == '.') || (cur == '_') ||
6786 (cur == '-')) {
6787 if (len + 1 >= size) {
6788 size *= 2;
6789 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6790 if (buf == NULL) {
6791 xmlGenericError(xmlGenericErrorContext,
6792 "realloc of %d byte failed\n", size);
6793 return(NULL);
6794 }
6795 }
6796 buf[len++] = cur;
6797 NEXT;
6798 cur = CUR;
6799 if (cur == 0) {
6800 SHRINK;
6801 GROW;
6802 cur = CUR;
6803 }
6804 }
6805 buf[len] = 0;
6806 } else {
6807 ctxt->errNo = XML_ERR_ENCODING_NAME;
6808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6809 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6810 ctxt->wellFormed = 0;
6811 ctxt->disableSAX = 1;
6812 }
6813 return(buf);
6814}
6815
6816/**
6817 * xmlParseEncodingDecl:
6818 * @ctxt: an XML parser context
6819 *
6820 * parse the XML encoding declaration
6821 *
6822 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6823 *
6824 * this setups the conversion filters.
6825 *
6826 * Returns the encoding value or NULL
6827 */
6828
6829xmlChar *
6830xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6831 xmlChar *encoding = NULL;
6832 const xmlChar *q;
6833
6834 SKIP_BLANKS;
6835 if ((RAW == 'e') && (NXT(1) == 'n') &&
6836 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6837 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6838 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6839 SKIP(8);
6840 SKIP_BLANKS;
6841 if (RAW != '=') {
6842 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6844 ctxt->sax->error(ctxt->userData,
6845 "xmlParseEncodingDecl : expected '='\n");
6846 ctxt->wellFormed = 0;
6847 ctxt->disableSAX = 1;
6848 return(NULL);
6849 }
6850 NEXT;
6851 SKIP_BLANKS;
6852 if (RAW == '"') {
6853 NEXT;
6854 q = CUR_PTR;
6855 encoding = xmlParseEncName(ctxt);
6856 if (RAW != '"') {
6857 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6859 ctxt->sax->error(ctxt->userData,
6860 "String not closed\n%.50s\n", q);
6861 ctxt->wellFormed = 0;
6862 ctxt->disableSAX = 1;
6863 } else
6864 NEXT;
6865 } else if (RAW == '\''){
6866 NEXT;
6867 q = CUR_PTR;
6868 encoding = xmlParseEncName(ctxt);
6869 if (RAW != '\'') {
6870 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6872 ctxt->sax->error(ctxt->userData,
6873 "String not closed\n%.50s\n", q);
6874 ctxt->wellFormed = 0;
6875 ctxt->disableSAX = 1;
6876 } else
6877 NEXT;
6878 } else if (RAW == '"'){
6879 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6881 ctxt->sax->error(ctxt->userData,
6882 "xmlParseEncodingDecl : expected ' or \"\n");
6883 ctxt->wellFormed = 0;
6884 ctxt->disableSAX = 1;
6885 }
6886 if (encoding != NULL) {
6887 xmlCharEncoding enc;
6888 xmlCharEncodingHandlerPtr handler;
6889
6890 if (ctxt->input->encoding != NULL)
6891 xmlFree((xmlChar *) ctxt->input->encoding);
6892 ctxt->input->encoding = encoding;
6893
6894 enc = xmlParseCharEncoding((const char *) encoding);
6895 /*
6896 * registered set of known encodings
6897 */
6898 if (enc != XML_CHAR_ENCODING_ERROR) {
6899 xmlSwitchEncoding(ctxt, enc);
6900 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6901 xmlFree(encoding);
6902 return(NULL);
6903 }
6904 } else {
6905 /*
6906 * fallback for unknown encodings
6907 */
6908 handler = xmlFindCharEncodingHandler((const char *) encoding);
6909 if (handler != NULL) {
6910 xmlSwitchToEncoding(ctxt, handler);
6911 } else {
6912 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6914 ctxt->sax->error(ctxt->userData,
6915 "Unsupported encoding %s\n", encoding);
6916 return(NULL);
6917 }
6918 }
6919 }
6920 }
6921 return(encoding);
6922}
6923
6924/**
6925 * xmlParseSDDecl:
6926 * @ctxt: an XML parser context
6927 *
6928 * parse the XML standalone declaration
6929 *
6930 * [32] SDDecl ::= S 'standalone' Eq
6931 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6932 *
6933 * [ VC: Standalone Document Declaration ]
6934 * TODO The standalone document declaration must have the value "no"
6935 * if any external markup declarations contain declarations of:
6936 * - attributes with default values, if elements to which these
6937 * attributes apply appear in the document without specifications
6938 * of values for these attributes, or
6939 * - entities (other than amp, lt, gt, apos, quot), if references
6940 * to those entities appear in the document, or
6941 * - attributes with values subject to normalization, where the
6942 * attribute appears in the document with a value which will change
6943 * as a result of normalization, or
6944 * - element types with element content, if white space occurs directly
6945 * within any instance of those types.
6946 *
6947 * Returns 1 if standalone, 0 otherwise
6948 */
6949
6950int
6951xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6952 int standalone = -1;
6953
6954 SKIP_BLANKS;
6955 if ((RAW == 's') && (NXT(1) == 't') &&
6956 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6957 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6958 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6959 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6960 SKIP(10);
6961 SKIP_BLANKS;
6962 if (RAW != '=') {
6963 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6965 ctxt->sax->error(ctxt->userData,
6966 "XML standalone declaration : expected '='\n");
6967 ctxt->wellFormed = 0;
6968 ctxt->disableSAX = 1;
6969 return(standalone);
6970 }
6971 NEXT;
6972 SKIP_BLANKS;
6973 if (RAW == '\''){
6974 NEXT;
6975 if ((RAW == 'n') && (NXT(1) == 'o')) {
6976 standalone = 0;
6977 SKIP(2);
6978 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6979 (NXT(2) == 's')) {
6980 standalone = 1;
6981 SKIP(3);
6982 } else {
6983 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6985 ctxt->sax->error(ctxt->userData,
6986 "standalone accepts only 'yes' or 'no'\n");
6987 ctxt->wellFormed = 0;
6988 ctxt->disableSAX = 1;
6989 }
6990 if (RAW != '\'') {
6991 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6993 ctxt->sax->error(ctxt->userData, "String not closed\n");
6994 ctxt->wellFormed = 0;
6995 ctxt->disableSAX = 1;
6996 } else
6997 NEXT;
6998 } else if (RAW == '"'){
6999 NEXT;
7000 if ((RAW == 'n') && (NXT(1) == 'o')) {
7001 standalone = 0;
7002 SKIP(2);
7003 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7004 (NXT(2) == 's')) {
7005 standalone = 1;
7006 SKIP(3);
7007 } else {
7008 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7010 ctxt->sax->error(ctxt->userData,
7011 "standalone accepts only 'yes' or 'no'\n");
7012 ctxt->wellFormed = 0;
7013 ctxt->disableSAX = 1;
7014 }
7015 if (RAW != '"') {
7016 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7018 ctxt->sax->error(ctxt->userData, "String not closed\n");
7019 ctxt->wellFormed = 0;
7020 ctxt->disableSAX = 1;
7021 } else
7022 NEXT;
7023 } else {
7024 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7026 ctxt->sax->error(ctxt->userData,
7027 "Standalone value not found\n");
7028 ctxt->wellFormed = 0;
7029 ctxt->disableSAX = 1;
7030 }
7031 }
7032 return(standalone);
7033}
7034
7035/**
7036 * xmlParseXMLDecl:
7037 * @ctxt: an XML parser context
7038 *
7039 * parse an XML declaration header
7040 *
7041 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7042 */
7043
7044void
7045xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7046 xmlChar *version;
7047
7048 /*
7049 * We know that '<?xml' is here.
7050 */
7051 SKIP(5);
7052
7053 if (!IS_BLANK(RAW)) {
7054 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7056 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7057 ctxt->wellFormed = 0;
7058 ctxt->disableSAX = 1;
7059 }
7060 SKIP_BLANKS;
7061
7062 /*
7063 * We should have the VersionInfo here.
7064 */
7065 version = xmlParseVersionInfo(ctxt);
7066 if (version == NULL)
7067 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7068 ctxt->version = xmlStrdup(version);
7069 xmlFree(version);
7070
7071 /*
7072 * We may have the encoding declaration
7073 */
7074 if (!IS_BLANK(RAW)) {
7075 if ((RAW == '?') && (NXT(1) == '>')) {
7076 SKIP(2);
7077 return;
7078 }
7079 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7081 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7082 ctxt->wellFormed = 0;
7083 ctxt->disableSAX = 1;
7084 }
7085 xmlParseEncodingDecl(ctxt);
7086 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7087 /*
7088 * The XML REC instructs us to stop parsing right here
7089 */
7090 return;
7091 }
7092
7093 /*
7094 * We may have the standalone status.
7095 */
7096 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7097 if ((RAW == '?') && (NXT(1) == '>')) {
7098 SKIP(2);
7099 return;
7100 }
7101 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7103 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7104 ctxt->wellFormed = 0;
7105 ctxt->disableSAX = 1;
7106 }
7107 SKIP_BLANKS;
7108 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7109
7110 SKIP_BLANKS;
7111 if ((RAW == '?') && (NXT(1) == '>')) {
7112 SKIP(2);
7113 } else if (RAW == '>') {
7114 /* Deprecated old WD ... */
7115 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7117 ctxt->sax->error(ctxt->userData,
7118 "XML declaration must end-up with '?>'\n");
7119 ctxt->wellFormed = 0;
7120 ctxt->disableSAX = 1;
7121 NEXT;
7122 } else {
7123 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7125 ctxt->sax->error(ctxt->userData,
7126 "parsing XML declaration: '?>' expected\n");
7127 ctxt->wellFormed = 0;
7128 ctxt->disableSAX = 1;
7129 MOVETO_ENDTAG(CUR_PTR);
7130 NEXT;
7131 }
7132}
7133
7134/**
7135 * xmlParseMisc:
7136 * @ctxt: an XML parser context
7137 *
7138 * parse an XML Misc* optionnal field.
7139 *
7140 * [27] Misc ::= Comment | PI | S
7141 */
7142
7143void
7144xmlParseMisc(xmlParserCtxtPtr ctxt) {
7145 while (((RAW == '<') && (NXT(1) == '?')) ||
7146 ((RAW == '<') && (NXT(1) == '!') &&
7147 (NXT(2) == '-') && (NXT(3) == '-')) ||
7148 IS_BLANK(CUR)) {
7149 if ((RAW == '<') && (NXT(1) == '?')) {
7150 xmlParsePI(ctxt);
7151 } else if (IS_BLANK(CUR)) {
7152 NEXT;
7153 } else
7154 xmlParseComment(ctxt);
7155 }
7156}
7157
7158/**
7159 * xmlParseDocument:
7160 * @ctxt: an XML parser context
7161 *
7162 * parse an XML document (and build a tree if using the standard SAX
7163 * interface).
7164 *
7165 * [1] document ::= prolog element Misc*
7166 *
7167 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7168 *
7169 * Returns 0, -1 in case of error. the parser context is augmented
7170 * as a result of the parsing.
7171 */
7172
7173int
7174xmlParseDocument(xmlParserCtxtPtr ctxt) {
7175 xmlChar start[4];
7176 xmlCharEncoding enc;
7177
7178 xmlInitParser();
7179
7180 GROW;
7181
7182 /*
7183 * SAX: beginning of the document processing.
7184 */
7185 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7186 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7187
7188 /*
7189 * Get the 4 first bytes and decode the charset
7190 * if enc != XML_CHAR_ENCODING_NONE
7191 * plug some encoding conversion routines.
7192 */
7193 start[0] = RAW;
7194 start[1] = NXT(1);
7195 start[2] = NXT(2);
7196 start[3] = NXT(3);
7197 enc = xmlDetectCharEncoding(start, 4);
7198 if (enc != XML_CHAR_ENCODING_NONE) {
7199 xmlSwitchEncoding(ctxt, enc);
7200 }
7201
7202
7203 if (CUR == 0) {
7204 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7206 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7207 ctxt->wellFormed = 0;
7208 ctxt->disableSAX = 1;
7209 }
7210
7211 /*
7212 * Check for the XMLDecl in the Prolog.
7213 */
7214 GROW;
7215 if ((RAW == '<') && (NXT(1) == '?') &&
7216 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7217 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7218
7219 /*
7220 * Note that we will switch encoding on the fly.
7221 */
7222 xmlParseXMLDecl(ctxt);
7223 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7224 /*
7225 * The XML REC instructs us to stop parsing right here
7226 */
7227 return(-1);
7228 }
7229 ctxt->standalone = ctxt->input->standalone;
7230 SKIP_BLANKS;
7231 } else {
7232 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7233 }
7234 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7235 ctxt->sax->startDocument(ctxt->userData);
7236
7237 /*
7238 * The Misc part of the Prolog
7239 */
7240 GROW;
7241 xmlParseMisc(ctxt);
7242
7243 /*
7244 * Then possibly doc type declaration(s) and more Misc
7245 * (doctypedecl Misc*)?
7246 */
7247 GROW;
7248 if ((RAW == '<') && (NXT(1) == '!') &&
7249 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7250 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7251 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7252 (NXT(8) == 'E')) {
7253
7254 ctxt->inSubset = 1;
7255 xmlParseDocTypeDecl(ctxt);
7256 if (RAW == '[') {
7257 ctxt->instate = XML_PARSER_DTD;
7258 xmlParseInternalSubset(ctxt);
7259 }
7260
7261 /*
7262 * Create and update the external subset.
7263 */
7264 ctxt->inSubset = 2;
7265 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7266 (!ctxt->disableSAX))
7267 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7268 ctxt->extSubSystem, ctxt->extSubURI);
7269 ctxt->inSubset = 0;
7270
7271
7272 ctxt->instate = XML_PARSER_PROLOG;
7273 xmlParseMisc(ctxt);
7274 }
7275
7276 /*
7277 * Time to start parsing the tree itself
7278 */
7279 GROW;
7280 if (RAW != '<') {
7281 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7283 ctxt->sax->error(ctxt->userData,
7284 "Start tag expected, '<' not found\n");
7285 ctxt->wellFormed = 0;
7286 ctxt->disableSAX = 1;
7287 ctxt->instate = XML_PARSER_EOF;
7288 } else {
7289 ctxt->instate = XML_PARSER_CONTENT;
7290 xmlParseElement(ctxt);
7291 ctxt->instate = XML_PARSER_EPILOG;
7292
7293
7294 /*
7295 * The Misc part at the end
7296 */
7297 xmlParseMisc(ctxt);
7298
7299 if (RAW != 0) {
7300 ctxt->errNo = XML_ERR_DOCUMENT_END;
7301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7302 ctxt->sax->error(ctxt->userData,
7303 "Extra content at the end of the document\n");
7304 ctxt->wellFormed = 0;
7305 ctxt->disableSAX = 1;
7306 }
7307 ctxt->instate = XML_PARSER_EOF;
7308 }
7309
7310 /*
7311 * SAX: end of the document processing.
7312 */
7313 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7314 (!ctxt->disableSAX))
7315 ctxt->sax->endDocument(ctxt->userData);
7316
7317 if (! ctxt->wellFormed) return(-1);
7318 return(0);
7319}
7320
7321/**
7322 * xmlParseExtParsedEnt:
7323 * @ctxt: an XML parser context
7324 *
7325 * parse a genreral parsed entity
7326 * An external general parsed entity is well-formed if it matches the
7327 * production labeled extParsedEnt.
7328 *
7329 * [78] extParsedEnt ::= TextDecl? content
7330 *
7331 * Returns 0, -1 in case of error. the parser context is augmented
7332 * as a result of the parsing.
7333 */
7334
7335int
7336xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7337 xmlChar start[4];
7338 xmlCharEncoding enc;
7339
7340 xmlDefaultSAXHandlerInit();
7341
7342 GROW;
7343
7344 /*
7345 * SAX: beginning of the document processing.
7346 */
7347 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7348 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7349
7350 /*
7351 * Get the 4 first bytes and decode the charset
7352 * if enc != XML_CHAR_ENCODING_NONE
7353 * plug some encoding conversion routines.
7354 */
7355 start[0] = RAW;
7356 start[1] = NXT(1);
7357 start[2] = NXT(2);
7358 start[3] = NXT(3);
7359 enc = xmlDetectCharEncoding(start, 4);
7360 if (enc != XML_CHAR_ENCODING_NONE) {
7361 xmlSwitchEncoding(ctxt, enc);
7362 }
7363
7364
7365 if (CUR == 0) {
7366 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7367 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7368 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7369 ctxt->wellFormed = 0;
7370 ctxt->disableSAX = 1;
7371 }
7372
7373 /*
7374 * Check for the XMLDecl in the Prolog.
7375 */
7376 GROW;
7377 if ((RAW == '<') && (NXT(1) == '?') &&
7378 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7379 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7380
7381 /*
7382 * Note that we will switch encoding on the fly.
7383 */
7384 xmlParseXMLDecl(ctxt);
7385 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7386 /*
7387 * The XML REC instructs us to stop parsing right here
7388 */
7389 return(-1);
7390 }
7391 SKIP_BLANKS;
7392 } else {
7393 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7394 }
7395 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7396 ctxt->sax->startDocument(ctxt->userData);
7397
7398 /*
7399 * Doing validity checking on chunk doesn't make sense
7400 */
7401 ctxt->instate = XML_PARSER_CONTENT;
7402 ctxt->validate = 0;
7403 ctxt->loadsubset = 0;
7404 ctxt->depth = 0;
7405
7406 xmlParseContent(ctxt);
7407
7408 if ((RAW == '<') && (NXT(1) == '/')) {
7409 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7411 ctxt->sax->error(ctxt->userData,
7412 "chunk is not well balanced\n");
7413 ctxt->wellFormed = 0;
7414 ctxt->disableSAX = 1;
7415 } else if (RAW != 0) {
7416 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7418 ctxt->sax->error(ctxt->userData,
7419 "extra content at the end of well balanced chunk\n");
7420 ctxt->wellFormed = 0;
7421 ctxt->disableSAX = 1;
7422 }
7423
7424 /*
7425 * SAX: end of the document processing.
7426 */
7427 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7428 (!ctxt->disableSAX))
7429 ctxt->sax->endDocument(ctxt->userData);
7430
7431 if (! ctxt->wellFormed) return(-1);
7432 return(0);
7433}
7434
7435/************************************************************************
7436 * *
7437 * Progressive parsing interfaces *
7438 * *
7439 ************************************************************************/
7440
7441/**
7442 * xmlParseLookupSequence:
7443 * @ctxt: an XML parser context
7444 * @first: the first char to lookup
7445 * @next: the next char to lookup or zero
7446 * @third: the next char to lookup or zero
7447 *
7448 * Try to find if a sequence (first, next, third) or just (first next) or
7449 * (first) is available in the input stream.
7450 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7451 * to avoid rescanning sequences of bytes, it DOES change the state of the
7452 * parser, do not use liberally.
7453 *
7454 * Returns the index to the current parsing point if the full sequence
7455 * is available, -1 otherwise.
7456 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007457static int
Owen Taylor3473f882001-02-23 17:55:21 +00007458xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7459 xmlChar next, xmlChar third) {
7460 int base, len;
7461 xmlParserInputPtr in;
7462 const xmlChar *buf;
7463
7464 in = ctxt->input;
7465 if (in == NULL) return(-1);
7466 base = in->cur - in->base;
7467 if (base < 0) return(-1);
7468 if (ctxt->checkIndex > base)
7469 base = ctxt->checkIndex;
7470 if (in->buf == NULL) {
7471 buf = in->base;
7472 len = in->length;
7473 } else {
7474 buf = in->buf->buffer->content;
7475 len = in->buf->buffer->use;
7476 }
7477 /* take into account the sequence length */
7478 if (third) len -= 2;
7479 else if (next) len --;
7480 for (;base < len;base++) {
7481 if (buf[base] == first) {
7482 if (third != 0) {
7483 if ((buf[base + 1] != next) ||
7484 (buf[base + 2] != third)) continue;
7485 } else if (next != 0) {
7486 if (buf[base + 1] != next) continue;
7487 }
7488 ctxt->checkIndex = 0;
7489#ifdef DEBUG_PUSH
7490 if (next == 0)
7491 xmlGenericError(xmlGenericErrorContext,
7492 "PP: lookup '%c' found at %d\n",
7493 first, base);
7494 else if (third == 0)
7495 xmlGenericError(xmlGenericErrorContext,
7496 "PP: lookup '%c%c' found at %d\n",
7497 first, next, base);
7498 else
7499 xmlGenericError(xmlGenericErrorContext,
7500 "PP: lookup '%c%c%c' found at %d\n",
7501 first, next, third, base);
7502#endif
7503 return(base - (in->cur - in->base));
7504 }
7505 }
7506 ctxt->checkIndex = base;
7507#ifdef DEBUG_PUSH
7508 if (next == 0)
7509 xmlGenericError(xmlGenericErrorContext,
7510 "PP: lookup '%c' failed\n", first);
7511 else if (third == 0)
7512 xmlGenericError(xmlGenericErrorContext,
7513 "PP: lookup '%c%c' failed\n", first, next);
7514 else
7515 xmlGenericError(xmlGenericErrorContext,
7516 "PP: lookup '%c%c%c' failed\n", first, next, third);
7517#endif
7518 return(-1);
7519}
7520
7521/**
7522 * xmlParseTryOrFinish:
7523 * @ctxt: an XML parser context
7524 * @terminate: last chunk indicator
7525 *
7526 * Try to progress on parsing
7527 *
7528 * Returns zero if no parsing was possible
7529 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007530static int
Owen Taylor3473f882001-02-23 17:55:21 +00007531xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7532 int ret = 0;
7533 int avail;
7534 xmlChar cur, next;
7535
7536#ifdef DEBUG_PUSH
7537 switch (ctxt->instate) {
7538 case XML_PARSER_EOF:
7539 xmlGenericError(xmlGenericErrorContext,
7540 "PP: try EOF\n"); break;
7541 case XML_PARSER_START:
7542 xmlGenericError(xmlGenericErrorContext,
7543 "PP: try START\n"); break;
7544 case XML_PARSER_MISC:
7545 xmlGenericError(xmlGenericErrorContext,
7546 "PP: try MISC\n");break;
7547 case XML_PARSER_COMMENT:
7548 xmlGenericError(xmlGenericErrorContext,
7549 "PP: try COMMENT\n");break;
7550 case XML_PARSER_PROLOG:
7551 xmlGenericError(xmlGenericErrorContext,
7552 "PP: try PROLOG\n");break;
7553 case XML_PARSER_START_TAG:
7554 xmlGenericError(xmlGenericErrorContext,
7555 "PP: try START_TAG\n");break;
7556 case XML_PARSER_CONTENT:
7557 xmlGenericError(xmlGenericErrorContext,
7558 "PP: try CONTENT\n");break;
7559 case XML_PARSER_CDATA_SECTION:
7560 xmlGenericError(xmlGenericErrorContext,
7561 "PP: try CDATA_SECTION\n");break;
7562 case XML_PARSER_END_TAG:
7563 xmlGenericError(xmlGenericErrorContext,
7564 "PP: try END_TAG\n");break;
7565 case XML_PARSER_ENTITY_DECL:
7566 xmlGenericError(xmlGenericErrorContext,
7567 "PP: try ENTITY_DECL\n");break;
7568 case XML_PARSER_ENTITY_VALUE:
7569 xmlGenericError(xmlGenericErrorContext,
7570 "PP: try ENTITY_VALUE\n");break;
7571 case XML_PARSER_ATTRIBUTE_VALUE:
7572 xmlGenericError(xmlGenericErrorContext,
7573 "PP: try ATTRIBUTE_VALUE\n");break;
7574 case XML_PARSER_DTD:
7575 xmlGenericError(xmlGenericErrorContext,
7576 "PP: try DTD\n");break;
7577 case XML_PARSER_EPILOG:
7578 xmlGenericError(xmlGenericErrorContext,
7579 "PP: try EPILOG\n");break;
7580 case XML_PARSER_PI:
7581 xmlGenericError(xmlGenericErrorContext,
7582 "PP: try PI\n");break;
7583 case XML_PARSER_IGNORE:
7584 xmlGenericError(xmlGenericErrorContext,
7585 "PP: try IGNORE\n");break;
7586 }
7587#endif
7588
7589 while (1) {
7590 /*
7591 * Pop-up of finished entities.
7592 */
7593 while ((RAW == 0) && (ctxt->inputNr > 1))
7594 xmlPopInput(ctxt);
7595
7596 if (ctxt->input ==NULL) break;
7597 if (ctxt->input->buf == NULL)
7598 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7599 else
7600 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7601 if (avail < 1)
7602 goto done;
7603 switch (ctxt->instate) {
7604 case XML_PARSER_EOF:
7605 /*
7606 * Document parsing is done !
7607 */
7608 goto done;
7609 case XML_PARSER_START:
7610 /*
7611 * Very first chars read from the document flow.
7612 */
Owen Taylor3473f882001-02-23 17:55:21 +00007613 if (avail < 2)
7614 goto done;
7615
7616 cur = ctxt->input->cur[0];
7617 next = ctxt->input->cur[1];
7618 if (cur == 0) {
7619 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7620 ctxt->sax->setDocumentLocator(ctxt->userData,
7621 &xmlDefaultSAXLocator);
7622 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7624 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7625 ctxt->wellFormed = 0;
7626 ctxt->disableSAX = 1;
7627 ctxt->instate = XML_PARSER_EOF;
7628#ifdef DEBUG_PUSH
7629 xmlGenericError(xmlGenericErrorContext,
7630 "PP: entering EOF\n");
7631#endif
7632 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7633 ctxt->sax->endDocument(ctxt->userData);
7634 goto done;
7635 }
7636 if ((cur == '<') && (next == '?')) {
7637 /* PI or XML decl */
7638 if (avail < 5) return(ret);
7639 if ((!terminate) &&
7640 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7641 return(ret);
7642 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7643 ctxt->sax->setDocumentLocator(ctxt->userData,
7644 &xmlDefaultSAXLocator);
7645 if ((ctxt->input->cur[2] == 'x') &&
7646 (ctxt->input->cur[3] == 'm') &&
7647 (ctxt->input->cur[4] == 'l') &&
7648 (IS_BLANK(ctxt->input->cur[5]))) {
7649 ret += 5;
7650#ifdef DEBUG_PUSH
7651 xmlGenericError(xmlGenericErrorContext,
7652 "PP: Parsing XML Decl\n");
7653#endif
7654 xmlParseXMLDecl(ctxt);
7655 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7656 /*
7657 * The XML REC instructs us to stop parsing right
7658 * here
7659 */
7660 ctxt->instate = XML_PARSER_EOF;
7661 return(0);
7662 }
7663 ctxt->standalone = ctxt->input->standalone;
7664 if ((ctxt->encoding == NULL) &&
7665 (ctxt->input->encoding != NULL))
7666 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7667 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7668 (!ctxt->disableSAX))
7669 ctxt->sax->startDocument(ctxt->userData);
7670 ctxt->instate = XML_PARSER_MISC;
7671#ifdef DEBUG_PUSH
7672 xmlGenericError(xmlGenericErrorContext,
7673 "PP: entering MISC\n");
7674#endif
7675 } else {
7676 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7677 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7678 (!ctxt->disableSAX))
7679 ctxt->sax->startDocument(ctxt->userData);
7680 ctxt->instate = XML_PARSER_MISC;
7681#ifdef DEBUG_PUSH
7682 xmlGenericError(xmlGenericErrorContext,
7683 "PP: entering MISC\n");
7684#endif
7685 }
7686 } else {
7687 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7688 ctxt->sax->setDocumentLocator(ctxt->userData,
7689 &xmlDefaultSAXLocator);
7690 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7691 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7692 (!ctxt->disableSAX))
7693 ctxt->sax->startDocument(ctxt->userData);
7694 ctxt->instate = XML_PARSER_MISC;
7695#ifdef DEBUG_PUSH
7696 xmlGenericError(xmlGenericErrorContext,
7697 "PP: entering MISC\n");
7698#endif
7699 }
7700 break;
7701 case XML_PARSER_MISC:
7702 SKIP_BLANKS;
7703 if (ctxt->input->buf == NULL)
7704 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7705 else
7706 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7707 if (avail < 2)
7708 goto done;
7709 cur = ctxt->input->cur[0];
7710 next = ctxt->input->cur[1];
7711 if ((cur == '<') && (next == '?')) {
7712 if ((!terminate) &&
7713 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7714 goto done;
7715#ifdef DEBUG_PUSH
7716 xmlGenericError(xmlGenericErrorContext,
7717 "PP: Parsing PI\n");
7718#endif
7719 xmlParsePI(ctxt);
7720 } else if ((cur == '<') && (next == '!') &&
7721 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7722 if ((!terminate) &&
7723 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7724 goto done;
7725#ifdef DEBUG_PUSH
7726 xmlGenericError(xmlGenericErrorContext,
7727 "PP: Parsing Comment\n");
7728#endif
7729 xmlParseComment(ctxt);
7730 ctxt->instate = XML_PARSER_MISC;
7731 } else if ((cur == '<') && (next == '!') &&
7732 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7733 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7734 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7735 (ctxt->input->cur[8] == 'E')) {
7736 if ((!terminate) &&
7737 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7738 goto done;
7739#ifdef DEBUG_PUSH
7740 xmlGenericError(xmlGenericErrorContext,
7741 "PP: Parsing internal subset\n");
7742#endif
7743 ctxt->inSubset = 1;
7744 xmlParseDocTypeDecl(ctxt);
7745 if (RAW == '[') {
7746 ctxt->instate = XML_PARSER_DTD;
7747#ifdef DEBUG_PUSH
7748 xmlGenericError(xmlGenericErrorContext,
7749 "PP: entering DTD\n");
7750#endif
7751 } else {
7752 /*
7753 * Create and update the external subset.
7754 */
7755 ctxt->inSubset = 2;
7756 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7757 (ctxt->sax->externalSubset != NULL))
7758 ctxt->sax->externalSubset(ctxt->userData,
7759 ctxt->intSubName, ctxt->extSubSystem,
7760 ctxt->extSubURI);
7761 ctxt->inSubset = 0;
7762 ctxt->instate = XML_PARSER_PROLOG;
7763#ifdef DEBUG_PUSH
7764 xmlGenericError(xmlGenericErrorContext,
7765 "PP: entering PROLOG\n");
7766#endif
7767 }
7768 } else if ((cur == '<') && (next == '!') &&
7769 (avail < 9)) {
7770 goto done;
7771 } else {
7772 ctxt->instate = XML_PARSER_START_TAG;
7773#ifdef DEBUG_PUSH
7774 xmlGenericError(xmlGenericErrorContext,
7775 "PP: entering START_TAG\n");
7776#endif
7777 }
7778 break;
7779 case XML_PARSER_IGNORE:
7780 xmlGenericError(xmlGenericErrorContext,
7781 "PP: internal error, state == IGNORE");
7782 ctxt->instate = XML_PARSER_DTD;
7783#ifdef DEBUG_PUSH
7784 xmlGenericError(xmlGenericErrorContext,
7785 "PP: entering DTD\n");
7786#endif
7787 break;
7788 case XML_PARSER_PROLOG:
7789 SKIP_BLANKS;
7790 if (ctxt->input->buf == NULL)
7791 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7792 else
7793 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7794 if (avail < 2)
7795 goto done;
7796 cur = ctxt->input->cur[0];
7797 next = ctxt->input->cur[1];
7798 if ((cur == '<') && (next == '?')) {
7799 if ((!terminate) &&
7800 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7801 goto done;
7802#ifdef DEBUG_PUSH
7803 xmlGenericError(xmlGenericErrorContext,
7804 "PP: Parsing PI\n");
7805#endif
7806 xmlParsePI(ctxt);
7807 } else if ((cur == '<') && (next == '!') &&
7808 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7809 if ((!terminate) &&
7810 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7811 goto done;
7812#ifdef DEBUG_PUSH
7813 xmlGenericError(xmlGenericErrorContext,
7814 "PP: Parsing Comment\n");
7815#endif
7816 xmlParseComment(ctxt);
7817 ctxt->instate = XML_PARSER_PROLOG;
7818 } else if ((cur == '<') && (next == '!') &&
7819 (avail < 4)) {
7820 goto done;
7821 } else {
7822 ctxt->instate = XML_PARSER_START_TAG;
7823#ifdef DEBUG_PUSH
7824 xmlGenericError(xmlGenericErrorContext,
7825 "PP: entering START_TAG\n");
7826#endif
7827 }
7828 break;
7829 case XML_PARSER_EPILOG:
7830 SKIP_BLANKS;
7831 if (ctxt->input->buf == NULL)
7832 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7833 else
7834 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7835 if (avail < 2)
7836 goto done;
7837 cur = ctxt->input->cur[0];
7838 next = ctxt->input->cur[1];
7839 if ((cur == '<') && (next == '?')) {
7840 if ((!terminate) &&
7841 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7842 goto done;
7843#ifdef DEBUG_PUSH
7844 xmlGenericError(xmlGenericErrorContext,
7845 "PP: Parsing PI\n");
7846#endif
7847 xmlParsePI(ctxt);
7848 ctxt->instate = XML_PARSER_EPILOG;
7849 } else if ((cur == '<') && (next == '!') &&
7850 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7851 if ((!terminate) &&
7852 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7853 goto done;
7854#ifdef DEBUG_PUSH
7855 xmlGenericError(xmlGenericErrorContext,
7856 "PP: Parsing Comment\n");
7857#endif
7858 xmlParseComment(ctxt);
7859 ctxt->instate = XML_PARSER_EPILOG;
7860 } else if ((cur == '<') && (next == '!') &&
7861 (avail < 4)) {
7862 goto done;
7863 } else {
7864 ctxt->errNo = XML_ERR_DOCUMENT_END;
7865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7866 ctxt->sax->error(ctxt->userData,
7867 "Extra content at the end of the document\n");
7868 ctxt->wellFormed = 0;
7869 ctxt->disableSAX = 1;
7870 ctxt->instate = XML_PARSER_EOF;
7871#ifdef DEBUG_PUSH
7872 xmlGenericError(xmlGenericErrorContext,
7873 "PP: entering EOF\n");
7874#endif
7875 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7876 (!ctxt->disableSAX))
7877 ctxt->sax->endDocument(ctxt->userData);
7878 goto done;
7879 }
7880 break;
7881 case XML_PARSER_START_TAG: {
7882 xmlChar *name, *oldname;
7883
7884 if ((avail < 2) && (ctxt->inputNr == 1))
7885 goto done;
7886 cur = ctxt->input->cur[0];
7887 if (cur != '<') {
7888 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7890 ctxt->sax->error(ctxt->userData,
7891 "Start tag expect, '<' not found\n");
7892 ctxt->wellFormed = 0;
7893 ctxt->disableSAX = 1;
7894 ctxt->instate = XML_PARSER_EOF;
7895#ifdef DEBUG_PUSH
7896 xmlGenericError(xmlGenericErrorContext,
7897 "PP: entering EOF\n");
7898#endif
7899 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7900 (!ctxt->disableSAX))
7901 ctxt->sax->endDocument(ctxt->userData);
7902 goto done;
7903 }
7904 if ((!terminate) &&
7905 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7906 goto done;
7907 if (ctxt->spaceNr == 0)
7908 spacePush(ctxt, -1);
7909 else
7910 spacePush(ctxt, *ctxt->space);
7911 name = xmlParseStartTag(ctxt);
7912 if (name == NULL) {
7913 spacePop(ctxt);
7914 ctxt->instate = XML_PARSER_EOF;
7915#ifdef DEBUG_PUSH
7916 xmlGenericError(xmlGenericErrorContext,
7917 "PP: entering EOF\n");
7918#endif
7919 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7920 (!ctxt->disableSAX))
7921 ctxt->sax->endDocument(ctxt->userData);
7922 goto done;
7923 }
7924 namePush(ctxt, xmlStrdup(name));
7925
7926 /*
7927 * [ VC: Root Element Type ]
7928 * The Name in the document type declaration must match
7929 * the element type of the root element.
7930 */
7931 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7932 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7933 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7934
7935 /*
7936 * Check for an Empty Element.
7937 */
7938 if ((RAW == '/') && (NXT(1) == '>')) {
7939 SKIP(2);
7940 if ((ctxt->sax != NULL) &&
7941 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7942 ctxt->sax->endElement(ctxt->userData, name);
7943 xmlFree(name);
7944 oldname = namePop(ctxt);
7945 spacePop(ctxt);
7946 if (oldname != NULL) {
7947#ifdef DEBUG_STACK
7948 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7949#endif
7950 xmlFree(oldname);
7951 }
7952 if (ctxt->name == NULL) {
7953 ctxt->instate = XML_PARSER_EPILOG;
7954#ifdef DEBUG_PUSH
7955 xmlGenericError(xmlGenericErrorContext,
7956 "PP: entering EPILOG\n");
7957#endif
7958 } else {
7959 ctxt->instate = XML_PARSER_CONTENT;
7960#ifdef DEBUG_PUSH
7961 xmlGenericError(xmlGenericErrorContext,
7962 "PP: entering CONTENT\n");
7963#endif
7964 }
7965 break;
7966 }
7967 if (RAW == '>') {
7968 NEXT;
7969 } else {
7970 ctxt->errNo = XML_ERR_GT_REQUIRED;
7971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7972 ctxt->sax->error(ctxt->userData,
7973 "Couldn't find end of Start Tag %s\n",
7974 name);
7975 ctxt->wellFormed = 0;
7976 ctxt->disableSAX = 1;
7977
7978 /*
7979 * end of parsing of this node.
7980 */
7981 nodePop(ctxt);
7982 oldname = namePop(ctxt);
7983 spacePop(ctxt);
7984 if (oldname != NULL) {
7985#ifdef DEBUG_STACK
7986 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7987#endif
7988 xmlFree(oldname);
7989 }
7990 }
7991 xmlFree(name);
7992 ctxt->instate = XML_PARSER_CONTENT;
7993#ifdef DEBUG_PUSH
7994 xmlGenericError(xmlGenericErrorContext,
7995 "PP: entering CONTENT\n");
7996#endif
7997 break;
7998 }
7999 case XML_PARSER_CONTENT: {
8000 const xmlChar *test;
8001 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008002 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008003
8004 /*
8005 * Handle preparsed entities and charRef
8006 */
8007 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008008 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008009
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008010 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008011 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8012 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008013 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008014 ctxt->token = 0;
8015 }
8016 if ((avail < 2) && (ctxt->inputNr == 1))
8017 goto done;
8018 cur = ctxt->input->cur[0];
8019 next = ctxt->input->cur[1];
8020
8021 test = CUR_PTR;
8022 cons = ctxt->input->consumed;
8023 tok = ctxt->token;
8024 if ((cur == '<') && (next == '?')) {
8025 if ((!terminate) &&
8026 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8027 goto done;
8028#ifdef DEBUG_PUSH
8029 xmlGenericError(xmlGenericErrorContext,
8030 "PP: Parsing PI\n");
8031#endif
8032 xmlParsePI(ctxt);
8033 } else if ((cur == '<') && (next == '!') &&
8034 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8035 if ((!terminate) &&
8036 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8037 goto done;
8038#ifdef DEBUG_PUSH
8039 xmlGenericError(xmlGenericErrorContext,
8040 "PP: Parsing Comment\n");
8041#endif
8042 xmlParseComment(ctxt);
8043 ctxt->instate = XML_PARSER_CONTENT;
8044 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8045 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8046 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8047 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8048 (ctxt->input->cur[8] == '[')) {
8049 SKIP(9);
8050 ctxt->instate = XML_PARSER_CDATA_SECTION;
8051#ifdef DEBUG_PUSH
8052 xmlGenericError(xmlGenericErrorContext,
8053 "PP: entering CDATA_SECTION\n");
8054#endif
8055 break;
8056 } else if ((cur == '<') && (next == '!') &&
8057 (avail < 9)) {
8058 goto done;
8059 } else if ((cur == '<') && (next == '/')) {
8060 ctxt->instate = XML_PARSER_END_TAG;
8061#ifdef DEBUG_PUSH
8062 xmlGenericError(xmlGenericErrorContext,
8063 "PP: entering END_TAG\n");
8064#endif
8065 break;
8066 } else if (cur == '<') {
8067 ctxt->instate = XML_PARSER_START_TAG;
8068#ifdef DEBUG_PUSH
8069 xmlGenericError(xmlGenericErrorContext,
8070 "PP: entering START_TAG\n");
8071#endif
8072 break;
8073 } else if (cur == '&') {
8074 if ((!terminate) &&
8075 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8076 goto done;
8077#ifdef DEBUG_PUSH
8078 xmlGenericError(xmlGenericErrorContext,
8079 "PP: Parsing Reference\n");
8080#endif
8081 xmlParseReference(ctxt);
8082 } else {
8083 /* TODO Avoid the extra copy, handle directly !!! */
8084 /*
8085 * Goal of the following test is:
8086 * - minimize calls to the SAX 'character' callback
8087 * when they are mergeable
8088 * - handle an problem for isBlank when we only parse
8089 * a sequence of blank chars and the next one is
8090 * not available to check against '<' presence.
8091 * - tries to homogenize the differences in SAX
8092 * callbacks beween the push and pull versions
8093 * of the parser.
8094 */
8095 if ((ctxt->inputNr == 1) &&
8096 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8097 if ((!terminate) &&
8098 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8099 goto done;
8100 }
8101 ctxt->checkIndex = 0;
8102#ifdef DEBUG_PUSH
8103 xmlGenericError(xmlGenericErrorContext,
8104 "PP: Parsing char data\n");
8105#endif
8106 xmlParseCharData(ctxt, 0);
8107 }
8108 /*
8109 * Pop-up of finished entities.
8110 */
8111 while ((RAW == 0) && (ctxt->inputNr > 1))
8112 xmlPopInput(ctxt);
8113 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8114 (tok == ctxt->token)) {
8115 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8117 ctxt->sax->error(ctxt->userData,
8118 "detected an error in element content\n");
8119 ctxt->wellFormed = 0;
8120 ctxt->disableSAX = 1;
8121 ctxt->instate = XML_PARSER_EOF;
8122 break;
8123 }
8124 break;
8125 }
8126 case XML_PARSER_CDATA_SECTION: {
8127 /*
8128 * The Push mode need to have the SAX callback for
8129 * cdataBlock merge back contiguous callbacks.
8130 */
8131 int base;
8132
8133 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8134 if (base < 0) {
8135 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8136 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8137 if (ctxt->sax->cdataBlock != NULL)
8138 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8139 XML_PARSER_BIG_BUFFER_SIZE);
8140 }
8141 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8142 ctxt->checkIndex = 0;
8143 }
8144 goto done;
8145 } else {
8146 if ((ctxt->sax != NULL) && (base > 0) &&
8147 (!ctxt->disableSAX)) {
8148 if (ctxt->sax->cdataBlock != NULL)
8149 ctxt->sax->cdataBlock(ctxt->userData,
8150 ctxt->input->cur, base);
8151 }
8152 SKIP(base + 3);
8153 ctxt->checkIndex = 0;
8154 ctxt->instate = XML_PARSER_CONTENT;
8155#ifdef DEBUG_PUSH
8156 xmlGenericError(xmlGenericErrorContext,
8157 "PP: entering CONTENT\n");
8158#endif
8159 }
8160 break;
8161 }
8162 case XML_PARSER_END_TAG:
8163 if (avail < 2)
8164 goto done;
8165 if ((!terminate) &&
8166 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8167 goto done;
8168 xmlParseEndTag(ctxt);
8169 if (ctxt->name == NULL) {
8170 ctxt->instate = XML_PARSER_EPILOG;
8171#ifdef DEBUG_PUSH
8172 xmlGenericError(xmlGenericErrorContext,
8173 "PP: entering EPILOG\n");
8174#endif
8175 } else {
8176 ctxt->instate = XML_PARSER_CONTENT;
8177#ifdef DEBUG_PUSH
8178 xmlGenericError(xmlGenericErrorContext,
8179 "PP: entering CONTENT\n");
8180#endif
8181 }
8182 break;
8183 case XML_PARSER_DTD: {
8184 /*
8185 * Sorry but progressive parsing of the internal subset
8186 * is not expected to be supported. We first check that
8187 * the full content of the internal subset is available and
8188 * the parsing is launched only at that point.
8189 * Internal subset ends up with "']' S? '>'" in an unescaped
8190 * section and not in a ']]>' sequence which are conditional
8191 * sections (whoever argued to keep that crap in XML deserve
8192 * a place in hell !).
8193 */
8194 int base, i;
8195 xmlChar *buf;
8196 xmlChar quote = 0;
8197
8198 base = ctxt->input->cur - ctxt->input->base;
8199 if (base < 0) return(0);
8200 if (ctxt->checkIndex > base)
8201 base = ctxt->checkIndex;
8202 buf = ctxt->input->buf->buffer->content;
8203 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8204 base++) {
8205 if (quote != 0) {
8206 if (buf[base] == quote)
8207 quote = 0;
8208 continue;
8209 }
8210 if (buf[base] == '"') {
8211 quote = '"';
8212 continue;
8213 }
8214 if (buf[base] == '\'') {
8215 quote = '\'';
8216 continue;
8217 }
8218 if (buf[base] == ']') {
8219 if ((unsigned int) base +1 >=
8220 ctxt->input->buf->buffer->use)
8221 break;
8222 if (buf[base + 1] == ']') {
8223 /* conditional crap, skip both ']' ! */
8224 base++;
8225 continue;
8226 }
8227 for (i = 0;
8228 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8229 i++) {
8230 if (buf[base + i] == '>')
8231 goto found_end_int_subset;
8232 }
8233 break;
8234 }
8235 }
8236 /*
8237 * We didn't found the end of the Internal subset
8238 */
8239 if (quote == 0)
8240 ctxt->checkIndex = base;
8241#ifdef DEBUG_PUSH
8242 if (next == 0)
8243 xmlGenericError(xmlGenericErrorContext,
8244 "PP: lookup of int subset end filed\n");
8245#endif
8246 goto done;
8247
8248found_end_int_subset:
8249 xmlParseInternalSubset(ctxt);
8250 ctxt->inSubset = 2;
8251 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8252 (ctxt->sax->externalSubset != NULL))
8253 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8254 ctxt->extSubSystem, ctxt->extSubURI);
8255 ctxt->inSubset = 0;
8256 ctxt->instate = XML_PARSER_PROLOG;
8257 ctxt->checkIndex = 0;
8258#ifdef DEBUG_PUSH
8259 xmlGenericError(xmlGenericErrorContext,
8260 "PP: entering PROLOG\n");
8261#endif
8262 break;
8263 }
8264 case XML_PARSER_COMMENT:
8265 xmlGenericError(xmlGenericErrorContext,
8266 "PP: internal error, state == COMMENT\n");
8267 ctxt->instate = XML_PARSER_CONTENT;
8268#ifdef DEBUG_PUSH
8269 xmlGenericError(xmlGenericErrorContext,
8270 "PP: entering CONTENT\n");
8271#endif
8272 break;
8273 case XML_PARSER_PI:
8274 xmlGenericError(xmlGenericErrorContext,
8275 "PP: internal error, state == PI\n");
8276 ctxt->instate = XML_PARSER_CONTENT;
8277#ifdef DEBUG_PUSH
8278 xmlGenericError(xmlGenericErrorContext,
8279 "PP: entering CONTENT\n");
8280#endif
8281 break;
8282 case XML_PARSER_ENTITY_DECL:
8283 xmlGenericError(xmlGenericErrorContext,
8284 "PP: internal error, state == ENTITY_DECL\n");
8285 ctxt->instate = XML_PARSER_DTD;
8286#ifdef DEBUG_PUSH
8287 xmlGenericError(xmlGenericErrorContext,
8288 "PP: entering DTD\n");
8289#endif
8290 break;
8291 case XML_PARSER_ENTITY_VALUE:
8292 xmlGenericError(xmlGenericErrorContext,
8293 "PP: internal error, state == ENTITY_VALUE\n");
8294 ctxt->instate = XML_PARSER_CONTENT;
8295#ifdef DEBUG_PUSH
8296 xmlGenericError(xmlGenericErrorContext,
8297 "PP: entering DTD\n");
8298#endif
8299 break;
8300 case XML_PARSER_ATTRIBUTE_VALUE:
8301 xmlGenericError(xmlGenericErrorContext,
8302 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8303 ctxt->instate = XML_PARSER_START_TAG;
8304#ifdef DEBUG_PUSH
8305 xmlGenericError(xmlGenericErrorContext,
8306 "PP: entering START_TAG\n");
8307#endif
8308 break;
8309 case XML_PARSER_SYSTEM_LITERAL:
8310 xmlGenericError(xmlGenericErrorContext,
8311 "PP: internal error, state == SYSTEM_LITERAL\n");
8312 ctxt->instate = XML_PARSER_START_TAG;
8313#ifdef DEBUG_PUSH
8314 xmlGenericError(xmlGenericErrorContext,
8315 "PP: entering START_TAG\n");
8316#endif
8317 break;
8318 }
8319 }
8320done:
8321#ifdef DEBUG_PUSH
8322 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8323#endif
8324 return(ret);
8325}
8326
8327/**
Owen Taylor3473f882001-02-23 17:55:21 +00008328 * xmlParseChunk:
8329 * @ctxt: an XML parser context
8330 * @chunk: an char array
8331 * @size: the size in byte of the chunk
8332 * @terminate: last chunk indicator
8333 *
8334 * Parse a Chunk of memory
8335 *
8336 * Returns zero if no error, the xmlParserErrors otherwise.
8337 */
8338int
8339xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8340 int terminate) {
8341 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8342 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8343 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8344 int cur = ctxt->input->cur - ctxt->input->base;
8345
8346 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8347 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8348 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008349 ctxt->input->end =
8350 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008351#ifdef DEBUG_PUSH
8352 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8353#endif
8354
8355 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8356 xmlParseTryOrFinish(ctxt, terminate);
8357 } else if (ctxt->instate != XML_PARSER_EOF) {
8358 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8359 xmlParserInputBufferPtr in = ctxt->input->buf;
8360 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8361 (in->raw != NULL)) {
8362 int nbchars;
8363
8364 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8365 if (nbchars < 0) {
8366 xmlGenericError(xmlGenericErrorContext,
8367 "xmlParseChunk: encoder error\n");
8368 return(XML_ERR_INVALID_ENCODING);
8369 }
8370 }
8371 }
8372 }
8373 xmlParseTryOrFinish(ctxt, terminate);
8374 if (terminate) {
8375 /*
8376 * Check for termination
8377 */
8378 if ((ctxt->instate != XML_PARSER_EOF) &&
8379 (ctxt->instate != XML_PARSER_EPILOG)) {
8380 ctxt->errNo = XML_ERR_DOCUMENT_END;
8381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8382 ctxt->sax->error(ctxt->userData,
8383 "Extra content at the end of the document\n");
8384 ctxt->wellFormed = 0;
8385 ctxt->disableSAX = 1;
8386 }
8387 if (ctxt->instate != XML_PARSER_EOF) {
8388 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8389 (!ctxt->disableSAX))
8390 ctxt->sax->endDocument(ctxt->userData);
8391 }
8392 ctxt->instate = XML_PARSER_EOF;
8393 }
8394 return((xmlParserErrors) ctxt->errNo);
8395}
8396
8397/************************************************************************
8398 * *
8399 * I/O front end functions to the parser *
8400 * *
8401 ************************************************************************/
8402
8403/**
8404 * xmlStopParser:
8405 * @ctxt: an XML parser context
8406 *
8407 * Blocks further parser processing
8408 */
8409void
8410xmlStopParser(xmlParserCtxtPtr ctxt) {
8411 ctxt->instate = XML_PARSER_EOF;
8412 if (ctxt->input != NULL)
8413 ctxt->input->cur = BAD_CAST"";
8414}
8415
8416/**
8417 * xmlCreatePushParserCtxt:
8418 * @sax: a SAX handler
8419 * @user_data: The user data returned on SAX callbacks
8420 * @chunk: a pointer to an array of chars
8421 * @size: number of chars in the array
8422 * @filename: an optional file name or URI
8423 *
8424 * Create a parser context for using the XML parser in push mode
8425 * To allow content encoding detection, @size should be >= 4
8426 * The value of @filename is used for fetching external entities
8427 * and error/warning reports.
8428 *
8429 * Returns the new parser context or NULL
8430 */
8431xmlParserCtxtPtr
8432xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8433 const char *chunk, int size, const char *filename) {
8434 xmlParserCtxtPtr ctxt;
8435 xmlParserInputPtr inputStream;
8436 xmlParserInputBufferPtr buf;
8437 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8438
8439 /*
8440 * plug some encoding conversion routines
8441 */
8442 if ((chunk != NULL) && (size >= 4))
8443 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8444
8445 buf = xmlAllocParserInputBuffer(enc);
8446 if (buf == NULL) return(NULL);
8447
8448 ctxt = xmlNewParserCtxt();
8449 if (ctxt == NULL) {
8450 xmlFree(buf);
8451 return(NULL);
8452 }
8453 if (sax != NULL) {
8454 if (ctxt->sax != &xmlDefaultSAXHandler)
8455 xmlFree(ctxt->sax);
8456 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8457 if (ctxt->sax == NULL) {
8458 xmlFree(buf);
8459 xmlFree(ctxt);
8460 return(NULL);
8461 }
8462 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8463 if (user_data != NULL)
8464 ctxt->userData = user_data;
8465 }
8466 if (filename == NULL) {
8467 ctxt->directory = NULL;
8468 } else {
8469 ctxt->directory = xmlParserGetDirectory(filename);
8470 }
8471
8472 inputStream = xmlNewInputStream(ctxt);
8473 if (inputStream == NULL) {
8474 xmlFreeParserCtxt(ctxt);
8475 return(NULL);
8476 }
8477
8478 if (filename == NULL)
8479 inputStream->filename = NULL;
8480 else
8481 inputStream->filename = xmlMemStrdup(filename);
8482 inputStream->buf = buf;
8483 inputStream->base = inputStream->buf->buffer->content;
8484 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008485 inputStream->end =
8486 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008487 if (enc != XML_CHAR_ENCODING_NONE) {
8488 xmlSwitchEncoding(ctxt, enc);
8489 }
8490
8491 inputPush(ctxt, inputStream);
8492
8493 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8494 (ctxt->input->buf != NULL)) {
8495 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8496#ifdef DEBUG_PUSH
8497 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8498#endif
8499 }
8500
8501 return(ctxt);
8502}
8503
8504/**
8505 * xmlCreateIOParserCtxt:
8506 * @sax: a SAX handler
8507 * @user_data: The user data returned on SAX callbacks
8508 * @ioread: an I/O read function
8509 * @ioclose: an I/O close function
8510 * @ioctx: an I/O handler
8511 * @enc: the charset encoding if known
8512 *
8513 * Create a parser context for using the XML parser with an existing
8514 * I/O stream
8515 *
8516 * Returns the new parser context or NULL
8517 */
8518xmlParserCtxtPtr
8519xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8520 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8521 void *ioctx, xmlCharEncoding enc) {
8522 xmlParserCtxtPtr ctxt;
8523 xmlParserInputPtr inputStream;
8524 xmlParserInputBufferPtr buf;
8525
8526 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8527 if (buf == NULL) return(NULL);
8528
8529 ctxt = xmlNewParserCtxt();
8530 if (ctxt == NULL) {
8531 xmlFree(buf);
8532 return(NULL);
8533 }
8534 if (sax != NULL) {
8535 if (ctxt->sax != &xmlDefaultSAXHandler)
8536 xmlFree(ctxt->sax);
8537 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8538 if (ctxt->sax == NULL) {
8539 xmlFree(buf);
8540 xmlFree(ctxt);
8541 return(NULL);
8542 }
8543 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8544 if (user_data != NULL)
8545 ctxt->userData = user_data;
8546 }
8547
8548 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8549 if (inputStream == NULL) {
8550 xmlFreeParserCtxt(ctxt);
8551 return(NULL);
8552 }
8553 inputPush(ctxt, inputStream);
8554
8555 return(ctxt);
8556}
8557
/************************************************************************
 *                                                                      *
 *              Front ends when parsing a Dtd                           *
 *                                                                      *
 ************************************************************************/
8563
8564/**
8565 * xmlIOParseDTD:
8566 * @sax: the SAX handler block or NULL
8567 * @input: an Input Buffer
8568 * @enc: the charset encoding if known
8569 *
8570 * Load and parse a DTD
8571 *
8572 * Returns the resulting xmlDtdPtr or NULL in case of error.
8573 * @input will be freed at parsing end.
8574 */
8575
8576xmlDtdPtr
8577xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8578 xmlCharEncoding enc) {
8579 xmlDtdPtr ret = NULL;
8580 xmlParserCtxtPtr ctxt;
8581 xmlParserInputPtr pinput = NULL;
8582
8583 if (input == NULL)
8584 return(NULL);
8585
8586 ctxt = xmlNewParserCtxt();
8587 if (ctxt == NULL) {
8588 return(NULL);
8589 }
8590
8591 /*
8592 * Set-up the SAX context
8593 */
8594 if (sax != NULL) {
8595 if (ctxt->sax != NULL)
8596 xmlFree(ctxt->sax);
8597 ctxt->sax = sax;
8598 ctxt->userData = NULL;
8599 }
8600
8601 /*
8602 * generate a parser input from the I/O handler
8603 */
8604
8605 pinput = xmlNewIOInputStream(ctxt, input, enc);
8606 if (pinput == NULL) {
8607 if (sax != NULL) ctxt->sax = NULL;
8608 xmlFreeParserCtxt(ctxt);
8609 return(NULL);
8610 }
8611
8612 /*
8613 * plug some encoding conversion routines here.
8614 */
8615 xmlPushInput(ctxt, pinput);
8616
8617 pinput->filename = NULL;
8618 pinput->line = 1;
8619 pinput->col = 1;
8620 pinput->base = ctxt->input->cur;
8621 pinput->cur = ctxt->input->cur;
8622 pinput->free = NULL;
8623
8624 /*
8625 * let's parse that entity knowing it's an external subset.
8626 */
8627 ctxt->inSubset = 2;
8628 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8629 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8630 BAD_CAST "none", BAD_CAST "none");
8631 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8632
8633 if (ctxt->myDoc != NULL) {
8634 if (ctxt->wellFormed) {
8635 ret = ctxt->myDoc->extSubset;
8636 ctxt->myDoc->extSubset = NULL;
8637 } else {
8638 ret = NULL;
8639 }
8640 xmlFreeDoc(ctxt->myDoc);
8641 ctxt->myDoc = NULL;
8642 }
8643 if (sax != NULL) ctxt->sax = NULL;
8644 xmlFreeParserCtxt(ctxt);
8645
8646 return(ret);
8647}
8648
8649/**
8650 * xmlSAXParseDTD:
8651 * @sax: the SAX handler block
8652 * @ExternalID: a NAME* containing the External ID of the DTD
8653 * @SystemID: a NAME* containing the URL to the DTD
8654 *
8655 * Load and parse an external subset.
8656 *
8657 * Returns the resulting xmlDtdPtr or NULL in case of error.
8658 */
8659
8660xmlDtdPtr
8661xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8662 const xmlChar *SystemID) {
8663 xmlDtdPtr ret = NULL;
8664 xmlParserCtxtPtr ctxt;
8665 xmlParserInputPtr input = NULL;
8666 xmlCharEncoding enc;
8667
8668 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8669
8670 ctxt = xmlNewParserCtxt();
8671 if (ctxt == NULL) {
8672 return(NULL);
8673 }
8674
8675 /*
8676 * Set-up the SAX context
8677 */
8678 if (sax != NULL) {
8679 if (ctxt->sax != NULL)
8680 xmlFree(ctxt->sax);
8681 ctxt->sax = sax;
8682 ctxt->userData = NULL;
8683 }
8684
8685 /*
8686 * Ask the Entity resolver to load the damn thing
8687 */
8688
8689 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8690 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8691 if (input == NULL) {
8692 if (sax != NULL) ctxt->sax = NULL;
8693 xmlFreeParserCtxt(ctxt);
8694 return(NULL);
8695 }
8696
8697 /*
8698 * plug some encoding conversion routines here.
8699 */
8700 xmlPushInput(ctxt, input);
8701 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8702 xmlSwitchEncoding(ctxt, enc);
8703
8704 if (input->filename == NULL)
8705 input->filename = (char *) xmlStrdup(SystemID);
8706 input->line = 1;
8707 input->col = 1;
8708 input->base = ctxt->input->cur;
8709 input->cur = ctxt->input->cur;
8710 input->free = NULL;
8711
8712 /*
8713 * let's parse that entity knowing it's an external subset.
8714 */
8715 ctxt->inSubset = 2;
8716 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8717 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8718 ExternalID, SystemID);
8719 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8720
8721 if (ctxt->myDoc != NULL) {
8722 if (ctxt->wellFormed) {
8723 ret = ctxt->myDoc->extSubset;
8724 ctxt->myDoc->extSubset = NULL;
8725 } else {
8726 ret = NULL;
8727 }
8728 xmlFreeDoc(ctxt->myDoc);
8729 ctxt->myDoc = NULL;
8730 }
8731 if (sax != NULL) ctxt->sax = NULL;
8732 xmlFreeParserCtxt(ctxt);
8733
8734 return(ret);
8735}
8736
8737/**
8738 * xmlParseDTD:
8739 * @ExternalID: a NAME* containing the External ID of the DTD
8740 * @SystemID: a NAME* containing the URL to the DTD
8741 *
8742 * Load and parse an external subset.
8743 *
8744 * Returns the resulting xmlDtdPtr or NULL in case of error.
8745 */
8746
8747xmlDtdPtr
8748xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8749 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8750}
8751
/************************************************************************
 *                                                                      *
 *              Front ends when parsing an Entity                       *
 *                                                                      *
 ************************************************************************/
8757
8758/**
Owen Taylor3473f882001-02-23 17:55:21 +00008759 * xmlParseCtxtExternalEntity:
8760 * @ctx: the existing parsing context
8761 * @URL: the URL for the entity to load
8762 * @ID: the System ID for the entity to load
8763 * @list: the return value for the set of parsed nodes
8764 *
8765 * Parse an external general entity within an existing parsing context
8766 * An external general parsed entity is well-formed if it matches the
8767 * production labeled extParsedEnt.
8768 *
8769 * [78] extParsedEnt ::= TextDecl? content
8770 *
8771 * Returns 0 if the entity is well formed, -1 in case of args problem and
8772 * the parser error code otherwise
8773 */
8774
8775int
8776xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8777 const xmlChar *ID, xmlNodePtr *list) {
8778 xmlParserCtxtPtr ctxt;
8779 xmlDocPtr newDoc;
8780 xmlSAXHandlerPtr oldsax = NULL;
8781 int ret = 0;
8782
8783 if (ctx->depth > 40) {
8784 return(XML_ERR_ENTITY_LOOP);
8785 }
8786
8787 if (list != NULL)
8788 *list = NULL;
8789 if ((URL == NULL) && (ID == NULL))
8790 return(-1);
8791 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8792 return(-1);
8793
8794
8795 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8796 if (ctxt == NULL) return(-1);
8797 ctxt->userData = ctxt;
8798 oldsax = ctxt->sax;
8799 ctxt->sax = ctx->sax;
8800 newDoc = xmlNewDoc(BAD_CAST "1.0");
8801 if (newDoc == NULL) {
8802 xmlFreeParserCtxt(ctxt);
8803 return(-1);
8804 }
8805 if (ctx->myDoc != NULL) {
8806 newDoc->intSubset = ctx->myDoc->intSubset;
8807 newDoc->extSubset = ctx->myDoc->extSubset;
8808 }
8809 if (ctx->myDoc->URL != NULL) {
8810 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8811 }
8812 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8813 if (newDoc->children == NULL) {
8814 ctxt->sax = oldsax;
8815 xmlFreeParserCtxt(ctxt);
8816 newDoc->intSubset = NULL;
8817 newDoc->extSubset = NULL;
8818 xmlFreeDoc(newDoc);
8819 return(-1);
8820 }
8821 nodePush(ctxt, newDoc->children);
8822 if (ctx->myDoc == NULL) {
8823 ctxt->myDoc = newDoc;
8824 } else {
8825 ctxt->myDoc = ctx->myDoc;
8826 newDoc->children->doc = ctx->myDoc;
8827 }
8828
8829 /*
8830 * Parse a possible text declaration first
8831 */
8832 GROW;
8833 if ((RAW == '<') && (NXT(1) == '?') &&
8834 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8835 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8836 xmlParseTextDecl(ctxt);
8837 }
8838
8839 /*
8840 * Doing validity checking on chunk doesn't make sense
8841 */
8842 ctxt->instate = XML_PARSER_CONTENT;
8843 ctxt->validate = ctx->validate;
8844 ctxt->loadsubset = ctx->loadsubset;
8845 ctxt->depth = ctx->depth + 1;
8846 ctxt->replaceEntities = ctx->replaceEntities;
8847 if (ctxt->validate) {
8848 ctxt->vctxt.error = ctx->vctxt.error;
8849 ctxt->vctxt.warning = ctx->vctxt.warning;
8850 /* Allocate the Node stack */
8851 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8852 if (ctxt->vctxt.nodeTab == NULL) {
8853 xmlGenericError(xmlGenericErrorContext,
8854 "xmlParseCtxtExternalEntity: out of memory\n");
8855 ctxt->validate = 0;
8856 ctxt->vctxt.error = NULL;
8857 ctxt->vctxt.warning = NULL;
8858 } else {
8859 ctxt->vctxt.nodeNr = 0;
8860 ctxt->vctxt.nodeMax = 4;
8861 ctxt->vctxt.node = NULL;
8862 }
8863 } else {
8864 ctxt->vctxt.error = NULL;
8865 ctxt->vctxt.warning = NULL;
8866 }
8867
8868 xmlParseContent(ctxt);
8869
8870 if ((RAW == '<') && (NXT(1) == '/')) {
8871 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8873 ctxt->sax->error(ctxt->userData,
8874 "chunk is not well balanced\n");
8875 ctxt->wellFormed = 0;
8876 ctxt->disableSAX = 1;
8877 } else if (RAW != 0) {
8878 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8880 ctxt->sax->error(ctxt->userData,
8881 "extra content at the end of well balanced chunk\n");
8882 ctxt->wellFormed = 0;
8883 ctxt->disableSAX = 1;
8884 }
8885 if (ctxt->node != newDoc->children) {
8886 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8888 ctxt->sax->error(ctxt->userData,
8889 "chunk is not well balanced\n");
8890 ctxt->wellFormed = 0;
8891 ctxt->disableSAX = 1;
8892 }
8893
8894 if (!ctxt->wellFormed) {
8895 if (ctxt->errNo == 0)
8896 ret = 1;
8897 else
8898 ret = ctxt->errNo;
8899 } else {
8900 if (list != NULL) {
8901 xmlNodePtr cur;
8902
8903 /*
8904 * Return the newly created nodeset after unlinking it from
8905 * they pseudo parent.
8906 */
8907 cur = newDoc->children->children;
8908 *list = cur;
8909 while (cur != NULL) {
8910 cur->parent = NULL;
8911 cur = cur->next;
8912 }
8913 newDoc->children->children = NULL;
8914 }
8915 ret = 0;
8916 }
8917 ctxt->sax = oldsax;
8918 xmlFreeParserCtxt(ctxt);
8919 newDoc->intSubset = NULL;
8920 newDoc->extSubset = NULL;
8921 xmlFreeDoc(newDoc);
8922
8923 return(ret);
8924}
8925
8926/**
Daniel Veillard257d9102001-05-08 10:41:44 +00008927 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00008928 * @doc: the document the chunk pertains to
8929 * @sax: the SAX handler bloc (possibly NULL)
8930 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8931 * @depth: Used for loop detection, use 0
8932 * @URL: the URL for the entity to load
8933 * @ID: the System ID for the entity to load
8934 * @list: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00008935 * @private: extra field for the _private parser context
Owen Taylor3473f882001-02-23 17:55:21 +00008936 *
Daniel Veillard257d9102001-05-08 10:41:44 +00008937 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00008938 *
8939 * Returns 0 if the entity is well formed, -1 in case of args problem and
8940 * the parser error code otherwise
8941 */
8942
Daniel Veillard257d9102001-05-08 10:41:44 +00008943static int
8944xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlSAXHandlerPtr sax,
8945 void *user_data, int depth, const xmlChar *URL,
8946 const xmlChar *ID, xmlNodePtr *list, void *private) {
Owen Taylor3473f882001-02-23 17:55:21 +00008947 xmlParserCtxtPtr ctxt;
8948 xmlDocPtr newDoc;
8949 xmlSAXHandlerPtr oldsax = NULL;
8950 int ret = 0;
8951
8952 if (depth > 40) {
8953 return(XML_ERR_ENTITY_LOOP);
8954 }
8955
8956
8957
8958 if (list != NULL)
8959 *list = NULL;
8960 if ((URL == NULL) && (ID == NULL))
8961 return(-1);
8962 if (doc == NULL) /* @@ relax but check for dereferences */
8963 return(-1);
8964
8965
8966 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8967 if (ctxt == NULL) return(-1);
8968 ctxt->userData = ctxt;
Daniel Veillard257d9102001-05-08 10:41:44 +00008969 ctxt->_private = private;
Owen Taylor3473f882001-02-23 17:55:21 +00008970 if (sax != NULL) {
8971 oldsax = ctxt->sax;
8972 ctxt->sax = sax;
8973 if (user_data != NULL)
8974 ctxt->userData = user_data;
8975 }
8976 newDoc = xmlNewDoc(BAD_CAST "1.0");
8977 if (newDoc == NULL) {
8978 xmlFreeParserCtxt(ctxt);
8979 return(-1);
8980 }
8981 if (doc != NULL) {
8982 newDoc->intSubset = doc->intSubset;
8983 newDoc->extSubset = doc->extSubset;
8984 }
8985 if (doc->URL != NULL) {
8986 newDoc->URL = xmlStrdup(doc->URL);
8987 }
8988 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8989 if (newDoc->children == NULL) {
8990 if (sax != NULL)
8991 ctxt->sax = oldsax;
8992 xmlFreeParserCtxt(ctxt);
8993 newDoc->intSubset = NULL;
8994 newDoc->extSubset = NULL;
8995 xmlFreeDoc(newDoc);
8996 return(-1);
8997 }
8998 nodePush(ctxt, newDoc->children);
8999 if (doc == NULL) {
9000 ctxt->myDoc = newDoc;
9001 } else {
9002 ctxt->myDoc = doc;
9003 newDoc->children->doc = doc;
9004 }
9005
9006 /*
9007 * Parse a possible text declaration first
9008 */
9009 GROW;
9010 if ((RAW == '<') && (NXT(1) == '?') &&
9011 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9012 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9013 xmlParseTextDecl(ctxt);
9014 }
9015
9016 /*
9017 * Doing validity checking on chunk doesn't make sense
9018 */
9019 ctxt->instate = XML_PARSER_CONTENT;
9020 ctxt->validate = 0;
Daniel Veillarde470df72001-04-18 21:41:07 +00009021 ctxt->external = 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009022 ctxt->loadsubset = 0;
9023 ctxt->depth = depth;
9024
9025 xmlParseContent(ctxt);
9026
9027 if ((RAW == '<') && (NXT(1) == '/')) {
9028 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9030 ctxt->sax->error(ctxt->userData,
9031 "chunk is not well balanced\n");
9032 ctxt->wellFormed = 0;
9033 ctxt->disableSAX = 1;
9034 } else if (RAW != 0) {
9035 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9037 ctxt->sax->error(ctxt->userData,
9038 "extra content at the end of well balanced chunk\n");
9039 ctxt->wellFormed = 0;
9040 ctxt->disableSAX = 1;
9041 }
9042 if (ctxt->node != newDoc->children) {
9043 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9045 ctxt->sax->error(ctxt->userData,
9046 "chunk is not well balanced\n");
9047 ctxt->wellFormed = 0;
9048 ctxt->disableSAX = 1;
9049 }
9050
9051 if (!ctxt->wellFormed) {
9052 if (ctxt->errNo == 0)
9053 ret = 1;
9054 else
9055 ret = ctxt->errNo;
9056 } else {
9057 if (list != NULL) {
9058 xmlNodePtr cur;
9059
9060 /*
9061 * Return the newly created nodeset after unlinking it from
9062 * they pseudo parent.
9063 */
9064 cur = newDoc->children->children;
9065 *list = cur;
9066 while (cur != NULL) {
9067 cur->parent = NULL;
9068 cur = cur->next;
9069 }
9070 newDoc->children->children = NULL;
9071 }
9072 ret = 0;
9073 }
9074 if (sax != NULL)
9075 ctxt->sax = oldsax;
9076 xmlFreeParserCtxt(ctxt);
9077 newDoc->intSubset = NULL;
9078 newDoc->extSubset = NULL;
9079 xmlFreeDoc(newDoc);
9080
9081 return(ret);
9082}
9083
9084/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009085 * xmlParseExternalEntity:
9086 * @doc: the document the chunk pertains to
9087 * @sax: the SAX handler bloc (possibly NULL)
9088 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9089 * @depth: Used for loop detection, use 0
9090 * @URL: the URL for the entity to load
9091 * @ID: the System ID for the entity to load
9092 * @list: the return value for the set of parsed nodes
9093 *
9094 * Parse an external general entity
9095 * An external general parsed entity is well-formed if it matches the
9096 * production labeled extParsedEnt.
9097 *
9098 * [78] extParsedEnt ::= TextDecl? content
9099 *
9100 * Returns 0 if the entity is well formed, -1 in case of args problem and
9101 * the parser error code otherwise
9102 */
9103
9104int
9105xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9106 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
9107 return(xmlParseExternalEntityPrivate(doc, sax, user_data, depth, URL,
9108 ID, list, NULL));
9109}
9110
9111/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009112 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009113 * @doc: the document the chunk pertains to
9114 * @sax: the SAX handler bloc (possibly NULL)
9115 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9116 * @depth: Used for loop detection, use 0
9117 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9118 * @list: the return value for the set of parsed nodes
9119 *
9120 * Parse a well-balanced chunk of an XML document
9121 * called by the parser
9122 * The allowed sequence for the Well Balanced Chunk is the one defined by
9123 * the content production in the XML grammar:
9124 *
9125 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9126 *
9127 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9128 * the parser error code otherwise
9129 */
9130
9131int
9132xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9133 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9134 xmlParserCtxtPtr ctxt;
9135 xmlDocPtr newDoc;
9136 xmlSAXHandlerPtr oldsax = NULL;
9137 int size;
9138 int ret = 0;
9139
9140 if (depth > 40) {
9141 return(XML_ERR_ENTITY_LOOP);
9142 }
9143
9144
9145 if (list != NULL)
9146 *list = NULL;
9147 if (string == NULL)
9148 return(-1);
9149
9150 size = xmlStrlen(string);
9151
9152 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9153 if (ctxt == NULL) return(-1);
9154 ctxt->userData = ctxt;
9155 if (sax != NULL) {
9156 oldsax = ctxt->sax;
9157 ctxt->sax = sax;
9158 if (user_data != NULL)
9159 ctxt->userData = user_data;
9160 }
9161 newDoc = xmlNewDoc(BAD_CAST "1.0");
9162 if (newDoc == NULL) {
9163 xmlFreeParserCtxt(ctxt);
9164 return(-1);
9165 }
9166 if (doc != NULL) {
9167 newDoc->intSubset = doc->intSubset;
9168 newDoc->extSubset = doc->extSubset;
9169 }
9170 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9171 if (newDoc->children == NULL) {
9172 if (sax != NULL)
9173 ctxt->sax = oldsax;
9174 xmlFreeParserCtxt(ctxt);
9175 newDoc->intSubset = NULL;
9176 newDoc->extSubset = NULL;
9177 xmlFreeDoc(newDoc);
9178 return(-1);
9179 }
9180 nodePush(ctxt, newDoc->children);
9181 if (doc == NULL) {
9182 ctxt->myDoc = newDoc;
9183 } else {
9184 ctxt->myDoc = doc;
9185 newDoc->children->doc = doc;
9186 }
9187 ctxt->instate = XML_PARSER_CONTENT;
9188 ctxt->depth = depth;
9189
9190 /*
9191 * Doing validity checking on chunk doesn't make sense
9192 */
9193 ctxt->validate = 0;
9194 ctxt->loadsubset = 0;
9195
9196 xmlParseContent(ctxt);
9197
9198 if ((RAW == '<') && (NXT(1) == '/')) {
9199 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9201 ctxt->sax->error(ctxt->userData,
9202 "chunk is not well balanced\n");
9203 ctxt->wellFormed = 0;
9204 ctxt->disableSAX = 1;
9205 } else if (RAW != 0) {
9206 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9208 ctxt->sax->error(ctxt->userData,
9209 "extra content at the end of well balanced chunk\n");
9210 ctxt->wellFormed = 0;
9211 ctxt->disableSAX = 1;
9212 }
9213 if (ctxt->node != newDoc->children) {
9214 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9216 ctxt->sax->error(ctxt->userData,
9217 "chunk is not well balanced\n");
9218 ctxt->wellFormed = 0;
9219 ctxt->disableSAX = 1;
9220 }
9221
9222 if (!ctxt->wellFormed) {
9223 if (ctxt->errNo == 0)
9224 ret = 1;
9225 else
9226 ret = ctxt->errNo;
9227 } else {
9228 if (list != NULL) {
9229 xmlNodePtr cur;
9230
9231 /*
9232 * Return the newly created nodeset after unlinking it from
9233 * they pseudo parent.
9234 */
9235 cur = newDoc->children->children;
9236 *list = cur;
9237 while (cur != NULL) {
9238 cur->parent = NULL;
9239 cur = cur->next;
9240 }
9241 newDoc->children->children = NULL;
9242 }
9243 ret = 0;
9244 }
9245 if (sax != NULL)
9246 ctxt->sax = oldsax;
9247 xmlFreeParserCtxt(ctxt);
9248 newDoc->intSubset = NULL;
9249 newDoc->extSubset = NULL;
9250 xmlFreeDoc(newDoc);
9251
9252 return(ret);
9253}
9254
9255/**
9256 * xmlSAXParseEntity:
9257 * @sax: the SAX handler block
9258 * @filename: the filename
9259 *
9260 * parse an XML external entity out of context and build a tree.
9261 * It use the given SAX function block to handle the parsing callback.
9262 * If sax is NULL, fallback to the default DOM tree building routines.
9263 *
9264 * [78] extParsedEnt ::= TextDecl? content
9265 *
9266 * This correspond to a "Well Balanced" chunk
9267 *
9268 * Returns the resulting document tree
9269 */
9270
9271xmlDocPtr
9272xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9273 xmlDocPtr ret;
9274 xmlParserCtxtPtr ctxt;
9275 char *directory = NULL;
9276
9277 ctxt = xmlCreateFileParserCtxt(filename);
9278 if (ctxt == NULL) {
9279 return(NULL);
9280 }
9281 if (sax != NULL) {
9282 if (ctxt->sax != NULL)
9283 xmlFree(ctxt->sax);
9284 ctxt->sax = sax;
9285 ctxt->userData = NULL;
9286 }
9287
9288 if ((ctxt->directory == NULL) && (directory == NULL))
9289 directory = xmlParserGetDirectory(filename);
9290
9291 xmlParseExtParsedEnt(ctxt);
9292
9293 if (ctxt->wellFormed)
9294 ret = ctxt->myDoc;
9295 else {
9296 ret = NULL;
9297 xmlFreeDoc(ctxt->myDoc);
9298 ctxt->myDoc = NULL;
9299 }
9300 if (sax != NULL)
9301 ctxt->sax = NULL;
9302 xmlFreeParserCtxt(ctxt);
9303
9304 return(ret);
9305}
9306
9307/**
9308 * xmlParseEntity:
9309 * @filename: the filename
9310 *
9311 * parse an XML external entity out of context and build a tree.
9312 *
9313 * [78] extParsedEnt ::= TextDecl? content
9314 *
9315 * This correspond to a "Well Balanced" chunk
9316 *
9317 * Returns the resulting document tree
9318 */
9319
9320xmlDocPtr
9321xmlParseEntity(const char *filename) {
9322 return(xmlSAXParseEntity(NULL, filename));
9323}
9324
9325/**
9326 * xmlCreateEntityParserCtxt:
9327 * @URL: the entity URL
9328 * @ID: the entity PUBLIC ID
9329 * @base: a posible base for the target URI
9330 *
9331 * Create a parser context for an external entity
9332 * Automatic support for ZLIB/Compress compressed document is provided
9333 * by default if found at compile-time.
9334 *
9335 * Returns the new parser context or NULL
9336 */
9337xmlParserCtxtPtr
9338xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9339 const xmlChar *base) {
9340 xmlParserCtxtPtr ctxt;
9341 xmlParserInputPtr inputStream;
9342 char *directory = NULL;
9343 xmlChar *uri;
9344
9345 ctxt = xmlNewParserCtxt();
9346 if (ctxt == NULL) {
9347 return(NULL);
9348 }
9349
9350 uri = xmlBuildURI(URL, base);
9351
9352 if (uri == NULL) {
9353 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9354 if (inputStream == NULL) {
9355 xmlFreeParserCtxt(ctxt);
9356 return(NULL);
9357 }
9358
9359 inputPush(ctxt, inputStream);
9360
9361 if ((ctxt->directory == NULL) && (directory == NULL))
9362 directory = xmlParserGetDirectory((char *)URL);
9363 if ((ctxt->directory == NULL) && (directory != NULL))
9364 ctxt->directory = directory;
9365 } else {
9366 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9367 if (inputStream == NULL) {
9368 xmlFree(uri);
9369 xmlFreeParserCtxt(ctxt);
9370 return(NULL);
9371 }
9372
9373 inputPush(ctxt, inputStream);
9374
9375 if ((ctxt->directory == NULL) && (directory == NULL))
9376 directory = xmlParserGetDirectory((char *)uri);
9377 if ((ctxt->directory == NULL) && (directory != NULL))
9378 ctxt->directory = directory;
9379 xmlFree(uri);
9380 }
9381
9382 return(ctxt);
9383}
9384
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from a file                     *
 *                                                                      *
 ************************************************************************/
9390
9391/**
9392 * xmlCreateFileParserCtxt:
9393 * @filename: the filename
9394 *
9395 * Create a parser context for a file content.
9396 * Automatic support for ZLIB/Compress compressed document is provided
9397 * by default if found at compile-time.
9398 *
9399 * Returns the new parser context or NULL
9400 */
9401xmlParserCtxtPtr
9402xmlCreateFileParserCtxt(const char *filename)
9403{
9404 xmlParserCtxtPtr ctxt;
9405 xmlParserInputPtr inputStream;
9406 xmlParserInputBufferPtr buf;
9407 char *directory = NULL;
9408
9409 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9410 if (buf == NULL) {
9411 return(NULL);
9412 }
9413
9414 ctxt = xmlNewParserCtxt();
9415 if (ctxt == NULL) {
9416 if (xmlDefaultSAXHandler.error != NULL) {
9417 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9418 }
9419 return(NULL);
9420 }
9421
9422 inputStream = xmlNewInputStream(ctxt);
9423 if (inputStream == NULL) {
9424 xmlFreeParserCtxt(ctxt);
9425 return(NULL);
9426 }
9427
9428 inputStream->filename = xmlMemStrdup(filename);
9429 inputStream->buf = buf;
9430 inputStream->base = inputStream->buf->buffer->content;
9431 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009432 inputStream->end =
9433 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009434
9435 inputPush(ctxt, inputStream);
9436 if ((ctxt->directory == NULL) && (directory == NULL))
9437 directory = xmlParserGetDirectory(filename);
9438 if ((ctxt->directory == NULL) && (directory != NULL))
9439 ctxt->directory = directory;
9440
9441 return(ctxt);
9442}
9443
9444/**
9445 * xmlSAXParseFile:
9446 * @sax: the SAX handler block
9447 * @filename: the filename
9448 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9449 * documents
9450 *
9451 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9452 * compressed document is provided by default if found at compile-time.
9453 * It use the given SAX function block to handle the parsing callback.
9454 * If sax is NULL, fallback to the default DOM tree building routines.
9455 *
9456 * Returns the resulting document tree
9457 */
9458
9459xmlDocPtr
9460xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9461 int recovery) {
9462 xmlDocPtr ret;
9463 xmlParserCtxtPtr ctxt;
9464 char *directory = NULL;
9465
9466 ctxt = xmlCreateFileParserCtxt(filename);
9467 if (ctxt == NULL) {
9468 return(NULL);
9469 }
9470 if (sax != NULL) {
9471 if (ctxt->sax != NULL)
9472 xmlFree(ctxt->sax);
9473 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009474 }
9475
9476 if ((ctxt->directory == NULL) && (directory == NULL))
9477 directory = xmlParserGetDirectory(filename);
9478 if ((ctxt->directory == NULL) && (directory != NULL))
9479 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9480
9481 xmlParseDocument(ctxt);
9482
9483 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9484 else {
9485 ret = NULL;
9486 xmlFreeDoc(ctxt->myDoc);
9487 ctxt->myDoc = NULL;
9488 }
9489 if (sax != NULL)
9490 ctxt->sax = NULL;
9491 xmlFreeParserCtxt(ctxt);
9492
9493 return(ret);
9494}
9495
9496/**
9497 * xmlRecoverDoc:
9498 * @cur: a pointer to an array of xmlChar
9499 *
9500 * parse an XML in-memory document and build a tree.
9501 * In the case the document is not Well Formed, a tree is built anyway
9502 *
9503 * Returns the resulting document tree
9504 */
9505
9506xmlDocPtr
9507xmlRecoverDoc(xmlChar *cur) {
9508 return(xmlSAXParseDoc(NULL, cur, 1));
9509}
9510
9511/**
9512 * xmlParseFile:
9513 * @filename: the filename
9514 *
9515 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9516 * compressed document is provided by default if found at compile-time.
9517 *
9518 * Returns the resulting document tree
9519 */
9520
9521xmlDocPtr
9522xmlParseFile(const char *filename) {
9523 return(xmlSAXParseFile(NULL, filename, 0));
9524}
9525
9526/**
9527 * xmlRecoverFile:
9528 * @filename: the filename
9529 *
9530 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9531 * compressed document is provided by default if found at compile-time.
9532 * In the case the document is not Well Formed, a tree is built anyway
9533 *
9534 * Returns the resulting document tree
9535 */
9536
9537xmlDocPtr
9538xmlRecoverFile(const char *filename) {
9539 return(xmlSAXParseFile(NULL, filename, 1));
9540}
9541
9542
9543/**
9544 * xmlSetupParserForBuffer:
9545 * @ctxt: an XML parser context
9546 * @buffer: a xmlChar * buffer
9547 * @filename: a file name
9548 *
9549 * Setup the parser context to parse a new buffer; Clears any prior
9550 * contents from the parser context. The buffer parameter must not be
9551 * NULL, but the filename parameter can be
9552 */
9553void
9554xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9555 const char* filename)
9556{
9557 xmlParserInputPtr input;
9558
9559 input = xmlNewInputStream(ctxt);
9560 if (input == NULL) {
9561 perror("malloc");
9562 xmlFree(ctxt);
9563 return;
9564 }
9565
9566 xmlClearParserCtxt(ctxt);
9567 if (filename != NULL)
9568 input->filename = xmlMemStrdup(filename);
9569 input->base = buffer;
9570 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009571 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009572 inputPush(ctxt, input);
9573}
9574
9575/**
9576 * xmlSAXUserParseFile:
9577 * @sax: a SAX handler
9578 * @user_data: The user data returned on SAX callbacks
9579 * @filename: a file name
9580 *
9581 * parse an XML file and call the given SAX handler routines.
9582 * Automatic support for ZLIB/Compress compressed document is provided
9583 *
9584 * Returns 0 in case of success or a error number otherwise
9585 */
9586int
9587xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9588 const char *filename) {
9589 int ret = 0;
9590 xmlParserCtxtPtr ctxt;
9591
9592 ctxt = xmlCreateFileParserCtxt(filename);
9593 if (ctxt == NULL) return -1;
9594 if (ctxt->sax != &xmlDefaultSAXHandler)
9595 xmlFree(ctxt->sax);
9596 ctxt->sax = sax;
9597 if (user_data != NULL)
9598 ctxt->userData = user_data;
9599
9600 xmlParseDocument(ctxt);
9601
9602 if (ctxt->wellFormed)
9603 ret = 0;
9604 else {
9605 if (ctxt->errNo != 0)
9606 ret = ctxt->errNo;
9607 else
9608 ret = -1;
9609 }
9610 if (sax != NULL)
9611 ctxt->sax = NULL;
9612 xmlFreeParserCtxt(ctxt);
9613
9614 return ret;
9615}
9616
9617/************************************************************************
9618 * *
9619 * Front ends when parsing from memory *
9620 * *
9621 ************************************************************************/
9622
9623/**
9624 * xmlCreateMemoryParserCtxt:
9625 * @buffer: a pointer to a char array
9626 * @size: the size of the array
9627 *
9628 * Create a parser context for an XML in-memory document.
9629 *
9630 * Returns the new parser context or NULL
9631 */
9632xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009633xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009634 xmlParserCtxtPtr ctxt;
9635 xmlParserInputPtr input;
9636 xmlParserInputBufferPtr buf;
9637
9638 if (buffer == NULL)
9639 return(NULL);
9640 if (size <= 0)
9641 return(NULL);
9642
9643 ctxt = xmlNewParserCtxt();
9644 if (ctxt == NULL)
9645 return(NULL);
9646
9647 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9648 if (buf == NULL) return(NULL);
9649
9650 input = xmlNewInputStream(ctxt);
9651 if (input == NULL) {
9652 xmlFreeParserCtxt(ctxt);
9653 return(NULL);
9654 }
9655
9656 input->filename = NULL;
9657 input->buf = buf;
9658 input->base = input->buf->buffer->content;
9659 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009660 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009661
9662 inputPush(ctxt, input);
9663 return(ctxt);
9664}
9665
9666/**
9667 * xmlSAXParseMemory:
9668 * @sax: the SAX handler block
9669 * @buffer: an pointer to a char array
9670 * @size: the size of the array
9671 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9672 * documents
9673 *
9674 * parse an XML in-memory block and use the given SAX function block
9675 * to handle the parsing callback. If sax is NULL, fallback to the default
9676 * DOM tree building routines.
9677 *
9678 * Returns the resulting document tree
9679 */
9680xmlDocPtr
9681xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9682 xmlDocPtr ret;
9683 xmlParserCtxtPtr ctxt;
9684
9685 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9686 if (ctxt == NULL) return(NULL);
9687 if (sax != NULL) {
9688 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009689 }
9690
9691 xmlParseDocument(ctxt);
9692
9693 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9694 else {
9695 ret = NULL;
9696 xmlFreeDoc(ctxt->myDoc);
9697 ctxt->myDoc = NULL;
9698 }
9699 if (sax != NULL)
9700 ctxt->sax = NULL;
9701 xmlFreeParserCtxt(ctxt);
9702
9703 return(ret);
9704}
9705
9706/**
9707 * xmlParseMemory:
9708 * @buffer: an pointer to a char array
9709 * @size: the size of the array
9710 *
9711 * parse an XML in-memory block and build a tree.
9712 *
9713 * Returns the resulting document tree
9714 */
9715
9716xmlDocPtr xmlParseMemory(char *buffer, int size) {
9717 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9718}
9719
9720/**
9721 * xmlRecoverMemory:
9722 * @buffer: an pointer to a char array
9723 * @size: the size of the array
9724 *
9725 * parse an XML in-memory block and build a tree.
9726 * In the case the document is not Well Formed, a tree is built anyway
9727 *
9728 * Returns the resulting document tree
9729 */
9730
9731xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9732 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9733}
9734
9735/**
9736 * xmlSAXUserParseMemory:
9737 * @sax: a SAX handler
9738 * @user_data: The user data returned on SAX callbacks
9739 * @buffer: an in-memory XML document input
9740 * @size: the length of the XML document in bytes
9741 *
9742 * A better SAX parsing routine.
9743 * parse an XML in-memory buffer and call the given SAX handler routines.
9744 *
9745 * Returns 0 in case of success or a error number otherwise
9746 */
9747int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009748 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009749 int ret = 0;
9750 xmlParserCtxtPtr ctxt;
9751 xmlSAXHandlerPtr oldsax = NULL;
9752
9753 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9754 if (ctxt == NULL) return -1;
9755 if (sax != NULL) {
9756 oldsax = ctxt->sax;
9757 ctxt->sax = sax;
9758 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009759 if (user_data != NULL)
9760 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009761
9762 xmlParseDocument(ctxt);
9763
9764 if (ctxt->wellFormed)
9765 ret = 0;
9766 else {
9767 if (ctxt->errNo != 0)
9768 ret = ctxt->errNo;
9769 else
9770 ret = -1;
9771 }
9772 if (sax != NULL) {
9773 ctxt->sax = oldsax;
9774 }
9775 xmlFreeParserCtxt(ctxt);
9776
9777 return ret;
9778}
9779
9780/**
9781 * xmlCreateDocParserCtxt:
9782 * @cur: a pointer to an array of xmlChar
9783 *
9784 * Creates a parser context for an XML in-memory document.
9785 *
9786 * Returns the new parser context or NULL
9787 */
9788xmlParserCtxtPtr
9789xmlCreateDocParserCtxt(xmlChar *cur) {
9790 int len;
9791
9792 if (cur == NULL)
9793 return(NULL);
9794 len = xmlStrlen(cur);
9795 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9796}
9797
9798/**
9799 * xmlSAXParseDoc:
9800 * @sax: the SAX handler block
9801 * @cur: a pointer to an array of xmlChar
9802 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9803 * documents
9804 *
9805 * parse an XML in-memory document and build a tree.
9806 * It use the given SAX function block to handle the parsing callback.
9807 * If sax is NULL, fallback to the default DOM tree building routines.
9808 *
9809 * Returns the resulting document tree
9810 */
9811
9812xmlDocPtr
9813xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9814 xmlDocPtr ret;
9815 xmlParserCtxtPtr ctxt;
9816
9817 if (cur == NULL) return(NULL);
9818
9819
9820 ctxt = xmlCreateDocParserCtxt(cur);
9821 if (ctxt == NULL) return(NULL);
9822 if (sax != NULL) {
9823 ctxt->sax = sax;
9824 ctxt->userData = NULL;
9825 }
9826
9827 xmlParseDocument(ctxt);
9828 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9829 else {
9830 ret = NULL;
9831 xmlFreeDoc(ctxt->myDoc);
9832 ctxt->myDoc = NULL;
9833 }
9834 if (sax != NULL)
9835 ctxt->sax = NULL;
9836 xmlFreeParserCtxt(ctxt);
9837
9838 return(ret);
9839}
9840
9841/**
9842 * xmlParseDoc:
9843 * @cur: a pointer to an array of xmlChar
9844 *
9845 * parse an XML in-memory document and build a tree.
9846 *
9847 * Returns the resulting document tree
9848 */
9849
9850xmlDocPtr
9851xmlParseDoc(xmlChar *cur) {
9852 return(xmlSAXParseDoc(NULL, cur, 0));
9853}
9854
9855
9856/************************************************************************
9857 * *
9858 * Miscellaneous *
9859 * *
9860 ************************************************************************/
9861
9862#ifdef LIBXML_XPATH_ENABLED
9863#include <libxml/xpath.h>
9864#endif
9865
/*
 * Non-zero once xmlInitParser() has completed its global setup;
 * xmlCleanupParser() resets it to 0.
 */
static int xmlParserInitialized = 0;
9867
9868/**
9869 * xmlInitParser:
9870 *
9871 * Initialization function for the XML parser.
9872 * This is not reentrant. Call once before processing in case of
9873 * use in multithreaded programs.
9874 */
9875
9876void
9877xmlInitParser(void) {
9878 if (xmlParserInitialized) return;
9879
9880 xmlInitCharEncodingHandlers();
9881 xmlInitializePredefinedEntities();
9882 xmlDefaultSAXHandlerInit();
9883 xmlRegisterDefaultInputCallbacks();
9884 xmlRegisterDefaultOutputCallbacks();
9885#ifdef LIBXML_HTML_ENABLED
9886 htmlInitAutoClose();
9887 htmlDefaultSAXHandlerInit();
9888#endif
9889#ifdef LIBXML_XPATH_ENABLED
9890 xmlXPathInit();
9891#endif
9892 xmlParserInitialized = 1;
9893}
9894
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 *
 * Only the character encoding handlers and the predefined entities are
 * released here.
 */

void
xmlCleanupParser(void) {
    /* clear the flag so that a later xmlInitParser() runs the setup again */
    xmlParserInitialized = 0;
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
9910
9911/**
9912 * xmlPedanticParserDefault:
9913 * @val: int 0 or 1
9914 *
9915 * Set and return the previous value for enabling pedantic warnings.
9916 *
9917 * Returns the last value for 0 for no substitution, 1 for substitution.
9918 */
9919
9920int
9921xmlPedanticParserDefault(int val) {
9922 int old = xmlPedanticParserDefaultValue;
9923
9924 xmlPedanticParserDefaultValue = val;
9925 return(old);
9926}
9927
9928/**
9929 * xmlSubstituteEntitiesDefault:
9930 * @val: int 0 or 1
9931 *
9932 * Set and return the previous value for default entity support.
9933 * Initially the parser always keep entity references instead of substituting
9934 * entity values in the output. This function has to be used to change the
9935 * default parser behaviour
9936 * SAX::subtituteEntities() has to be used for changing that on a file by
9937 * file basis.
9938 *
9939 * Returns the last value for 0 for no substitution, 1 for substitution.
9940 */
9941
9942int
9943xmlSubstituteEntitiesDefault(int val) {
9944 int old = xmlSubstituteEntitiesDefaultValue;
9945
9946 xmlSubstituteEntitiesDefaultValue = val;
9947 return(old);
9948}
9949
9950/**
9951 * xmlKeepBlanksDefault:
9952 * @val: int 0 or 1
9953 *
9954 * Set and return the previous value for default blanks text nodes support.
9955 * The 1.x version of the parser used an heuristic to try to detect
9956 * ignorable white spaces. As a result the SAX callback was generating
9957 * ignorableWhitespace() callbacks instead of characters() one, and when
9958 * using the DOM output text nodes containing those blanks were not generated.
9959 * The 2.x and later version will switch to the XML standard way and
9960 * ignorableWhitespace() are only generated when running the parser in
9961 * validating mode and when the current element doesn't allow CDATA or
9962 * mixed content.
9963 * This function is provided as a way to force the standard behaviour
9964 * on 1.X libs and to switch back to the old mode for compatibility when
9965 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9966 * by using xmlIsBlankNode() commodity function to detect the "empty"
9967 * nodes generated.
9968 * This value also affect autogeneration of indentation when saving code
9969 * if blanks sections are kept, indentation is not generated.
9970 *
9971 * Returns the last value for 0 for no substitution, 1 for substitution.
9972 */
9973
9974int
9975xmlKeepBlanksDefault(int val) {
9976 int old = xmlKeepBlanksDefaultValue;
9977
9978 xmlKeepBlanksDefaultValue = val;
9979 xmlIndentTreeOutput = !val;
9980 return(old);
9981}
9982