blob: e76c926381f59e565a73199cdccee0cfa93c9385 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * Daniel.Veillard@w3.org
31 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
/*
 * Directory separator character: a backslash when WIN32 is defined,
 * a forward slash everywhere else.
 */
#ifndef WIN32
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Buffer sizing constants.
 * XML_PARSER_BUFFER_SIZE is the headroom kept free at the end of a
 * growing buffer; XML_PARSER_BIG_BUFFER_SIZE is the initial size of the
 * translation buffer allocated by xmlStringDecodeEntities().
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300
79
/*
 * Various global defaults for parsing.
 *
 * On VMS the two longest identifiers are aliased to truncated names
 * (see the note in the file header); the aliases are installed first so
 * that each variable is defined exactly once below.
 */
#ifdef VMS
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif

int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
97
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
Daniel Veillard257d9102001-05-08 10:41:44 +0000112static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000113xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
114 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000115 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000116 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000117
118/************************************************************************
119 * *
120 * Parser stacks related functions and macros *
121 * *
122 ************************************************************************/
123
124xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
125 const xmlChar ** str);
126
127/*
128 * Generic function for accessing stacks in the Parser Context
129 */
130
131#define PUSH_AND_POP(scope, type, name) \
132scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
133 if (ctxt->name##Nr >= ctxt->name##Max) { \
134 ctxt->name##Max *= 2; \
135 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
136 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
137 if (ctxt->name##Tab == NULL) { \
138 xmlGenericError(xmlGenericErrorContext, \
139 "realloc failed !\n"); \
140 return(0); \
141 } \
142 } \
143 ctxt->name##Tab[ctxt->name##Nr] = value; \
144 ctxt->name = value; \
145 return(ctxt->name##Nr++); \
146} \
147scope type name##Pop(xmlParserCtxtPtr ctxt) { \
148 type ret; \
149 if (ctxt->name##Nr <= 0) return(0); \
150 ctxt->name##Nr--; \
151 if (ctxt->name##Nr > 0) \
152 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
153 else \
154 ctxt->name = NULL; \
155 ret = ctxt->name##Tab[ctxt->name##Nr]; \
156 ctxt->name##Tab[ctxt->name##Nr] = 0; \
157 return(ret); \
158} \
159
160/*
161 * Those macros actually generate the functions
162 */
163PUSH_AND_POP(extern, xmlParserInputPtr, input)
164PUSH_AND_POP(extern, xmlNodePtr, node)
165PUSH_AND_POP(extern, xmlChar*, name)
166
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000167static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000168 if (ctxt->spaceNr >= ctxt->spaceMax) {
169 ctxt->spaceMax *= 2;
170 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
171 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
172 if (ctxt->spaceTab == NULL) {
173 xmlGenericError(xmlGenericErrorContext,
174 "realloc failed !\n");
175 return(0);
176 }
177 }
178 ctxt->spaceTab[ctxt->spaceNr] = val;
179 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
180 return(ctxt->spaceNr++);
181}
182
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000183static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000184 int ret;
185 if (ctxt->spaceNr <= 0) return(0);
186 ctxt->spaceNr--;
187 if (ctxt->spaceNr > 0)
188 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
189 else
190 ctxt->space = NULL;
191 ret = ctxt->spaceTab[ctxt->spaceNr];
192 ctxt->spaceTab[ctxt->spaceNr] = -1;
193 return(ret);
194}
195
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token when one is set, otherwise the
 *           xmlChar at the current input position.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but only looks in the input buffer: it yields -1
 *           when a token is pending, bypassing any token extraction that
 *           may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK, GROW, NEXT1 and COPY_BUF expand to a bare if/brace
 * statement rather than a do { } while (0) block; only use them as a
 * complete statement inside braces, never as the unbraced body of an
 * if/else.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* The argument is parenthesized so that an expression can be passed. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
283/**
284 * xmlSkipBlankChars:
285 * @ctxt: the XML parser context
286 *
287 * skip all blanks character found at that point in the input streams.
288 * It pops up finished entities in the process if allowable at that point.
289 *
290 * Returns the number of space chars skipped
291 */
292
293int
294xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000295 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000296
Daniel Veillard02141ea2001-04-30 11:46:40 +0000297 if (ctxt->token != 0) {
298 if (!IS_BLANK(ctxt->token))
299 return(0);
300 ctxt->token = 0;
301 res++;
302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303 /*
304 * It's Okay to use CUR/NEXT here since all the blanks are on
305 * the ASCII range.
306 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000307 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
308 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000309 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000310 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000311 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000312 cur = ctxt->input->cur;
313 while (IS_BLANK(*cur)) {
314 if (*cur == '\n') {
315 ctxt->input->line++; ctxt->input->col = 1;
316 }
317 cur++;
318 res++;
319 if (*cur == 0) {
320 ctxt->input->cur = cur;
321 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
322 cur = ctxt->input->cur;
323 }
324 }
325 ctxt->input->cur = cur;
326 } else {
327 int cur;
328 do {
329 cur = CUR;
330 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
331 NEXT;
332 cur = CUR;
333 res++;
334 }
335 while ((cur == 0) && (ctxt->inputNr > 1) &&
336 (ctxt->instate != XML_PARSER_COMMENT)) {
337 xmlPopInput(ctxt);
338 cur = CUR;
339 }
340 /*
341 * Need to handle support of entities branching here
342 */
343 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
344 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
345 }
Owen Taylor3473f882001-02-23 17:55:21 +0000346 return(res);
347}
348
349/************************************************************************
350 * *
351 * Commodity functions to handle entities *
352 * *
353 ************************************************************************/
354
355/**
356 * xmlPopInput:
357 * @ctxt: an XML parser context
358 *
359 * xmlPopInput: the current input pointed by ctxt->input came to an end
360 * pop it and return the next char.
361 *
362 * Returns the current xmlChar in the parser context
363 */
364xmlChar
365xmlPopInput(xmlParserCtxtPtr ctxt) {
366 if (ctxt->inputNr == 1) return(0); /* End of main Input */
367 if (xmlParserDebugEntities)
368 xmlGenericError(xmlGenericErrorContext,
369 "Popping input %d\n", ctxt->inputNr);
370 xmlFreeInputStream(inputPop(ctxt));
371 if ((*ctxt->input->cur == 0) &&
372 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
373 return(xmlPopInput(ctxt));
374 return(CUR);
375}
376
377/**
378 * xmlPushInput:
379 * @ctxt: an XML parser context
380 * @input: an XML parser input fragment (entity, XML fragment ...).
381 *
382 * xmlPushInput: switch to a new input stream which is stacked on top
383 * of the previous one(s).
384 */
385void
386xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
387 if (input == NULL) return;
388
389 if (xmlParserDebugEntities) {
390 if ((ctxt->input != NULL) && (ctxt->input->filename))
391 xmlGenericError(xmlGenericErrorContext,
392 "%s(%d): ", ctxt->input->filename,
393 ctxt->input->line);
394 xmlGenericError(xmlGenericErrorContext,
395 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
396 }
397 inputPush(ctxt, input);
398 GROW;
399}
400
401/**
402 * xmlParseCharRef:
403 * @ctxt: an XML parser context
404 *
405 * parse Reference declarations
406 *
407 * [66] CharRef ::= '&#' [0-9]+ ';' |
408 * '&#x' [0-9a-fA-F]+ ';'
409 *
410 * [ WFC: Legal Character ]
411 * Characters referred to using character references must match the
412 * production for Char.
413 *
414 * Returns the value parsed (as an int), 0 in case of error
415 */
416int
417xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000418 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000419 int count = 0;
420
421 if (ctxt->token != 0) {
422 val = ctxt->token;
423 ctxt->token = 0;
424 return(val);
425 }
426 /*
427 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
428 */
429 if ((RAW == '&') && (NXT(1) == '#') &&
430 (NXT(2) == 'x')) {
431 SKIP(3);
432 GROW;
433 while (RAW != ';') { /* loop blocked by count */
434 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
435 val = val * 16 + (CUR - '0');
436 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
437 val = val * 16 + (CUR - 'a') + 10;
438 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
439 val = val * 16 + (CUR - 'A') + 10;
440 else {
441 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
443 ctxt->sax->error(ctxt->userData,
444 "xmlParseCharRef: invalid hexadecimal value\n");
445 ctxt->wellFormed = 0;
446 ctxt->disableSAX = 1;
447 val = 0;
448 break;
449 }
450 NEXT;
451 count++;
452 }
453 if (RAW == ';') {
454 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
455 ctxt->nbChars ++;
456 ctxt->input->cur++;
457 }
458 } else if ((RAW == '&') && (NXT(1) == '#')) {
459 SKIP(2);
460 GROW;
461 while (RAW != ';') { /* loop blocked by count */
462 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
463 val = val * 10 + (CUR - '0');
464 else {
465 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
467 ctxt->sax->error(ctxt->userData,
468 "xmlParseCharRef: invalid decimal value\n");
469 ctxt->wellFormed = 0;
470 ctxt->disableSAX = 1;
471 val = 0;
472 break;
473 }
474 NEXT;
475 count++;
476 }
477 if (RAW == ';') {
478 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
479 ctxt->nbChars ++;
480 ctxt->input->cur++;
481 }
482 } else {
483 ctxt->errNo = XML_ERR_INVALID_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 }
490
491 /*
492 * [ WFC: Legal Character ]
493 * Characters referred to using character references must match the
494 * production for Char.
495 */
496 if (IS_CHAR(val)) {
497 return(val);
498 } else {
499 ctxt->errNo = XML_ERR_INVALID_CHAR;
500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
501 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
502 val);
503 ctxt->wellFormed = 0;
504 ctxt->disableSAX = 1;
505 }
506 return(0);
507}
508
509/**
510 * xmlParseStringCharRef:
511 * @ctxt: an XML parser context
512 * @str: a pointer to an index in the string
513 *
514 * parse Reference declarations, variant parsing from a string rather
515 * than an an input flow.
516 *
517 * [66] CharRef ::= '&#' [0-9]+ ';' |
518 * '&#x' [0-9a-fA-F]+ ';'
519 *
520 * [ WFC: Legal Character ]
521 * Characters referred to using character references must match the
522 * production for Char.
523 *
524 * Returns the value parsed (as an int), 0 in case of error, str will be
525 * updated to the current value of the index
526 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000527static int
Owen Taylor3473f882001-02-23 17:55:21 +0000528xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
529 const xmlChar *ptr;
530 xmlChar cur;
531 int val = 0;
532
533 if ((str == NULL) || (*str == NULL)) return(0);
534 ptr = *str;
535 cur = *ptr;
536 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
537 ptr += 3;
538 cur = *ptr;
539 while (cur != ';') { /* Non input consuming loop */
540 if ((cur >= '0') && (cur <= '9'))
541 val = val * 16 + (cur - '0');
542 else if ((cur >= 'a') && (cur <= 'f'))
543 val = val * 16 + (cur - 'a') + 10;
544 else if ((cur >= 'A') && (cur <= 'F'))
545 val = val * 16 + (cur - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseStringCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
552 ctxt->disableSAX = 1;
553 val = 0;
554 break;
555 }
556 ptr++;
557 cur = *ptr;
558 }
559 if (cur == ';')
560 ptr++;
561 } else if ((cur == '&') && (ptr[1] == '#')){
562 ptr += 2;
563 cur = *ptr;
564 while (cur != ';') { /* Non input consuming loops */
565 if ((cur >= '0') && (cur <= '9'))
566 val = val * 10 + (cur - '0');
567 else {
568 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
570 ctxt->sax->error(ctxt->userData,
571 "xmlParseStringCharRef: invalid decimal value\n");
572 ctxt->wellFormed = 0;
573 ctxt->disableSAX = 1;
574 val = 0;
575 break;
576 }
577 ptr++;
578 cur = *ptr;
579 }
580 if (cur == ';')
581 ptr++;
582 } else {
583 ctxt->errNo = XML_ERR_INVALID_CHARREF;
584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
585 ctxt->sax->error(ctxt->userData,
586 "xmlParseCharRef: invalid value\n");
587 ctxt->wellFormed = 0;
588 ctxt->disableSAX = 1;
589 return(0);
590 }
591 *str = ptr;
592
593 /*
594 * [ WFC: Legal Character ]
595 * Characters referred to using character references must match the
596 * production for Char.
597 */
598 if (IS_CHAR(val)) {
599 return(val);
600 } else {
601 ctxt->errNo = XML_ERR_INVALID_CHAR;
602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603 ctxt->sax->error(ctxt->userData,
604 "CharRef: invalid xmlChar value %d\n", val);
605 ctxt->wellFormed = 0;
606 ctxt->disableSAX = 1;
607 }
608 return(0);
609}
610
611/**
612 * xmlParserHandlePEReference:
613 * @ctxt: the parser context
614 *
615 * [69] PEReference ::= '%' Name ';'
616 *
617 * [ WFC: No Recursion ]
618 * A parsed entity must not contain a recursive
619 * reference to itself, either directly or indirectly.
620 *
621 * [ WFC: Entity Declared ]
622 * In a document without any DTD, a document with only an internal DTD
623 * subset which contains no parameter entity references, or a document
624 * with "standalone='yes'", ... ... The declaration of a parameter
625 * entity must precede any reference to it...
626 *
627 * [ VC: Entity Declared ]
628 * In a document with an external subset or external parameter entities
629 * with "standalone='no'", ... ... The declaration of a parameter entity
630 * must precede any reference to it...
631 *
632 * [ WFC: In DTD ]
633 * Parameter-entity references may only appear in the DTD.
634 * NOTE: misleading but this is handled.
635 *
636 * A PEReference may have been detected in the current input stream
637 * the handling is done accordingly to
638 * http://www.w3.org/TR/REC-xml#entproc
639 * i.e.
640 * - Included in literal in entity values
641 * - Included as Paraemeter Entity reference within DTDs
642 */
643void
644xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
645 xmlChar *name;
646 xmlEntityPtr entity = NULL;
647 xmlParserInputPtr input;
648
649 if (ctxt->token != 0) {
650 return;
651 }
652 if (RAW != '%') return;
653 switch(ctxt->instate) {
654 case XML_PARSER_CDATA_SECTION:
655 return;
656 case XML_PARSER_COMMENT:
657 return;
658 case XML_PARSER_START_TAG:
659 return;
660 case XML_PARSER_END_TAG:
661 return;
662 case XML_PARSER_EOF:
663 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
665 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
666 ctxt->wellFormed = 0;
667 ctxt->disableSAX = 1;
668 return;
669 case XML_PARSER_PROLOG:
670 case XML_PARSER_START:
671 case XML_PARSER_MISC:
672 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
674 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
675 ctxt->wellFormed = 0;
676 ctxt->disableSAX = 1;
677 return;
678 case XML_PARSER_ENTITY_DECL:
679 case XML_PARSER_CONTENT:
680 case XML_PARSER_ATTRIBUTE_VALUE:
681 case XML_PARSER_PI:
682 case XML_PARSER_SYSTEM_LITERAL:
683 /* we just ignore it there */
684 return;
685 case XML_PARSER_EPILOG:
686 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
688 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
689 ctxt->wellFormed = 0;
690 ctxt->disableSAX = 1;
691 return;
692 case XML_PARSER_ENTITY_VALUE:
693 /*
694 * NOTE: in the case of entity values, we don't do the
695 * substitution here since we need the literal
696 * entity value to be able to save the internal
697 * subset of the document.
698 * This will be handled by xmlStringDecodeEntities
699 */
700 return;
701 case XML_PARSER_DTD:
702 /*
703 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
704 * In the internal DTD subset, parameter-entity references
705 * can occur only where markup declarations can occur, not
706 * within markup declarations.
707 * In that case this is handled in xmlParseMarkupDecl
708 */
709 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
710 return;
711 break;
712 case XML_PARSER_IGNORE:
713 return;
714 }
715
716 NEXT;
717 name = xmlParseName(ctxt);
718 if (xmlParserDebugEntities)
719 xmlGenericError(xmlGenericErrorContext,
720 "PE Reference: %s\n", name);
721 if (name == NULL) {
722 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
724 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
725 ctxt->wellFormed = 0;
726 ctxt->disableSAX = 1;
727 } else {
728 if (RAW == ';') {
729 NEXT;
730 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
731 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
732 if (entity == NULL) {
733
734 /*
735 * [ WFC: Entity Declared ]
736 * In a document without any DTD, a document with only an
737 * internal DTD subset which contains no parameter entity
738 * references, or a document with "standalone='yes'", ...
739 * ... The declaration of a parameter entity must precede
740 * any reference to it...
741 */
742 if ((ctxt->standalone == 1) ||
743 ((ctxt->hasExternalSubset == 0) &&
744 (ctxt->hasPErefs == 0))) {
745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
746 ctxt->sax->error(ctxt->userData,
747 "PEReference: %%%s; not found\n", name);
748 ctxt->wellFormed = 0;
749 ctxt->disableSAX = 1;
750 } else {
751 /*
752 * [ VC: Entity Declared ]
753 * In a document with an external subset or external
754 * parameter entities with "standalone='no'", ...
755 * ... The declaration of a parameter entity must precede
756 * any reference to it...
757 */
758 if ((!ctxt->disableSAX) &&
759 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
760 ctxt->vctxt.error(ctxt->vctxt.userData,
761 "PEReference: %%%s; not found\n", name);
762 } else if ((!ctxt->disableSAX) &&
763 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
764 ctxt->sax->warning(ctxt->userData,
765 "PEReference: %%%s; not found\n", name);
766 ctxt->valid = 0;
767 }
768 } else {
769 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
770 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
771 /*
772 * handle the extra spaces added before and after
773 * c.f. http://www.w3.org/TR/REC-xml#as-PE
774 * this is done independantly.
775 */
776 input = xmlNewEntityInputStream(ctxt, entity);
777 xmlPushInput(ctxt, input);
778 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
779 (RAW == '<') && (NXT(1) == '?') &&
780 (NXT(2) == 'x') && (NXT(3) == 'm') &&
781 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
782 xmlParseTextDecl(ctxt);
783 }
784 if (ctxt->token == 0)
785 ctxt->token = ' ';
786 } else {
787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
788 ctxt->sax->error(ctxt->userData,
789 "xmlHandlePEReference: %s is not a parameter entity\n",
790 name);
791 ctxt->wellFormed = 0;
792 ctxt->disableSAX = 1;
793 }
794 }
795 } else {
796 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
798 ctxt->sax->error(ctxt->userData,
799 "xmlHandlePEReference: expecting ';'\n");
800 ctxt->wellFormed = 0;
801 ctxt->disableSAX = 1;
802 }
803 xmlFree(name);
804 }
805}
806
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation of @buffer, whose size is tracked in the
 * companion variable @buffer##_size. It must be used as a statement in a
 * function returning a pointer: if the reallocation fails the error is
 * reported, the old buffer is released (it would otherwise leak, since
 * the enclosing function is left right away) and the enclosing function
 * returns NULL. The buffer pointer and its size are only updated once
 * the reallocation succeeded.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBufferTmp;						\
    growBufferTmp = (xmlChar *)						\
            xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));	\
    if (growBufferTmp == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = growBufferTmp;						\
    buffer##_size *= 2;							\
}
819
820/**
821 * xmlStringDecodeEntities:
822 * @ctxt: the parser context
823 * @str: the input string
824 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
825 * @end: an end marker xmlChar, 0 if none
826 * @end2: an end marker xmlChar, 0 if none
827 * @end3: an end marker xmlChar, 0 if none
828 *
829 * Takes a entity string content and process to do the adequate subtitutions.
830 *
831 * [67] Reference ::= EntityRef | CharRef
832 *
833 * [69] PEReference ::= '%' Name ';'
834 *
835 * Returns A newly allocated string with the substitution done. The caller
836 * must deallocate it !
837 */
838xmlChar *
839xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
840 xmlChar end, xmlChar end2, xmlChar end3) {
841 xmlChar *buffer = NULL;
842 int buffer_size = 0;
843
844 xmlChar *current = NULL;
845 xmlEntityPtr ent;
846 int c,l;
847 int nbchars = 0;
848
849 if (str == NULL)
850 return(NULL);
851
852 if (ctxt->depth > 40) {
853 ctxt->errNo = XML_ERR_ENTITY_LOOP;
854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
855 ctxt->sax->error(ctxt->userData,
856 "Detected entity reference loop\n");
857 ctxt->wellFormed = 0;
858 ctxt->disableSAX = 1;
859 return(NULL);
860 }
861
862 /*
863 * allocate a translation buffer.
864 */
865 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
866 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
867 if (buffer == NULL) {
868 perror("xmlDecodeEntities: malloc failed");
869 return(NULL);
870 }
871
872 /*
873 * Ok loop until we reach one of the ending char or a size limit.
874 * we are operating on already parsed values.
875 */
876 c = CUR_SCHAR(str, l);
877 while ((c != 0) && (c != end) && /* non input consuming loop */
878 (c != end2) && (c != end3)) {
879
880 if (c == 0) break;
881 if ((c == '&') && (str[1] == '#')) {
882 int val = xmlParseStringCharRef(ctxt, &str);
883 if (val != 0) {
884 COPY_BUF(0,buffer,nbchars,val);
885 }
886 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
887 if (xmlParserDebugEntities)
888 xmlGenericError(xmlGenericErrorContext,
889 "String decoding Entity Reference: %.30s\n",
890 str);
891 ent = xmlParseStringEntityRef(ctxt, &str);
892 if ((ent != NULL) &&
893 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
894 if (ent->content != NULL) {
895 COPY_BUF(0,buffer,nbchars,ent->content[0]);
896 } else {
897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
898 ctxt->sax->error(ctxt->userData,
899 "internal error entity has no content\n");
900 }
901 } else if ((ent != NULL) && (ent->content != NULL)) {
902 xmlChar *rep;
903
904 ctxt->depth++;
905 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
906 0, 0, 0);
907 ctxt->depth--;
908 if (rep != NULL) {
909 current = rep;
910 while (*current != 0) { /* non input consuming loop */
911 buffer[nbchars++] = *current++;
912 if (nbchars >
913 buffer_size - XML_PARSER_BUFFER_SIZE) {
914 growBuffer(buffer);
915 }
916 }
917 xmlFree(rep);
918 }
919 } else if (ent != NULL) {
920 int i = xmlStrlen(ent->name);
921 const xmlChar *cur = ent->name;
922
923 buffer[nbchars++] = '&';
924 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
925 growBuffer(buffer);
926 }
927 for (;i > 0;i--)
928 buffer[nbchars++] = *cur++;
929 buffer[nbchars++] = ';';
930 }
931 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
932 if (xmlParserDebugEntities)
933 xmlGenericError(xmlGenericErrorContext,
934 "String decoding PE Reference: %.30s\n", str);
935 ent = xmlParseStringPEReference(ctxt, &str);
936 if (ent != NULL) {
937 xmlChar *rep;
938
939 ctxt->depth++;
940 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
941 0, 0, 0);
942 ctxt->depth--;
943 if (rep != NULL) {
944 current = rep;
945 while (*current != 0) { /* non input consuming loop */
946 buffer[nbchars++] = *current++;
947 if (nbchars >
948 buffer_size - XML_PARSER_BUFFER_SIZE) {
949 growBuffer(buffer);
950 }
951 }
952 xmlFree(rep);
953 }
954 }
955 } else {
956 COPY_BUF(l,buffer,nbchars,c);
957 str += l;
958 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
959 growBuffer(buffer);
960 }
961 }
962 c = CUR_SCHAR(str, l);
963 }
964 buffer[nbchars++] = 0;
965 return(buffer);
966}
967
968
969/************************************************************************
970 * *
971 * Commodity functions to handle xmlChars *
972 * *
973 ************************************************************************/
974
975/**
976 * xmlStrndup:
977 * @cur: the input xmlChar *
978 * @len: the len of @cur
979 *
980 * a strndup for array of xmlChar's
981 *
982 * Returns a new xmlChar * or NULL
983 */
984xmlChar *
985xmlStrndup(const xmlChar *cur, int len) {
986 xmlChar *ret;
987
988 if ((cur == NULL) || (len < 0)) return(NULL);
989 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
990 if (ret == NULL) {
991 xmlGenericError(xmlGenericErrorContext,
992 "malloc of %ld byte failed\n",
993 (len + 1) * (long)sizeof(xmlChar));
994 return(NULL);
995 }
996 memcpy(ret, cur, len * sizeof(xmlChar));
997 ret[len] = 0;
998 return(ret);
999}
1000
1001/**
1002 * xmlStrdup:
1003 * @cur: the input xmlChar *
1004 *
1005 * a strdup for array of xmlChar's. Since they are supposed to be
1006 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1007 * a termination mark of '0'.
1008 *
1009 * Returns a new xmlChar * or NULL
1010 */
1011xmlChar *
1012xmlStrdup(const xmlChar *cur) {
1013 const xmlChar *p = cur;
1014
1015 if (cur == NULL) return(NULL);
1016 while (*p != 0) p++; /* non input consuming */
1017 return(xmlStrndup(cur, p - cur));
1018}
1019
1020/**
1021 * xmlCharStrndup:
1022 * @cur: the input char *
1023 * @len: the len of @cur
1024 *
1025 * a strndup for char's to xmlChar's
1026 *
1027 * Returns a new xmlChar * or NULL
1028 */
1029
1030xmlChar *
1031xmlCharStrndup(const char *cur, int len) {
1032 int i;
1033 xmlChar *ret;
1034
1035 if ((cur == NULL) || (len < 0)) return(NULL);
1036 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1037 if (ret == NULL) {
1038 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1039 (len + 1) * (long)sizeof(xmlChar));
1040 return(NULL);
1041 }
1042 for (i = 0;i < len;i++)
1043 ret[i] = (xmlChar) cur[i];
1044 ret[len] = 0;
1045 return(ret);
1046}
1047
1048/**
1049 * xmlCharStrdup:
1050 * @cur: the input char *
1051 * @len: the len of @cur
1052 *
1053 * a strdup for char's to xmlChar's
1054 *
1055 * Returns a new xmlChar * or NULL
1056 */
1057
1058xmlChar *
1059xmlCharStrdup(const char *cur) {
1060 const char *p = cur;
1061
1062 if (cur == NULL) return(NULL);
1063 while (*p != '\0') p++; /* non input consuming */
1064 return(xmlCharStrndup(cur, p - cur));
1065}
1066
1067/**
1068 * xmlStrcmp:
1069 * @str1: the first xmlChar *
1070 * @str2: the second xmlChar *
1071 *
1072 * a strcmp for xmlChar's
1073 *
1074 * Returns the integer result of the comparison
1075 */
1076
1077int
1078xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1079 register int tmp;
1080
1081 if (str1 == str2) return(0);
1082 if (str1 == NULL) return(-1);
1083 if (str2 == NULL) return(1);
1084 do {
1085 tmp = *str1++ - *str2;
1086 if (tmp != 0) return(tmp);
1087 } while (*str2++ != 0);
1088 return 0;
1089}
1090
1091/**
1092 * xmlStrEqual:
1093 * @str1: the first xmlChar *
1094 * @str2: the second xmlChar *
1095 *
1096 * Check if both string are equal of have same content
1097 * Should be a bit more readable and faster than xmlStrEqual()
1098 *
1099 * Returns 1 if they are equal, 0 if they are different
1100 */
1101
1102int
1103xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1104 if (str1 == str2) return(1);
1105 if (str1 == NULL) return(0);
1106 if (str2 == NULL) return(0);
1107 do {
1108 if (*str1++ != *str2) return(0);
1109 } while (*str2++);
1110 return(1);
1111}
1112
1113/**
1114 * xmlStrncmp:
1115 * @str1: the first xmlChar *
1116 * @str2: the second xmlChar *
1117 * @len: the max comparison length
1118 *
1119 * a strncmp for xmlChar's
1120 *
1121 * Returns the integer result of the comparison
1122 */
1123
1124int
1125xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1126 register int tmp;
1127
1128 if (len <= 0) return(0);
1129 if (str1 == str2) return(0);
1130 if (str1 == NULL) return(-1);
1131 if (str2 == NULL) return(1);
1132 do {
1133 tmp = *str1++ - *str2;
1134 if (tmp != 0 || --len == 0) return(tmp);
1135 } while (*str2++ != 0);
1136 return 0;
1137}
1138
1139static xmlChar casemap[256] = {
1140 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1141 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1142 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1143 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1144 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1145 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1146 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1147 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1148 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1149 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1150 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1151 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1152 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1153 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1154 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1155 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1156 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1157 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1158 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1159 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1160 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1161 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1162 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1163 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1164 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1165 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1166 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1167 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1168 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1169 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1170 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1171 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1172};
1173
1174/**
1175 * xmlStrcasecmp:
1176 * @str1: the first xmlChar *
1177 * @str2: the second xmlChar *
1178 *
1179 * a strcasecmp for xmlChar's
1180 *
1181 * Returns the integer result of the comparison
1182 */
1183
1184int
1185xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1186 register int tmp;
1187
1188 if (str1 == str2) return(0);
1189 if (str1 == NULL) return(-1);
1190 if (str2 == NULL) return(1);
1191 do {
1192 tmp = casemap[*str1++] - casemap[*str2];
1193 if (tmp != 0) return(tmp);
1194 } while (*str2++ != 0);
1195 return 0;
1196}
1197
1198/**
1199 * xmlStrncasecmp:
1200 * @str1: the first xmlChar *
1201 * @str2: the second xmlChar *
1202 * @len: the max comparison length
1203 *
1204 * a strncasecmp for xmlChar's
1205 *
1206 * Returns the integer result of the comparison
1207 */
1208
1209int
1210xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1211 register int tmp;
1212
1213 if (len <= 0) return(0);
1214 if (str1 == str2) return(0);
1215 if (str1 == NULL) return(-1);
1216 if (str2 == NULL) return(1);
1217 do {
1218 tmp = casemap[*str1++] - casemap[*str2];
1219 if (tmp != 0 || --len == 0) return(tmp);
1220 } while (*str2++ != 0);
1221 return 0;
1222}
1223
1224/**
1225 * xmlStrchr:
1226 * @str: the xmlChar * array
1227 * @val: the xmlChar to search
1228 *
1229 * a strchr for xmlChar's
1230 *
1231 * Returns the xmlChar * for the first occurence or NULL.
1232 */
1233
1234const xmlChar *
1235xmlStrchr(const xmlChar *str, xmlChar val) {
1236 if (str == NULL) return(NULL);
1237 while (*str != 0) { /* non input consuming */
1238 if (*str == val) return((xmlChar *) str);
1239 str++;
1240 }
1241 return(NULL);
1242}
1243
1244/**
1245 * xmlStrstr:
1246 * @str: the xmlChar * array (haystack)
1247 * @val: the xmlChar to search (needle)
1248 *
1249 * a strstr for xmlChar's
1250 *
1251 * Returns the xmlChar * for the first occurence or NULL.
1252 */
1253
1254const xmlChar *
1255xmlStrstr(const xmlChar *str, xmlChar *val) {
1256 int n;
1257
1258 if (str == NULL) return(NULL);
1259 if (val == NULL) return(NULL);
1260 n = xmlStrlen(val);
1261
1262 if (n == 0) return(str);
1263 while (*str != 0) { /* non input consuming */
1264 if (*str == *val) {
1265 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1266 }
1267 str++;
1268 }
1269 return(NULL);
1270}
1271
1272/**
1273 * xmlStrcasestr:
1274 * @str: the xmlChar * array (haystack)
1275 * @val: the xmlChar to search (needle)
1276 *
1277 * a case-ignoring strstr for xmlChar's
1278 *
1279 * Returns the xmlChar * for the first occurence or NULL.
1280 */
1281
1282const xmlChar *
1283xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1284 int n;
1285
1286 if (str == NULL) return(NULL);
1287 if (val == NULL) return(NULL);
1288 n = xmlStrlen(val);
1289
1290 if (n == 0) return(str);
1291 while (*str != 0) { /* non input consuming */
1292 if (casemap[*str] == casemap[*val])
1293 if (!xmlStrncasecmp(str, val, n)) return(str);
1294 str++;
1295 }
1296 return(NULL);
1297}
1298
1299/**
1300 * xmlStrsub:
1301 * @str: the xmlChar * array (haystack)
1302 * @start: the index of the first char (zero based)
1303 * @len: the length of the substring
1304 *
1305 * Extract a substring of a given string
1306 *
1307 * Returns the xmlChar * for the first occurence or NULL.
1308 */
1309
1310xmlChar *
1311xmlStrsub(const xmlChar *str, int start, int len) {
1312 int i;
1313
1314 if (str == NULL) return(NULL);
1315 if (start < 0) return(NULL);
1316 if (len < 0) return(NULL);
1317
1318 for (i = 0;i < start;i++) {
1319 if (*str == 0) return(NULL);
1320 str++;
1321 }
1322 if (*str == 0) return(NULL);
1323 return(xmlStrndup(str, len));
1324}
1325
1326/**
1327 * xmlStrlen:
1328 * @str: the xmlChar * array
1329 *
1330 * length of a xmlChar's string
1331 *
1332 * Returns the number of xmlChar contained in the ARRAY.
1333 */
1334
1335int
1336xmlStrlen(const xmlChar *str) {
1337 int len = 0;
1338
1339 if (str == NULL) return(0);
1340 while (*str != 0) { /* non input consuming */
1341 str++;
1342 len++;
1343 }
1344 return(len);
1345}
1346
1347/**
1348 * xmlStrncat:
1349 * @cur: the original xmlChar * array
1350 * @add: the xmlChar * array added
1351 * @len: the length of @add
1352 *
1353 * a strncat for array of xmlChar's, it will extend cur with the len
1354 * first bytes of @add.
1355 *
1356 * Returns a new xmlChar *, the original @cur is reallocated if needed
1357 * and should not be freed
1358 */
1359
1360xmlChar *
1361xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1362 int size;
1363 xmlChar *ret;
1364
1365 if ((add == NULL) || (len == 0))
1366 return(cur);
1367 if (cur == NULL)
1368 return(xmlStrndup(add, len));
1369
1370 size = xmlStrlen(cur);
1371 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1372 if (ret == NULL) {
1373 xmlGenericError(xmlGenericErrorContext,
1374 "xmlStrncat: realloc of %ld byte failed\n",
1375 (size + len + 1) * (long)sizeof(xmlChar));
1376 return(cur);
1377 }
1378 memcpy(&ret[size], add, len * sizeof(xmlChar));
1379 ret[size + len] = 0;
1380 return(ret);
1381}
1382
1383/**
1384 * xmlStrcat:
1385 * @cur: the original xmlChar * array
1386 * @add: the xmlChar * array added
1387 *
1388 * a strcat for array of xmlChar's. Since they are supposed to be
1389 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1390 * a termination mark of '0'.
1391 *
1392 * Returns a new xmlChar * containing the concatenated string.
1393 */
1394xmlChar *
1395xmlStrcat(xmlChar *cur, const xmlChar *add) {
1396 const xmlChar *p = add;
1397
1398 if (add == NULL) return(cur);
1399 if (cur == NULL)
1400 return(xmlStrdup(add));
1401
1402 while (*p != 0) p++; /* non input consuming */
1403 return(xmlStrncat(cur, add, p - add));
1404}
1405
1406/************************************************************************
1407 * *
1408 * Commodity functions, cleanup needed ? *
1409 * *
1410 ************************************************************************/
1411
1412/**
1413 * areBlanks:
1414 * @ctxt: an XML parser context
1415 * @str: a xmlChar *
1416 * @len: the size of @str
1417 *
1418 * Is this a sequence of blank chars that one can ignore ?
1419 *
1420 * Returns 1 if ignorable 0 otherwise.
1421 */
1422
1423static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1424 int i, ret;
1425 xmlNodePtr lastChild;
1426
Daniel Veillard2f362242001-03-02 17:36:21 +00001427 if (ctxt->keepBlanks)
1428 return(0);
1429
Owen Taylor3473f882001-02-23 17:55:21 +00001430 /*
1431 * Check for xml:space value.
1432 */
1433 if (*(ctxt->space) == 1)
1434 return(0);
1435
1436 /*
1437 * Check that the string is made of blanks
1438 */
1439 for (i = 0;i < len;i++)
1440 if (!(IS_BLANK(str[i]))) return(0);
1441
1442 /*
1443 * Look if the element is mixed content in the Dtd if available
1444 */
1445 if (ctxt->myDoc != NULL) {
1446 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1447 if (ret == 0) return(1);
1448 if (ret == 1) return(0);
1449 }
1450
1451 /*
1452 * Otherwise, heuristic :-\
1453 */
Owen Taylor3473f882001-02-23 17:55:21 +00001454 if (RAW != '<') return(0);
1455 if (ctxt->node == NULL) return(0);
1456 if ((ctxt->node->children == NULL) &&
1457 (RAW == '<') && (NXT(1) == '/')) return(0);
1458
1459 lastChild = xmlGetLastChild(ctxt->node);
1460 if (lastChild == NULL) {
1461 if (ctxt->node->content != NULL) return(0);
1462 } else if (xmlNodeIsText(lastChild))
1463 return(0);
1464 else if ((ctxt->node->children != NULL) &&
1465 (xmlNodeIsText(ctxt->node->children)))
1466 return(0);
1467 return(1);
1468}
1469
1470/*
 * Forward declarations needed for recursive behaviour.
1472 */
1473void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1474void xmlParseReference(xmlParserCtxtPtr ctxt);
1475
1476/************************************************************************
1477 * *
1478 * Extra stuff for namespace support *
1479 * Relates to http://www.w3.org/TR/WD-xml-names *
1480 * *
1481 ************************************************************************/
1482
1483/**
1484 * xmlSplitQName:
1485 * @ctxt: an XML parser context
1486 * @name: an XML parser context
1487 * @prefix: a xmlChar **
1488 *
1489 * parse an UTF8 encoded XML qualified name string
1490 *
1491 * [NS 5] QName ::= (Prefix ':')? LocalPart
1492 *
1493 * [NS 6] Prefix ::= NCName
1494 *
1495 * [NS 7] LocalPart ::= NCName
1496 *
1497 * Returns the local part, and prefix is updated
1498 * to get the Prefix if any.
1499 */
1500
1501xmlChar *
1502xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1503 xmlChar buf[XML_MAX_NAMELEN + 5];
1504 xmlChar *buffer = NULL;
1505 int len = 0;
1506 int max = XML_MAX_NAMELEN;
1507 xmlChar *ret = NULL;
1508 const xmlChar *cur = name;
1509 int c;
1510
1511 *prefix = NULL;
1512
1513#ifndef XML_XML_NAMESPACE
1514 /* xml: prefix is not really a namespace */
1515 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1516 (cur[2] == 'l') && (cur[3] == ':'))
1517 return(xmlStrdup(name));
1518#endif
1519
1520 /* nasty but valid */
1521 if (cur[0] == ':')
1522 return(xmlStrdup(name));
1523
1524 c = *cur++;
1525 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1526 buf[len++] = c;
1527 c = *cur++;
1528 }
1529 if (len >= max) {
1530 /*
1531 * Okay someone managed to make a huge name, so he's ready to pay
1532 * for the processing speed.
1533 */
1534 max = len * 2;
1535
1536 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1537 if (buffer == NULL) {
1538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1539 ctxt->sax->error(ctxt->userData,
1540 "xmlSplitQName: out of memory\n");
1541 return(NULL);
1542 }
1543 memcpy(buffer, buf, len);
1544 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1545 if (len + 10 > max) {
1546 max *= 2;
1547 buffer = (xmlChar *) xmlRealloc(buffer,
1548 max * sizeof(xmlChar));
1549 if (buffer == NULL) {
1550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1551 ctxt->sax->error(ctxt->userData,
1552 "xmlSplitQName: out of memory\n");
1553 return(NULL);
1554 }
1555 }
1556 buffer[len++] = c;
1557 c = *cur++;
1558 }
1559 buffer[len] = 0;
1560 }
1561
1562 if (buffer == NULL)
1563 ret = xmlStrndup(buf, len);
1564 else {
1565 ret = buffer;
1566 buffer = NULL;
1567 max = XML_MAX_NAMELEN;
1568 }
1569
1570
1571 if (c == ':') {
1572 c = *cur++;
1573 if (c == 0) return(ret);
1574 *prefix = ret;
1575 len = 0;
1576
1577 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1578 buf[len++] = c;
1579 c = *cur++;
1580 }
1581 if (len >= max) {
1582 /*
1583 * Okay someone managed to make a huge name, so he's ready to pay
1584 * for the processing speed.
1585 */
1586 max = len * 2;
1587
1588 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1589 if (buffer == NULL) {
1590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1591 ctxt->sax->error(ctxt->userData,
1592 "xmlSplitQName: out of memory\n");
1593 return(NULL);
1594 }
1595 memcpy(buffer, buf, len);
1596 while (c != 0) { /* tested bigname2.xml */
1597 if (len + 10 > max) {
1598 max *= 2;
1599 buffer = (xmlChar *) xmlRealloc(buffer,
1600 max * sizeof(xmlChar));
1601 if (buffer == NULL) {
1602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603 ctxt->sax->error(ctxt->userData,
1604 "xmlSplitQName: out of memory\n");
1605 return(NULL);
1606 }
1607 }
1608 buffer[len++] = c;
1609 c = *cur++;
1610 }
1611 buffer[len] = 0;
1612 }
1613
1614 if (buffer == NULL)
1615 ret = xmlStrndup(buf, len);
1616 else {
1617 ret = buffer;
1618 }
1619 }
1620
1621 return(ret);
1622}
1623
1624/************************************************************************
1625 * *
1626 * The parser itself *
1627 * Relates to http://www.w3.org/TR/REC-xml *
1628 * *
1629 ************************************************************************/
1630
Daniel Veillard76d66f42001-05-16 21:05:17 +00001631static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001632/**
1633 * xmlParseName:
1634 * @ctxt: an XML parser context
1635 *
1636 * parse an XML name.
1637 *
1638 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1639 * CombiningChar | Extender
1640 *
1641 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1642 *
1643 * [6] Names ::= Name (S Name)*
1644 *
1645 * Returns the Name parsed or NULL
1646 */
1647
1648xmlChar *
1649xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001650 const xmlChar *in;
1651 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001652 int count = 0;
1653
1654 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001655
1656 /*
1657 * Accelerator for simple ASCII names
1658 */
1659 in = ctxt->input->cur;
1660 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1661 ((*in >= 0x41) && (*in <= 0x5A)) ||
1662 (*in == '_') || (*in == ':')) {
1663 in++;
1664 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1665 ((*in >= 0x41) && (*in <= 0x5A)) ||
1666 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001667 (*in == '_') || (*in == '-') ||
1668 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001669 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001670 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001671 count = in - ctxt->input->cur;
1672 ret = xmlStrndup(ctxt->input->cur, count);
1673 ctxt->input->cur = in;
1674 return(ret);
1675 }
1676 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001677 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001678}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001679
Daniel Veillard76d66f42001-05-16 21:05:17 +00001680static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001681xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1682 xmlChar buf[XML_MAX_NAMELEN + 5];
1683 int len = 0, l;
1684 int c;
1685 int count = 0;
1686
1687 /*
1688 * Handler for more complex cases
1689 */
1690 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001691 c = CUR_CHAR(l);
1692 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1693 (!IS_LETTER(c) && (c != '_') &&
1694 (c != ':'))) {
1695 return(NULL);
1696 }
1697
1698 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1699 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1700 (c == '.') || (c == '-') ||
1701 (c == '_') || (c == ':') ||
1702 (IS_COMBINING(c)) ||
1703 (IS_EXTENDER(c)))) {
1704 if (count++ > 100) {
1705 count = 0;
1706 GROW;
1707 }
1708 COPY_BUF(l,buf,len,c);
1709 NEXTL(l);
1710 c = CUR_CHAR(l);
1711 if (len >= XML_MAX_NAMELEN) {
1712 /*
1713 * Okay someone managed to make a huge name, so he's ready to pay
1714 * for the processing speed.
1715 */
1716 xmlChar *buffer;
1717 int max = len * 2;
1718
1719 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1720 if (buffer == NULL) {
1721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1722 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001723 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001724 return(NULL);
1725 }
1726 memcpy(buffer, buf, len);
1727 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1728 (c == '.') || (c == '-') ||
1729 (c == '_') || (c == ':') ||
1730 (IS_COMBINING(c)) ||
1731 (IS_EXTENDER(c))) {
1732 if (count++ > 100) {
1733 count = 0;
1734 GROW;
1735 }
1736 if (len + 10 > max) {
1737 max *= 2;
1738 buffer = (xmlChar *) xmlRealloc(buffer,
1739 max * sizeof(xmlChar));
1740 if (buffer == NULL) {
1741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1742 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001743 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001744 return(NULL);
1745 }
1746 }
1747 COPY_BUF(l,buffer,len,c);
1748 NEXTL(l);
1749 c = CUR_CHAR(l);
1750 }
1751 buffer[len] = 0;
1752 return(buffer);
1753 }
1754 }
1755 return(xmlStrndup(buf, len));
1756}
1757
1758/**
1759 * xmlParseStringName:
1760 * @ctxt: an XML parser context
1761 * @str: a pointer to the string pointer (IN/OUT)
1762 *
1763 * parse an XML name.
1764 *
1765 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1766 * CombiningChar | Extender
1767 *
1768 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1769 *
1770 * [6] Names ::= Name (S Name)*
1771 *
1772 * Returns the Name parsed or NULL. The str pointer
1773 * is updated to the current location in the string.
1774 */
1775
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001776static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001777xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1778 xmlChar buf[XML_MAX_NAMELEN + 5];
1779 const xmlChar *cur = *str;
1780 int len = 0, l;
1781 int c;
1782
1783 c = CUR_SCHAR(cur, l);
1784 if (!IS_LETTER(c) && (c != '_') &&
1785 (c != ':')) {
1786 return(NULL);
1787 }
1788
1789 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1790 (c == '.') || (c == '-') ||
1791 (c == '_') || (c == ':') ||
1792 (IS_COMBINING(c)) ||
1793 (IS_EXTENDER(c))) {
1794 COPY_BUF(l,buf,len,c);
1795 cur += l;
1796 c = CUR_SCHAR(cur, l);
1797 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1798 /*
1799 * Okay someone managed to make a huge name, so he's ready to pay
1800 * for the processing speed.
1801 */
1802 xmlChar *buffer;
1803 int max = len * 2;
1804
1805 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1806 if (buffer == NULL) {
1807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1808 ctxt->sax->error(ctxt->userData,
1809 "xmlParseStringName: out of memory\n");
1810 return(NULL);
1811 }
1812 memcpy(buffer, buf, len);
1813 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1814 (c == '.') || (c == '-') ||
1815 (c == '_') || (c == ':') ||
1816 (IS_COMBINING(c)) ||
1817 (IS_EXTENDER(c))) {
1818 if (len + 10 > max) {
1819 max *= 2;
1820 buffer = (xmlChar *) xmlRealloc(buffer,
1821 max * sizeof(xmlChar));
1822 if (buffer == NULL) {
1823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1824 ctxt->sax->error(ctxt->userData,
1825 "xmlParseStringName: out of memory\n");
1826 return(NULL);
1827 }
1828 }
1829 COPY_BUF(l,buffer,len,c);
1830 cur += l;
1831 c = CUR_SCHAR(cur, l);
1832 }
1833 buffer[len] = 0;
1834 *str = cur;
1835 return(buffer);
1836 }
1837 }
1838 *str = cur;
1839 return(xmlStrndup(buf, len));
1840}
1841
1842/**
1843 * xmlParseNmtoken:
1844 * @ctxt: an XML parser context
1845 *
1846 * parse an XML Nmtoken.
1847 *
1848 * [7] Nmtoken ::= (NameChar)+
1849 *
1850 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1851 *
1852 * Returns the Nmtoken parsed or NULL
1853 */
1854
1855xmlChar *
1856xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1857 xmlChar buf[XML_MAX_NAMELEN + 5];
1858 int len = 0, l;
1859 int c;
1860 int count = 0;
1861
1862 GROW;
1863 c = CUR_CHAR(l);
1864
1865 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1866 (c == '.') || (c == '-') ||
1867 (c == '_') || (c == ':') ||
1868 (IS_COMBINING(c)) ||
1869 (IS_EXTENDER(c))) {
1870 if (count++ > 100) {
1871 count = 0;
1872 GROW;
1873 }
1874 COPY_BUF(l,buf,len,c);
1875 NEXTL(l);
1876 c = CUR_CHAR(l);
1877 if (len >= XML_MAX_NAMELEN) {
1878 /*
1879 * Okay someone managed to make a huge token, so he's ready to pay
1880 * for the processing speed.
1881 */
1882 xmlChar *buffer;
1883 int max = len * 2;
1884
1885 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1886 if (buffer == NULL) {
1887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1888 ctxt->sax->error(ctxt->userData,
1889 "xmlParseNmtoken: out of memory\n");
1890 return(NULL);
1891 }
1892 memcpy(buffer, buf, len);
1893 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1894 (c == '.') || (c == '-') ||
1895 (c == '_') || (c == ':') ||
1896 (IS_COMBINING(c)) ||
1897 (IS_EXTENDER(c))) {
1898 if (count++ > 100) {
1899 count = 0;
1900 GROW;
1901 }
1902 if (len + 10 > max) {
1903 max *= 2;
1904 buffer = (xmlChar *) xmlRealloc(buffer,
1905 max * sizeof(xmlChar));
1906 if (buffer == NULL) {
1907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1908 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001909 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001910 return(NULL);
1911 }
1912 }
1913 COPY_BUF(l,buffer,len,c);
1914 NEXTL(l);
1915 c = CUR_CHAR(l);
1916 }
1917 buffer[len] = 0;
1918 return(buffer);
1919 }
1920 }
1921 if (len == 0)
1922 return(NULL);
1923 return(xmlStrndup(buf, len));
1924}
1925
1926/**
1927 * xmlParseEntityValue:
1928 * @ctxt: an XML parser context
1929 * @orig: if non-NULL store a copy of the original entity value
1930 *
1931 * parse a value for ENTITY declarations
1932 *
1933 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1934 * "'" ([^%&'] | PEReference | Reference)* "'"
1935 *
1936 * Returns the EntityValue parsed with reference substitued or NULL
1937 */
1938
1939xmlChar *
1940xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1941 xmlChar *buf = NULL;
1942 int len = 0;
1943 int size = XML_PARSER_BUFFER_SIZE;
1944 int c, l;
1945 xmlChar stop;
1946 xmlChar *ret = NULL;
1947 const xmlChar *cur = NULL;
1948 xmlParserInputPtr input;
1949
1950 if (RAW == '"') stop = '"';
1951 else if (RAW == '\'') stop = '\'';
1952 else {
1953 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1955 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1956 ctxt->wellFormed = 0;
1957 ctxt->disableSAX = 1;
1958 return(NULL);
1959 }
1960 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1961 if (buf == NULL) {
1962 xmlGenericError(xmlGenericErrorContext,
1963 "malloc of %d byte failed\n", size);
1964 return(NULL);
1965 }
1966
1967 /*
1968 * The content of the entity definition is copied in a buffer.
1969 */
1970
1971 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1972 input = ctxt->input;
1973 GROW;
1974 NEXT;
1975 c = CUR_CHAR(l);
1976 /*
1977 * NOTE: 4.4.5 Included in Literal
1978 * When a parameter entity reference appears in a literal entity
1979 * value, ... a single or double quote character in the replacement
1980 * text is always treated as a normal data character and will not
1981 * terminate the literal.
1982 * In practice it means we stop the loop only when back at parsing
1983 * the initial entity and the quote is found
1984 */
1985 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1986 (ctxt->input != input))) {
1987 if (len + 5 >= size) {
1988 size *= 2;
1989 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1990 if (buf == NULL) {
1991 xmlGenericError(xmlGenericErrorContext,
1992 "realloc of %d byte failed\n", size);
1993 return(NULL);
1994 }
1995 }
1996 COPY_BUF(l,buf,len,c);
1997 NEXTL(l);
1998 /*
1999 * Pop-up of finished entities.
2000 */
2001 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2002 xmlPopInput(ctxt);
2003
2004 GROW;
2005 c = CUR_CHAR(l);
2006 if (c == 0) {
2007 GROW;
2008 c = CUR_CHAR(l);
2009 }
2010 }
2011 buf[len] = 0;
2012
2013 /*
2014 * Raise problem w.r.t. '&' and '%' being used in non-entities
2015 * reference constructs. Note Charref will be handled in
2016 * xmlStringDecodeEntities()
2017 */
2018 cur = buf;
2019 while (*cur != 0) { /* non input consuming */
2020 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2021 xmlChar *name;
2022 xmlChar tmp = *cur;
2023
2024 cur++;
2025 name = xmlParseStringName(ctxt, &cur);
2026 if ((name == NULL) || (*cur != ';')) {
2027 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2029 ctxt->sax->error(ctxt->userData,
2030 "EntityValue: '%c' forbidden except for entities references\n",
2031 tmp);
2032 ctxt->wellFormed = 0;
2033 ctxt->disableSAX = 1;
2034 }
2035 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2036 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2038 ctxt->sax->error(ctxt->userData,
2039 "EntityValue: PEReferences forbidden in internal subset\n",
2040 tmp);
2041 ctxt->wellFormed = 0;
2042 ctxt->disableSAX = 1;
2043 }
2044 if (name != NULL)
2045 xmlFree(name);
2046 }
2047 cur++;
2048 }
2049
2050 /*
2051 * Then PEReference entities are substituted.
2052 */
2053 if (c != stop) {
2054 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2056 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2057 ctxt->wellFormed = 0;
2058 ctxt->disableSAX = 1;
2059 xmlFree(buf);
2060 } else {
2061 NEXT;
2062 /*
2063 * NOTE: 4.4.7 Bypassed
2064 * When a general entity reference appears in the EntityValue in
2065 * an entity declaration, it is bypassed and left as is.
2066 * so XML_SUBSTITUTE_REF is not set here.
2067 */
2068 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2069 0, 0, 0);
2070 if (orig != NULL)
2071 *orig = buf;
2072 else
2073 xmlFree(buf);
2074 }
2075
2076 return(ret);
2077}
2078
2079/**
2080 * xmlParseAttValue:
2081 * @ctxt: an XML parser context
2082 *
2083 * parse a value for an attribute
2084 * Note: the parser won't do substitution of entities here, this
2085 * will be handled later in xmlStringGetNodeList
2086 *
2087 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2088 * "'" ([^<&'] | Reference)* "'"
2089 *
2090 * 3.3.3 Attribute-Value Normalization:
2091 * Before the value of an attribute is passed to the application or
2092 * checked for validity, the XML processor must normalize it as follows:
2093 * - a character reference is processed by appending the referenced
2094 * character to the attribute value
2095 * - an entity reference is processed by recursively processing the
2096 * replacement text of the entity
2097 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2098 * appending #x20 to the normalized value, except that only a single
2099 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2100 * parsed entity or the literal entity value of an internal parsed entity
2101 * - other characters are processed by appending them to the normalized value
2102 * If the declared value is not CDATA, then the XML processor must further
2103 * process the normalized attribute value by discarding any leading and
2104 * trailing space (#x20) characters, and by replacing sequences of space
2105 * (#x20) characters by a single space (#x20) character.
2106 * All attributes for which no declaration has been read should be treated
2107 * by a non-validating parser as if declared CDATA.
2108 *
2109 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2110 */
2111
2112xmlChar *
2113xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2114 xmlChar limit = 0;
2115 xmlChar *buf = NULL;
2116 int len = 0;
2117 int buf_size = 0;
2118 int c, l;
2119 xmlChar *current = NULL;
2120 xmlEntityPtr ent;
2121
2122
2123 SHRINK;
2124 if (NXT(0) == '"') {
2125 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2126 limit = '"';
2127 NEXT;
2128 } else if (NXT(0) == '\'') {
2129 limit = '\'';
2130 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2131 NEXT;
2132 } else {
2133 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2135 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2136 ctxt->wellFormed = 0;
2137 ctxt->disableSAX = 1;
2138 return(NULL);
2139 }
2140
2141 /*
2142 * allocate a translation buffer.
2143 */
2144 buf_size = XML_PARSER_BUFFER_SIZE;
2145 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2146 if (buf == NULL) {
2147 perror("xmlParseAttValue: malloc failed");
2148 return(NULL);
2149 }
2150
2151 /*
2152 * Ok loop until we reach one of the ending char or a size limit.
2153 */
2154 c = CUR_CHAR(l);
2155 while (((NXT(0) != limit) && /* checked */
2156 (c != '<')) || (ctxt->token != 0)) {
2157 if (c == 0) break;
2158 if (ctxt->token == '&') {
2159 /*
2160 * The reparsing will be done in xmlStringGetNodeList()
2161 * called by the attribute() function in SAX.c
2162 */
2163 static xmlChar buffer[6] = "&#38;";
2164
2165 if (len > buf_size - 10) {
2166 growBuffer(buf);
2167 }
2168 current = &buffer[0];
2169 while (*current != 0) { /* non input consuming */
2170 buf[len++] = *current++;
2171 }
2172 ctxt->token = 0;
2173 } else if (c == '&') {
2174 if (NXT(1) == '#') {
2175 int val = xmlParseCharRef(ctxt);
2176 if (val == '&') {
2177 /*
2178 * The reparsing will be done in xmlStringGetNodeList()
2179 * called by the attribute() function in SAX.c
2180 */
2181 static xmlChar buffer[6] = "&#38;";
2182
2183 if (len > buf_size - 10) {
2184 growBuffer(buf);
2185 }
2186 current = &buffer[0];
2187 while (*current != 0) { /* non input consuming */
2188 buf[len++] = *current++;
2189 }
2190 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002191 if (len > buf_size - 10) {
2192 growBuffer(buf);
2193 }
Owen Taylor3473f882001-02-23 17:55:21 +00002194 len += xmlCopyChar(0, &buf[len], val);
2195 }
2196 } else {
2197 ent = xmlParseEntityRef(ctxt);
2198 if ((ent != NULL) &&
2199 (ctxt->replaceEntities != 0)) {
2200 xmlChar *rep;
2201
2202 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2203 rep = xmlStringDecodeEntities(ctxt, ent->content,
2204 XML_SUBSTITUTE_REF, 0, 0, 0);
2205 if (rep != NULL) {
2206 current = rep;
2207 while (*current != 0) { /* non input consuming */
2208 buf[len++] = *current++;
2209 if (len > buf_size - 10) {
2210 growBuffer(buf);
2211 }
2212 }
2213 xmlFree(rep);
2214 }
2215 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002216 if (len > buf_size - 10) {
2217 growBuffer(buf);
2218 }
Owen Taylor3473f882001-02-23 17:55:21 +00002219 if (ent->content != NULL)
2220 buf[len++] = ent->content[0];
2221 }
2222 } else if (ent != NULL) {
2223 int i = xmlStrlen(ent->name);
2224 const xmlChar *cur = ent->name;
2225
2226 /*
2227 * This may look absurd but is needed to detect
2228 * entities problems
2229 */
2230 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2231 (ent->content != NULL)) {
2232 xmlChar *rep;
2233 rep = xmlStringDecodeEntities(ctxt, ent->content,
2234 XML_SUBSTITUTE_REF, 0, 0, 0);
2235 if (rep != NULL)
2236 xmlFree(rep);
2237 }
2238
2239 /*
2240 * Just output the reference
2241 */
2242 buf[len++] = '&';
2243 if (len > buf_size - i - 10) {
2244 growBuffer(buf);
2245 }
2246 for (;i > 0;i--)
2247 buf[len++] = *cur++;
2248 buf[len++] = ';';
2249 }
2250 }
2251 } else {
2252 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2253 COPY_BUF(l,buf,len,0x20);
2254 if (len > buf_size - 10) {
2255 growBuffer(buf);
2256 }
2257 } else {
2258 COPY_BUF(l,buf,len,c);
2259 if (len > buf_size - 10) {
2260 growBuffer(buf);
2261 }
2262 }
2263 NEXTL(l);
2264 }
2265 GROW;
2266 c = CUR_CHAR(l);
2267 }
2268 buf[len++] = 0;
2269 if (RAW == '<') {
2270 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2272 ctxt->sax->error(ctxt->userData,
2273 "Unescaped '<' not allowed in attributes values\n");
2274 ctxt->wellFormed = 0;
2275 ctxt->disableSAX = 1;
2276 } else if (RAW != limit) {
2277 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2279 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2280 ctxt->wellFormed = 0;
2281 ctxt->disableSAX = 1;
2282 } else
2283 NEXT;
2284 return(buf);
2285}
2286
2287/**
2288 * xmlParseSystemLiteral:
2289 * @ctxt: an XML parser context
2290 *
2291 * parse an XML Literal
2292 *
2293 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2294 *
2295 * Returns the SystemLiteral parsed or NULL
2296 */
2297
2298xmlChar *
2299xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2300 xmlChar *buf = NULL;
2301 int len = 0;
2302 int size = XML_PARSER_BUFFER_SIZE;
2303 int cur, l;
2304 xmlChar stop;
2305 int state = ctxt->instate;
2306 int count = 0;
2307
2308 SHRINK;
2309 if (RAW == '"') {
2310 NEXT;
2311 stop = '"';
2312 } else if (RAW == '\'') {
2313 NEXT;
2314 stop = '\'';
2315 } else {
2316 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2318 ctxt->sax->error(ctxt->userData,
2319 "SystemLiteral \" or ' expected\n");
2320 ctxt->wellFormed = 0;
2321 ctxt->disableSAX = 1;
2322 return(NULL);
2323 }
2324
2325 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2326 if (buf == NULL) {
2327 xmlGenericError(xmlGenericErrorContext,
2328 "malloc of %d byte failed\n", size);
2329 return(NULL);
2330 }
2331 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2332 cur = CUR_CHAR(l);
2333 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2334 if (len + 5 >= size) {
2335 size *= 2;
2336 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2337 if (buf == NULL) {
2338 xmlGenericError(xmlGenericErrorContext,
2339 "realloc of %d byte failed\n", size);
2340 ctxt->instate = (xmlParserInputState) state;
2341 return(NULL);
2342 }
2343 }
2344 count++;
2345 if (count > 50) {
2346 GROW;
2347 count = 0;
2348 }
2349 COPY_BUF(l,buf,len,cur);
2350 NEXTL(l);
2351 cur = CUR_CHAR(l);
2352 if (cur == 0) {
2353 GROW;
2354 SHRINK;
2355 cur = CUR_CHAR(l);
2356 }
2357 }
2358 buf[len] = 0;
2359 ctxt->instate = (xmlParserInputState) state;
2360 if (!IS_CHAR(cur)) {
2361 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2363 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2364 ctxt->wellFormed = 0;
2365 ctxt->disableSAX = 1;
2366 } else {
2367 NEXT;
2368 }
2369 return(buf);
2370}
2371
2372/**
2373 * xmlParsePubidLiteral:
2374 * @ctxt: an XML parser context
2375 *
2376 * parse an XML public literal
2377 *
2378 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2379 *
2380 * Returns the PubidLiteral parsed or NULL.
2381 */
2382
2383xmlChar *
2384xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2385 xmlChar *buf = NULL;
2386 int len = 0;
2387 int size = XML_PARSER_BUFFER_SIZE;
2388 xmlChar cur;
2389 xmlChar stop;
2390 int count = 0;
2391
2392 SHRINK;
2393 if (RAW == '"') {
2394 NEXT;
2395 stop = '"';
2396 } else if (RAW == '\'') {
2397 NEXT;
2398 stop = '\'';
2399 } else {
2400 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2402 ctxt->sax->error(ctxt->userData,
2403 "SystemLiteral \" or ' expected\n");
2404 ctxt->wellFormed = 0;
2405 ctxt->disableSAX = 1;
2406 return(NULL);
2407 }
2408 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2409 if (buf == NULL) {
2410 xmlGenericError(xmlGenericErrorContext,
2411 "malloc of %d byte failed\n", size);
2412 return(NULL);
2413 }
2414 cur = CUR;
2415 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2416 if (len + 1 >= size) {
2417 size *= 2;
2418 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2419 if (buf == NULL) {
2420 xmlGenericError(xmlGenericErrorContext,
2421 "realloc of %d byte failed\n", size);
2422 return(NULL);
2423 }
2424 }
2425 buf[len++] = cur;
2426 count++;
2427 if (count > 50) {
2428 GROW;
2429 count = 0;
2430 }
2431 NEXT;
2432 cur = CUR;
2433 if (cur == 0) {
2434 GROW;
2435 SHRINK;
2436 cur = CUR;
2437 }
2438 }
2439 buf[len] = 0;
2440 if (cur != stop) {
2441 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2443 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2444 ctxt->wellFormed = 0;
2445 ctxt->disableSAX = 1;
2446 } else {
2447 NEXT;
2448 }
2449 return(buf);
2450}
2451
Daniel Veillard48b2f892001-02-25 16:11:03 +00002452void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002453/**
2454 * xmlParseCharData:
2455 * @ctxt: an XML parser context
2456 * @cdata: int indicating whether we are within a CDATA section
2457 *
2458 * parse a CharData section.
2459 * if we are within a CDATA section ']]>' marks an end of section.
2460 *
2461 * The right angle bracket (>) may be represented using the string "&gt;",
2462 * and must, for compatibility, be escaped using "&gt;" or a character
2463 * reference when it appears in the string "]]>" in content, when that
2464 * string is not marking the end of a CDATA section.
2465 *
2466 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2467 */
2468
2469void
2470xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002471 const xmlChar *in;
2472 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002473 int line = ctxt->input->line;
2474 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002475
2476 SHRINK;
2477 GROW;
2478 /*
2479 * Accelerated common case where input don't need to be
2480 * modified before passing it to the handler.
2481 */
2482 if ((ctxt->token == 0) && (!cdata)) {
2483 in = ctxt->input->cur;
2484 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002485get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002486 while (((*in >= 0x20) && (*in != '<') &&
2487 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2488 in++;
2489 if (*in == 0xA) {
2490 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002491 in++;
2492 while (*in == 0xA) {
2493 ctxt->input->line++;
2494 in++;
2495 }
2496 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002497 }
2498 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002499 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002500 if (IS_BLANK(*ctxt->input->cur)) {
2501 const xmlChar *tmp = ctxt->input->cur;
2502 ctxt->input->cur = in;
2503 if (areBlanks(ctxt, tmp, nbchar)) {
2504 if (ctxt->sax->ignorableWhitespace != NULL)
2505 ctxt->sax->ignorableWhitespace(ctxt->userData,
2506 tmp, nbchar);
2507 } else {
2508 if (ctxt->sax->characters != NULL)
2509 ctxt->sax->characters(ctxt->userData,
2510 tmp, nbchar);
2511 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002512 } else {
2513 if (ctxt->sax->characters != NULL)
2514 ctxt->sax->characters(ctxt->userData,
2515 ctxt->input->cur, nbchar);
2516 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002517 }
2518 ctxt->input->cur = in;
2519 if (*in == 0xD) {
2520 in++;
2521 if (*in == 0xA) {
2522 ctxt->input->cur = in;
2523 in++;
2524 ctxt->input->line++;
2525 continue; /* while */
2526 }
2527 in--;
2528 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002529 if (*in == '<') {
2530 return;
2531 }
2532 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002533 return;
2534 }
2535 SHRINK;
2536 GROW;
2537 in = ctxt->input->cur;
2538 } while ((*in >= 0x20) && (*in <= 0x7F));
2539 nbchar = 0;
2540 }
Daniel Veillard50582112001-03-26 22:52:16 +00002541 ctxt->input->line = line;
2542 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002543 xmlParseCharDataComplex(ctxt, cdata);
2544}
2545
2546void
2547xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002548 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2549 int nbchar = 0;
2550 int cur, l;
2551 int count = 0;
2552
2553 SHRINK;
2554 GROW;
2555 cur = CUR_CHAR(l);
2556 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2557 ((cur != '&') || (ctxt->token == '&')) &&
2558 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2559 if ((cur == ']') && (NXT(1) == ']') &&
2560 (NXT(2) == '>')) {
2561 if (cdata) break;
2562 else {
2563 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2565 ctxt->sax->error(ctxt->userData,
2566 "Sequence ']]>' not allowed in content\n");
2567 /* Should this be relaxed ??? I see a "must here */
2568 ctxt->wellFormed = 0;
2569 ctxt->disableSAX = 1;
2570 }
2571 }
2572 COPY_BUF(l,buf,nbchar,cur);
2573 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2574 /*
2575 * Ok the segment is to be consumed as chars.
2576 */
2577 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2578 if (areBlanks(ctxt, buf, nbchar)) {
2579 if (ctxt->sax->ignorableWhitespace != NULL)
2580 ctxt->sax->ignorableWhitespace(ctxt->userData,
2581 buf, nbchar);
2582 } else {
2583 if (ctxt->sax->characters != NULL)
2584 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2585 }
2586 }
2587 nbchar = 0;
2588 }
2589 count++;
2590 if (count > 50) {
2591 GROW;
2592 count = 0;
2593 }
2594 NEXTL(l);
2595 cur = CUR_CHAR(l);
2596 }
2597 if (nbchar != 0) {
2598 /*
2599 * Ok the segment is to be consumed as chars.
2600 */
2601 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2602 if (areBlanks(ctxt, buf, nbchar)) {
2603 if (ctxt->sax->ignorableWhitespace != NULL)
2604 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2605 } else {
2606 if (ctxt->sax->characters != NULL)
2607 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2608 }
2609 }
2610 }
2611}
2612
2613/**
2614 * xmlParseExternalID:
2615 * @ctxt: an XML parser context
2616 * @publicID: a xmlChar** receiving PubidLiteral
2617 * @strict: indicate whether we should restrict parsing to only
2618 * production [75], see NOTE below
2619 *
2620 * Parse an External ID or a Public ID
2621 *
2622 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2623 * 'PUBLIC' S PubidLiteral S SystemLiteral
2624 *
2625 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2626 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2627 *
2628 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2629 *
2630 * Returns the function returns SystemLiteral and in the second
2631 * case publicID receives PubidLiteral, is strict is off
2632 * it is possible to return NULL and have publicID set.
2633 */
2634
2635xmlChar *
2636xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2637 xmlChar *URI = NULL;
2638
2639 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002640
2641 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002642 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2643 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2644 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2645 SKIP(6);
2646 if (!IS_BLANK(CUR)) {
2647 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData,
2650 "Space required after 'SYSTEM'\n");
2651 ctxt->wellFormed = 0;
2652 ctxt->disableSAX = 1;
2653 }
2654 SKIP_BLANKS;
2655 URI = xmlParseSystemLiteral(ctxt);
2656 if (URI == NULL) {
2657 ctxt->errNo = XML_ERR_URI_REQUIRED;
2658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2659 ctxt->sax->error(ctxt->userData,
2660 "xmlParseExternalID: SYSTEM, no URI\n");
2661 ctxt->wellFormed = 0;
2662 ctxt->disableSAX = 1;
2663 }
2664 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2665 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2666 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2667 SKIP(6);
2668 if (!IS_BLANK(CUR)) {
2669 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2671 ctxt->sax->error(ctxt->userData,
2672 "Space required after 'PUBLIC'\n");
2673 ctxt->wellFormed = 0;
2674 ctxt->disableSAX = 1;
2675 }
2676 SKIP_BLANKS;
2677 *publicID = xmlParsePubidLiteral(ctxt);
2678 if (*publicID == NULL) {
2679 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2681 ctxt->sax->error(ctxt->userData,
2682 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2683 ctxt->wellFormed = 0;
2684 ctxt->disableSAX = 1;
2685 }
2686 if (strict) {
2687 /*
2688 * We don't handle [83] so "S SystemLiteral" is required.
2689 */
2690 if (!IS_BLANK(CUR)) {
2691 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2693 ctxt->sax->error(ctxt->userData,
2694 "Space required after the Public Identifier\n");
2695 ctxt->wellFormed = 0;
2696 ctxt->disableSAX = 1;
2697 }
2698 } else {
2699 /*
2700 * We handle [83] so we return immediately, if
2701 * "S SystemLiteral" is not detected. From a purely parsing
2702 * point of view that's a nice mess.
2703 */
2704 const xmlChar *ptr;
2705 GROW;
2706
2707 ptr = CUR_PTR;
2708 if (!IS_BLANK(*ptr)) return(NULL);
2709
2710 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2711 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2712 }
2713 SKIP_BLANKS;
2714 URI = xmlParseSystemLiteral(ctxt);
2715 if (URI == NULL) {
2716 ctxt->errNo = XML_ERR_URI_REQUIRED;
2717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2718 ctxt->sax->error(ctxt->userData,
2719 "xmlParseExternalID: PUBLIC, no URI\n");
2720 ctxt->wellFormed = 0;
2721 ctxt->disableSAX = 1;
2722 }
2723 }
2724 return(URI);
2725}
2726
2727/**
2728 * xmlParseComment:
2729 * @ctxt: an XML parser context
2730 *
2731 * Skip an XML (SGML) comment <!-- .... -->
2732 * The spec says that "For compatibility, the string "--" (double-hyphen)
2733 * must not occur within comments. "
2734 *
2735 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2736 */
2737void
2738xmlParseComment(xmlParserCtxtPtr ctxt) {
2739 xmlChar *buf = NULL;
2740 int len;
2741 int size = XML_PARSER_BUFFER_SIZE;
2742 int q, ql;
2743 int r, rl;
2744 int cur, l;
2745 xmlParserInputState state;
2746 xmlParserInputPtr input = ctxt->input;
2747 int count = 0;
2748
2749 /*
2750 * Check that there is a comment right here.
2751 */
2752 if ((RAW != '<') || (NXT(1) != '!') ||
2753 (NXT(2) != '-') || (NXT(3) != '-')) return;
2754
2755 state = ctxt->instate;
2756 ctxt->instate = XML_PARSER_COMMENT;
2757 SHRINK;
2758 SKIP(4);
2759 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2760 if (buf == NULL) {
2761 xmlGenericError(xmlGenericErrorContext,
2762 "malloc of %d byte failed\n", size);
2763 ctxt->instate = state;
2764 return;
2765 }
2766 q = CUR_CHAR(ql);
2767 NEXTL(ql);
2768 r = CUR_CHAR(rl);
2769 NEXTL(rl);
2770 cur = CUR_CHAR(l);
2771 len = 0;
2772 while (IS_CHAR(cur) && /* checked */
2773 ((cur != '>') ||
2774 (r != '-') || (q != '-'))) {
2775 if ((r == '-') && (q == '-') && (len > 1)) {
2776 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2778 ctxt->sax->error(ctxt->userData,
2779 "Comment must not contain '--' (double-hyphen)`\n");
2780 ctxt->wellFormed = 0;
2781 ctxt->disableSAX = 1;
2782 }
2783 if (len + 5 >= size) {
2784 size *= 2;
2785 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2786 if (buf == NULL) {
2787 xmlGenericError(xmlGenericErrorContext,
2788 "realloc of %d byte failed\n", size);
2789 ctxt->instate = state;
2790 return;
2791 }
2792 }
2793 COPY_BUF(ql,buf,len,q);
2794 q = r;
2795 ql = rl;
2796 r = cur;
2797 rl = l;
2798
2799 count++;
2800 if (count > 50) {
2801 GROW;
2802 count = 0;
2803 }
2804 NEXTL(l);
2805 cur = CUR_CHAR(l);
2806 if (cur == 0) {
2807 SHRINK;
2808 GROW;
2809 cur = CUR_CHAR(l);
2810 }
2811 }
2812 buf[len] = 0;
2813 if (!IS_CHAR(cur)) {
2814 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2816 ctxt->sax->error(ctxt->userData,
2817 "Comment not terminated \n<!--%.50s\n", buf);
2818 ctxt->wellFormed = 0;
2819 ctxt->disableSAX = 1;
2820 xmlFree(buf);
2821 } else {
2822 if (input != ctxt->input) {
2823 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2825 ctxt->sax->error(ctxt->userData,
2826"Comment doesn't start and stop in the same entity\n");
2827 ctxt->wellFormed = 0;
2828 ctxt->disableSAX = 1;
2829 }
2830 NEXT;
2831 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2832 (!ctxt->disableSAX))
2833 ctxt->sax->comment(ctxt->userData, buf);
2834 xmlFree(buf);
2835 }
2836 ctxt->instate = state;
2837}
2838
2839/**
2840 * xmlParsePITarget:
2841 * @ctxt: an XML parser context
2842 *
2843 * parse the name of a PI
2844 *
2845 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2846 *
2847 * Returns the PITarget name or NULL
2848 */
2849
2850xmlChar *
2851xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2852 xmlChar *name;
2853
2854 name = xmlParseName(ctxt);
2855 if ((name != NULL) &&
2856 ((name[0] == 'x') || (name[0] == 'X')) &&
2857 ((name[1] == 'm') || (name[1] == 'M')) &&
2858 ((name[2] == 'l') || (name[2] == 'L'))) {
2859 int i;
2860 if ((name[0] == 'x') && (name[1] == 'm') &&
2861 (name[2] == 'l') && (name[3] == 0)) {
2862 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2864 ctxt->sax->error(ctxt->userData,
2865 "XML declaration allowed only at the start of the document\n");
2866 ctxt->wellFormed = 0;
2867 ctxt->disableSAX = 1;
2868 return(name);
2869 } else if (name[3] == 0) {
2870 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2872 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2873 ctxt->wellFormed = 0;
2874 ctxt->disableSAX = 1;
2875 return(name);
2876 }
2877 for (i = 0;;i++) {
2878 if (xmlW3CPIs[i] == NULL) break;
2879 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2880 return(name);
2881 }
2882 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2883 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2884 ctxt->sax->warning(ctxt->userData,
2885 "xmlParsePItarget: invalid name prefix 'xml'\n");
2886 }
2887 }
2888 return(name);
2889}
2890
2891/**
2892 * xmlParsePI:
2893 * @ctxt: an XML parser context
2894 *
2895 * parse an XML Processing Instruction.
2896 *
2897 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2898 *
2899 * The processing is transfered to SAX once parsed.
2900 */
2901
2902void
2903xmlParsePI(xmlParserCtxtPtr ctxt) {
2904 xmlChar *buf = NULL;
2905 int len = 0;
2906 int size = XML_PARSER_BUFFER_SIZE;
2907 int cur, l;
2908 xmlChar *target;
2909 xmlParserInputState state;
2910 int count = 0;
2911
2912 if ((RAW == '<') && (NXT(1) == '?')) {
2913 xmlParserInputPtr input = ctxt->input;
2914 state = ctxt->instate;
2915 ctxt->instate = XML_PARSER_PI;
2916 /*
2917 * this is a Processing Instruction.
2918 */
2919 SKIP(2);
2920 SHRINK;
2921
2922 /*
2923 * Parse the target name and check for special support like
2924 * namespace.
2925 */
2926 target = xmlParsePITarget(ctxt);
2927 if (target != NULL) {
2928 if ((RAW == '?') && (NXT(1) == '>')) {
2929 if (input != ctxt->input) {
2930 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2932 ctxt->sax->error(ctxt->userData,
2933 "PI declaration doesn't start and stop in the same entity\n");
2934 ctxt->wellFormed = 0;
2935 ctxt->disableSAX = 1;
2936 }
2937 SKIP(2);
2938
2939 /*
2940 * SAX: PI detected.
2941 */
2942 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2943 (ctxt->sax->processingInstruction != NULL))
2944 ctxt->sax->processingInstruction(ctxt->userData,
2945 target, NULL);
2946 ctxt->instate = state;
2947 xmlFree(target);
2948 return;
2949 }
2950 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2951 if (buf == NULL) {
2952 xmlGenericError(xmlGenericErrorContext,
2953 "malloc of %d byte failed\n", size);
2954 ctxt->instate = state;
2955 return;
2956 }
2957 cur = CUR;
2958 if (!IS_BLANK(cur)) {
2959 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2961 ctxt->sax->error(ctxt->userData,
2962 "xmlParsePI: PI %s space expected\n", target);
2963 ctxt->wellFormed = 0;
2964 ctxt->disableSAX = 1;
2965 }
2966 SKIP_BLANKS;
2967 cur = CUR_CHAR(l);
2968 while (IS_CHAR(cur) && /* checked */
2969 ((cur != '?') || (NXT(1) != '>'))) {
2970 if (len + 5 >= size) {
2971 size *= 2;
2972 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2973 if (buf == NULL) {
2974 xmlGenericError(xmlGenericErrorContext,
2975 "realloc of %d byte failed\n", size);
2976 ctxt->instate = state;
2977 return;
2978 }
2979 }
2980 count++;
2981 if (count > 50) {
2982 GROW;
2983 count = 0;
2984 }
2985 COPY_BUF(l,buf,len,cur);
2986 NEXTL(l);
2987 cur = CUR_CHAR(l);
2988 if (cur == 0) {
2989 SHRINK;
2990 GROW;
2991 cur = CUR_CHAR(l);
2992 }
2993 }
2994 buf[len] = 0;
2995 if (cur != '?') {
2996 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2998 ctxt->sax->error(ctxt->userData,
2999 "xmlParsePI: PI %s never end ...\n", target);
3000 ctxt->wellFormed = 0;
3001 ctxt->disableSAX = 1;
3002 } else {
3003 if (input != ctxt->input) {
3004 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3006 ctxt->sax->error(ctxt->userData,
3007 "PI declaration doesn't start and stop in the same entity\n");
3008 ctxt->wellFormed = 0;
3009 ctxt->disableSAX = 1;
3010 }
3011 SKIP(2);
3012
3013 /*
3014 * SAX: PI detected.
3015 */
3016 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3017 (ctxt->sax->processingInstruction != NULL))
3018 ctxt->sax->processingInstruction(ctxt->userData,
3019 target, buf);
3020 }
3021 xmlFree(buf);
3022 xmlFree(target);
3023 } else {
3024 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3026 ctxt->sax->error(ctxt->userData,
3027 "xmlParsePI : no target name\n");
3028 ctxt->wellFormed = 0;
3029 ctxt->disableSAX = 1;
3030 }
3031 ctxt->instate = state;
3032 }
3033}
3034
3035/**
3036 * xmlParseNotationDecl:
3037 * @ctxt: an XML parser context
3038 *
3039 * parse a notation declaration
3040 *
3041 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3042 *
3043 * Hence there is actually 3 choices:
3044 * 'PUBLIC' S PubidLiteral
3045 * 'PUBLIC' S PubidLiteral S SystemLiteral
3046 * and 'SYSTEM' S SystemLiteral
3047 *
3048 * See the NOTE on xmlParseExternalID().
3049 */
3050
3051void
3052xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3053 xmlChar *name;
3054 xmlChar *Pubid;
3055 xmlChar *Systemid;
3056
3057 if ((RAW == '<') && (NXT(1) == '!') &&
3058 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3059 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3060 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3061 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3062 xmlParserInputPtr input = ctxt->input;
3063 SHRINK;
3064 SKIP(10);
3065 if (!IS_BLANK(CUR)) {
3066 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3068 ctxt->sax->error(ctxt->userData,
3069 "Space required after '<!NOTATION'\n");
3070 ctxt->wellFormed = 0;
3071 ctxt->disableSAX = 1;
3072 return;
3073 }
3074 SKIP_BLANKS;
3075
Daniel Veillard76d66f42001-05-16 21:05:17 +00003076 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003077 if (name == NULL) {
3078 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3080 ctxt->sax->error(ctxt->userData,
3081 "NOTATION: Name expected here\n");
3082 ctxt->wellFormed = 0;
3083 ctxt->disableSAX = 1;
3084 return;
3085 }
3086 if (!IS_BLANK(CUR)) {
3087 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3089 ctxt->sax->error(ctxt->userData,
3090 "Space required after the NOTATION name'\n");
3091 ctxt->wellFormed = 0;
3092 ctxt->disableSAX = 1;
3093 return;
3094 }
3095 SKIP_BLANKS;
3096
3097 /*
3098 * Parse the IDs.
3099 */
3100 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3101 SKIP_BLANKS;
3102
3103 if (RAW == '>') {
3104 if (input != ctxt->input) {
3105 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3106 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3107 ctxt->sax->error(ctxt->userData,
3108"Notation declaration doesn't start and stop in the same entity\n");
3109 ctxt->wellFormed = 0;
3110 ctxt->disableSAX = 1;
3111 }
3112 NEXT;
3113 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3114 (ctxt->sax->notationDecl != NULL))
3115 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3116 } else {
3117 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3119 ctxt->sax->error(ctxt->userData,
3120 "'>' required to close NOTATION declaration\n");
3121 ctxt->wellFormed = 0;
3122 ctxt->disableSAX = 1;
3123 }
3124 xmlFree(name);
3125 if (Systemid != NULL) xmlFree(Systemid);
3126 if (Pubid != NULL) xmlFree(Pubid);
3127 }
3128}
3129
3130/**
3131 * xmlParseEntityDecl:
3132 * @ctxt: an XML parser context
3133 *
3134 * parse <!ENTITY declarations
3135 *
3136 * [70] EntityDecl ::= GEDecl | PEDecl
3137 *
3138 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3139 *
3140 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3141 *
3142 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3143 *
3144 * [74] PEDef ::= EntityValue | ExternalID
3145 *
3146 * [76] NDataDecl ::= S 'NDATA' S Name
3147 *
3148 * [ VC: Notation Declared ]
3149 * The Name must match the declared name of a notation.
3150 */
3151
3152void
3153xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3154 xmlChar *name = NULL;
3155 xmlChar *value = NULL;
3156 xmlChar *URI = NULL, *literal = NULL;
3157 xmlChar *ndata = NULL;
3158 int isParameter = 0;
3159 xmlChar *orig = NULL;
3160
3161 GROW;
3162 if ((RAW == '<') && (NXT(1) == '!') &&
3163 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3164 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3165 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3166 xmlParserInputPtr input = ctxt->input;
3167 ctxt->instate = XML_PARSER_ENTITY_DECL;
3168 SHRINK;
3169 SKIP(8);
3170 if (!IS_BLANK(CUR)) {
3171 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3173 ctxt->sax->error(ctxt->userData,
3174 "Space required after '<!ENTITY'\n");
3175 ctxt->wellFormed = 0;
3176 ctxt->disableSAX = 1;
3177 }
3178 SKIP_BLANKS;
3179
3180 if (RAW == '%') {
3181 NEXT;
3182 if (!IS_BLANK(CUR)) {
3183 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3185 ctxt->sax->error(ctxt->userData,
3186 "Space required after '%'\n");
3187 ctxt->wellFormed = 0;
3188 ctxt->disableSAX = 1;
3189 }
3190 SKIP_BLANKS;
3191 isParameter = 1;
3192 }
3193
Daniel Veillard76d66f42001-05-16 21:05:17 +00003194 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003195 if (name == NULL) {
3196 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3198 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3199 ctxt->wellFormed = 0;
3200 ctxt->disableSAX = 1;
3201 return;
3202 }
3203 if (!IS_BLANK(CUR)) {
3204 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3206 ctxt->sax->error(ctxt->userData,
3207 "Space required after the entity name\n");
3208 ctxt->wellFormed = 0;
3209 ctxt->disableSAX = 1;
3210 }
3211 SKIP_BLANKS;
3212
3213 /*
3214 * handle the various case of definitions...
3215 */
3216 if (isParameter) {
3217 if ((RAW == '"') || (RAW == '\'')) {
3218 value = xmlParseEntityValue(ctxt, &orig);
3219 if (value) {
3220 if ((ctxt->sax != NULL) &&
3221 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3222 ctxt->sax->entityDecl(ctxt->userData, name,
3223 XML_INTERNAL_PARAMETER_ENTITY,
3224 NULL, NULL, value);
3225 }
3226 } else {
3227 URI = xmlParseExternalID(ctxt, &literal, 1);
3228 if ((URI == NULL) && (literal == NULL)) {
3229 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3231 ctxt->sax->error(ctxt->userData,
3232 "Entity value required\n");
3233 ctxt->wellFormed = 0;
3234 ctxt->disableSAX = 1;
3235 }
3236 if (URI) {
3237 xmlURIPtr uri;
3238
3239 uri = xmlParseURI((const char *) URI);
3240 if (uri == NULL) {
3241 ctxt->errNo = XML_ERR_INVALID_URI;
3242 if ((ctxt->sax != NULL) &&
3243 (!ctxt->disableSAX) &&
3244 (ctxt->sax->error != NULL))
3245 ctxt->sax->error(ctxt->userData,
3246 "Invalid URI: %s\n", URI);
3247 ctxt->wellFormed = 0;
3248 } else {
3249 if (uri->fragment != NULL) {
3250 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3251 if ((ctxt->sax != NULL) &&
3252 (!ctxt->disableSAX) &&
3253 (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData,
3255 "Fragment not allowed: %s\n", URI);
3256 ctxt->wellFormed = 0;
3257 } else {
3258 if ((ctxt->sax != NULL) &&
3259 (!ctxt->disableSAX) &&
3260 (ctxt->sax->entityDecl != NULL))
3261 ctxt->sax->entityDecl(ctxt->userData, name,
3262 XML_EXTERNAL_PARAMETER_ENTITY,
3263 literal, URI, NULL);
3264 }
3265 xmlFreeURI(uri);
3266 }
3267 }
3268 }
3269 } else {
3270 if ((RAW == '"') || (RAW == '\'')) {
3271 value = xmlParseEntityValue(ctxt, &orig);
3272 if ((ctxt->sax != NULL) &&
3273 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3274 ctxt->sax->entityDecl(ctxt->userData, name,
3275 XML_INTERNAL_GENERAL_ENTITY,
3276 NULL, NULL, value);
3277 } else {
3278 URI = xmlParseExternalID(ctxt, &literal, 1);
3279 if ((URI == NULL) && (literal == NULL)) {
3280 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3282 ctxt->sax->error(ctxt->userData,
3283 "Entity value required\n");
3284 ctxt->wellFormed = 0;
3285 ctxt->disableSAX = 1;
3286 }
3287 if (URI) {
3288 xmlURIPtr uri;
3289
3290 uri = xmlParseURI((const char *)URI);
3291 if (uri == NULL) {
3292 ctxt->errNo = XML_ERR_INVALID_URI;
3293 if ((ctxt->sax != NULL) &&
3294 (!ctxt->disableSAX) &&
3295 (ctxt->sax->error != NULL))
3296 ctxt->sax->error(ctxt->userData,
3297 "Invalid URI: %s\n", URI);
3298 ctxt->wellFormed = 0;
3299 } else {
3300 if (uri->fragment != NULL) {
3301 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3302 if ((ctxt->sax != NULL) &&
3303 (!ctxt->disableSAX) &&
3304 (ctxt->sax->error != NULL))
3305 ctxt->sax->error(ctxt->userData,
3306 "Fragment not allowed: %s\n", URI);
3307 ctxt->wellFormed = 0;
3308 }
3309 xmlFreeURI(uri);
3310 }
3311 }
3312 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3313 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3315 ctxt->sax->error(ctxt->userData,
3316 "Space required before 'NDATA'\n");
3317 ctxt->wellFormed = 0;
3318 ctxt->disableSAX = 1;
3319 }
3320 SKIP_BLANKS;
3321 if ((RAW == 'N') && (NXT(1) == 'D') &&
3322 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3323 (NXT(4) == 'A')) {
3324 SKIP(5);
3325 if (!IS_BLANK(CUR)) {
3326 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3328 ctxt->sax->error(ctxt->userData,
3329 "Space required after 'NDATA'\n");
3330 ctxt->wellFormed = 0;
3331 ctxt->disableSAX = 1;
3332 }
3333 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003334 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003335 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3336 (ctxt->sax->unparsedEntityDecl != NULL))
3337 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3338 literal, URI, ndata);
3339 } else {
3340 if ((ctxt->sax != NULL) &&
3341 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3342 ctxt->sax->entityDecl(ctxt->userData, name,
3343 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3344 literal, URI, NULL);
3345 }
3346 }
3347 }
3348 SKIP_BLANKS;
3349 if (RAW != '>') {
3350 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3352 ctxt->sax->error(ctxt->userData,
3353 "xmlParseEntityDecl: entity %s not terminated\n", name);
3354 ctxt->wellFormed = 0;
3355 ctxt->disableSAX = 1;
3356 } else {
3357 if (input != ctxt->input) {
3358 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3360 ctxt->sax->error(ctxt->userData,
3361"Entity declaration doesn't start and stop in the same entity\n");
3362 ctxt->wellFormed = 0;
3363 ctxt->disableSAX = 1;
3364 }
3365 NEXT;
3366 }
3367 if (orig != NULL) {
3368 /*
3369 * Ugly mechanism to save the raw entity value.
3370 */
3371 xmlEntityPtr cur = NULL;
3372
3373 if (isParameter) {
3374 if ((ctxt->sax != NULL) &&
3375 (ctxt->sax->getParameterEntity != NULL))
3376 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3377 } else {
3378 if ((ctxt->sax != NULL) &&
3379 (ctxt->sax->getEntity != NULL))
3380 cur = ctxt->sax->getEntity(ctxt->userData, name);
3381 }
3382 if (cur != NULL) {
3383 if (cur->orig != NULL)
3384 xmlFree(orig);
3385 else
3386 cur->orig = orig;
3387 } else
3388 xmlFree(orig);
3389 }
3390 if (name != NULL) xmlFree(name);
3391 if (value != NULL) xmlFree(value);
3392 if (URI != NULL) xmlFree(URI);
3393 if (literal != NULL) xmlFree(literal);
3394 if (ndata != NULL) xmlFree(ndata);
3395 }
3396}
3397
3398/**
3399 * xmlParseDefaultDecl:
3400 * @ctxt: an XML parser context
3401 * @value: Receive a possible fixed default value for the attribute
3402 *
3403 * Parse an attribute default declaration
3404 *
3405 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3406 *
3407 * [ VC: Required Attribute ]
3408 * if the default declaration is the keyword #REQUIRED, then the
3409 * attribute must be specified for all elements of the type in the
3410 * attribute-list declaration.
3411 *
3412 * [ VC: Attribute Default Legal ]
3413 * The declared default value must meet the lexical constraints of
3414 * the declared attribute type c.f. xmlValidateAttributeDecl()
3415 *
3416 * [ VC: Fixed Attribute Default ]
3417 * if an attribute has a default value declared with the #FIXED
3418 * keyword, instances of that attribute must match the default value.
3419 *
3420 * [ WFC: No < in Attribute Values ]
3421 * handled in xmlParseAttValue()
3422 *
3423 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3424 * or XML_ATTRIBUTE_FIXED.
3425 */
3426
3427int
3428xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3429 int val;
3430 xmlChar *ret;
3431
3432 *value = NULL;
3433 if ((RAW == '#') && (NXT(1) == 'R') &&
3434 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3435 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3436 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3437 (NXT(8) == 'D')) {
3438 SKIP(9);
3439 return(XML_ATTRIBUTE_REQUIRED);
3440 }
3441 if ((RAW == '#') && (NXT(1) == 'I') &&
3442 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3443 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3444 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3445 SKIP(8);
3446 return(XML_ATTRIBUTE_IMPLIED);
3447 }
3448 val = XML_ATTRIBUTE_NONE;
3449 if ((RAW == '#') && (NXT(1) == 'F') &&
3450 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3451 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3452 SKIP(6);
3453 val = XML_ATTRIBUTE_FIXED;
3454 if (!IS_BLANK(CUR)) {
3455 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3457 ctxt->sax->error(ctxt->userData,
3458 "Space required after '#FIXED'\n");
3459 ctxt->wellFormed = 0;
3460 ctxt->disableSAX = 1;
3461 }
3462 SKIP_BLANKS;
3463 }
3464 ret = xmlParseAttValue(ctxt);
3465 ctxt->instate = XML_PARSER_DTD;
3466 if (ret == NULL) {
3467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3468 ctxt->sax->error(ctxt->userData,
3469 "Attribute default value declaration error\n");
3470 ctxt->wellFormed = 0;
3471 ctxt->disableSAX = 1;
3472 } else
3473 *value = ret;
3474 return(val);
3475}
3476
3477/**
3478 * xmlParseNotationType:
3479 * @ctxt: an XML parser context
3480 *
3481 * parse an Notation attribute type.
3482 *
3483 * Note: the leading 'NOTATION' S part has already being parsed...
3484 *
3485 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3486 *
3487 * [ VC: Notation Attributes ]
3488 * Values of this type must match one of the notation names included
3489 * in the declaration; all notation names in the declaration must be declared.
3490 *
3491 * Returns: the notation attribute tree built while parsing
3492 */
3493
3494xmlEnumerationPtr
3495xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3496 xmlChar *name;
3497 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3498
3499 if (RAW != '(') {
3500 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3502 ctxt->sax->error(ctxt->userData,
3503 "'(' required to start 'NOTATION'\n");
3504 ctxt->wellFormed = 0;
3505 ctxt->disableSAX = 1;
3506 return(NULL);
3507 }
3508 SHRINK;
3509 do {
3510 NEXT;
3511 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003512 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003513 if (name == NULL) {
3514 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3516 ctxt->sax->error(ctxt->userData,
3517 "Name expected in NOTATION declaration\n");
3518 ctxt->wellFormed = 0;
3519 ctxt->disableSAX = 1;
3520 return(ret);
3521 }
3522 cur = xmlCreateEnumeration(name);
3523 xmlFree(name);
3524 if (cur == NULL) return(ret);
3525 if (last == NULL) ret = last = cur;
3526 else {
3527 last->next = cur;
3528 last = cur;
3529 }
3530 SKIP_BLANKS;
3531 } while (RAW == '|');
3532 if (RAW != ')') {
3533 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3535 ctxt->sax->error(ctxt->userData,
3536 "')' required to finish NOTATION declaration\n");
3537 ctxt->wellFormed = 0;
3538 ctxt->disableSAX = 1;
3539 if ((last != NULL) && (last != ret))
3540 xmlFreeEnumeration(last);
3541 return(ret);
3542 }
3543 NEXT;
3544 return(ret);
3545}
3546
3547/**
3548 * xmlParseEnumerationType:
3549 * @ctxt: an XML parser context
3550 *
3551 * parse an Enumeration attribute type.
3552 *
3553 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3554 *
3555 * [ VC: Enumeration ]
3556 * Values of this type must match one of the Nmtoken tokens in
3557 * the declaration
3558 *
3559 * Returns: the enumeration attribute tree built while parsing
3560 */
3561
3562xmlEnumerationPtr
3563xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3564 xmlChar *name;
3565 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3566
3567 if (RAW != '(') {
3568 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3570 ctxt->sax->error(ctxt->userData,
3571 "'(' required to start ATTLIST enumeration\n");
3572 ctxt->wellFormed = 0;
3573 ctxt->disableSAX = 1;
3574 return(NULL);
3575 }
3576 SHRINK;
3577 do {
3578 NEXT;
3579 SKIP_BLANKS;
3580 name = xmlParseNmtoken(ctxt);
3581 if (name == NULL) {
3582 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3584 ctxt->sax->error(ctxt->userData,
3585 "NmToken expected in ATTLIST enumeration\n");
3586 ctxt->wellFormed = 0;
3587 ctxt->disableSAX = 1;
3588 return(ret);
3589 }
3590 cur = xmlCreateEnumeration(name);
3591 xmlFree(name);
3592 if (cur == NULL) return(ret);
3593 if (last == NULL) ret = last = cur;
3594 else {
3595 last->next = cur;
3596 last = cur;
3597 }
3598 SKIP_BLANKS;
3599 } while (RAW == '|');
3600 if (RAW != ')') {
3601 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3603 ctxt->sax->error(ctxt->userData,
3604 "')' required to finish ATTLIST enumeration\n");
3605 ctxt->wellFormed = 0;
3606 ctxt->disableSAX = 1;
3607 return(ret);
3608 }
3609 NEXT;
3610 return(ret);
3611}
3612
3613/**
3614 * xmlParseEnumeratedType:
3615 * @ctxt: an XML parser context
3616 * @tree: the enumeration tree built while parsing
3617 *
3618 * parse an Enumerated attribute type.
3619 *
3620 * [57] EnumeratedType ::= NotationType | Enumeration
3621 *
3622 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3623 *
3624 *
3625 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3626 */
3627
3628int
3629xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3630 if ((RAW == 'N') && (NXT(1) == 'O') &&
3631 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3632 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3633 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3634 SKIP(8);
3635 if (!IS_BLANK(CUR)) {
3636 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3638 ctxt->sax->error(ctxt->userData,
3639 "Space required after 'NOTATION'\n");
3640 ctxt->wellFormed = 0;
3641 ctxt->disableSAX = 1;
3642 return(0);
3643 }
3644 SKIP_BLANKS;
3645 *tree = xmlParseNotationType(ctxt);
3646 if (*tree == NULL) return(0);
3647 return(XML_ATTRIBUTE_NOTATION);
3648 }
3649 *tree = xmlParseEnumerationType(ctxt);
3650 if (*tree == NULL) return(0);
3651 return(XML_ATTRIBUTE_ENUMERATION);
3652}
3653
3654/**
3655 * xmlParseAttributeType:
3656 * @ctxt: an XML parser context
3657 * @tree: the enumeration tree built while parsing
3658 *
3659 * parse the Attribute list def for an element
3660 *
3661 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3662 *
3663 * [55] StringType ::= 'CDATA'
3664 *
3665 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3666 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3667 *
3668 * Validity constraints for attribute values syntax are checked in
3669 * xmlValidateAttributeValue()
3670 *
3671 * [ VC: ID ]
3672 * Values of type ID must match the Name production. A name must not
3673 * appear more than once in an XML document as a value of this type;
3674 * i.e., ID values must uniquely identify the elements which bear them.
3675 *
3676 * [ VC: One ID per Element Type ]
3677 * No element type may have more than one ID attribute specified.
3678 *
3679 * [ VC: ID Attribute Default ]
3680 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3681 *
3682 * [ VC: IDREF ]
3683 * Values of type IDREF must match the Name production, and values
3684 * of type IDREFS must match Names; each IDREF Name must match the value
3685 * of an ID attribute on some element in the XML document; i.e. IDREF
3686 * values must match the value of some ID attribute.
3687 *
3688 * [ VC: Entity Name ]
3689 * Values of type ENTITY must match the Name production, values
3690 * of type ENTITIES must match Names; each Entity Name must match the
3691 * name of an unparsed entity declared in the DTD.
3692 *
3693 * [ VC: Name Token ]
3694 * Values of type NMTOKEN must match the Nmtoken production; values
3695 * of type NMTOKENS must match Nmtokens.
3696 *
3697 * Returns the attribute type
3698 */
3699int
3700xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3701 SHRINK;
3702 if ((RAW == 'C') && (NXT(1) == 'D') &&
3703 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3704 (NXT(4) == 'A')) {
3705 SKIP(5);
3706 return(XML_ATTRIBUTE_CDATA);
3707 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3708 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3709 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3710 SKIP(6);
3711 return(XML_ATTRIBUTE_IDREFS);
3712 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3713 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3714 (NXT(4) == 'F')) {
3715 SKIP(5);
3716 return(XML_ATTRIBUTE_IDREF);
3717 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3718 SKIP(2);
3719 return(XML_ATTRIBUTE_ID);
3720 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3721 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3722 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3723 SKIP(6);
3724 return(XML_ATTRIBUTE_ENTITY);
3725 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3726 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3727 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3728 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3729 SKIP(8);
3730 return(XML_ATTRIBUTE_ENTITIES);
3731 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3732 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3733 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3734 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3735 SKIP(8);
3736 return(XML_ATTRIBUTE_NMTOKENS);
3737 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3738 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3739 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3740 (NXT(6) == 'N')) {
3741 SKIP(7);
3742 return(XML_ATTRIBUTE_NMTOKEN);
3743 }
3744 return(xmlParseEnumeratedType(ctxt, tree));
3745}
3746
3747/**
3748 * xmlParseAttributeListDecl:
3749 * @ctxt: an XML parser context
3750 *
3751 * : parse the Attribute list def for an element
3752 *
3753 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3754 *
3755 * [53] AttDef ::= S Name S AttType S DefaultDecl
3756 *
3757 */
3758void
3759xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3760 xmlChar *elemName;
3761 xmlChar *attrName;
3762 xmlEnumerationPtr tree;
3763
3764 if ((RAW == '<') && (NXT(1) == '!') &&
3765 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3766 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3767 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3768 (NXT(8) == 'T')) {
3769 xmlParserInputPtr input = ctxt->input;
3770
3771 SKIP(9);
3772 if (!IS_BLANK(CUR)) {
3773 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3775 ctxt->sax->error(ctxt->userData,
3776 "Space required after '<!ATTLIST'\n");
3777 ctxt->wellFormed = 0;
3778 ctxt->disableSAX = 1;
3779 }
3780 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003781 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 if (elemName == NULL) {
3783 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3785 ctxt->sax->error(ctxt->userData,
3786 "ATTLIST: no name for Element\n");
3787 ctxt->wellFormed = 0;
3788 ctxt->disableSAX = 1;
3789 return;
3790 }
3791 SKIP_BLANKS;
3792 GROW;
3793 while (RAW != '>') {
3794 const xmlChar *check = CUR_PTR;
3795 int type;
3796 int def;
3797 xmlChar *defaultValue = NULL;
3798
3799 GROW;
3800 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003801 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003802 if (attrName == NULL) {
3803 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3805 ctxt->sax->error(ctxt->userData,
3806 "ATTLIST: no name for Attribute\n");
3807 ctxt->wellFormed = 0;
3808 ctxt->disableSAX = 1;
3809 break;
3810 }
3811 GROW;
3812 if (!IS_BLANK(CUR)) {
3813 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3815 ctxt->sax->error(ctxt->userData,
3816 "Space required after the attribute name\n");
3817 ctxt->wellFormed = 0;
3818 ctxt->disableSAX = 1;
3819 if (attrName != NULL)
3820 xmlFree(attrName);
3821 if (defaultValue != NULL)
3822 xmlFree(defaultValue);
3823 break;
3824 }
3825 SKIP_BLANKS;
3826
3827 type = xmlParseAttributeType(ctxt, &tree);
3828 if (type <= 0) {
3829 if (attrName != NULL)
3830 xmlFree(attrName);
3831 if (defaultValue != NULL)
3832 xmlFree(defaultValue);
3833 break;
3834 }
3835
3836 GROW;
3837 if (!IS_BLANK(CUR)) {
3838 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3839 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3840 ctxt->sax->error(ctxt->userData,
3841 "Space required after the attribute type\n");
3842 ctxt->wellFormed = 0;
3843 ctxt->disableSAX = 1;
3844 if (attrName != NULL)
3845 xmlFree(attrName);
3846 if (defaultValue != NULL)
3847 xmlFree(defaultValue);
3848 if (tree != NULL)
3849 xmlFreeEnumeration(tree);
3850 break;
3851 }
3852 SKIP_BLANKS;
3853
3854 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3855 if (def <= 0) {
3856 if (attrName != NULL)
3857 xmlFree(attrName);
3858 if (defaultValue != NULL)
3859 xmlFree(defaultValue);
3860 if (tree != NULL)
3861 xmlFreeEnumeration(tree);
3862 break;
3863 }
3864
3865 GROW;
3866 if (RAW != '>') {
3867 if (!IS_BLANK(CUR)) {
3868 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3870 ctxt->sax->error(ctxt->userData,
3871 "Space required after the attribute default value\n");
3872 ctxt->wellFormed = 0;
3873 ctxt->disableSAX = 1;
3874 if (attrName != NULL)
3875 xmlFree(attrName);
3876 if (defaultValue != NULL)
3877 xmlFree(defaultValue);
3878 if (tree != NULL)
3879 xmlFreeEnumeration(tree);
3880 break;
3881 }
3882 SKIP_BLANKS;
3883 }
3884 if (check == CUR_PTR) {
3885 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3887 ctxt->sax->error(ctxt->userData,
3888 "xmlParseAttributeListDecl: detected internal error\n");
3889 if (attrName != NULL)
3890 xmlFree(attrName);
3891 if (defaultValue != NULL)
3892 xmlFree(defaultValue);
3893 if (tree != NULL)
3894 xmlFreeEnumeration(tree);
3895 break;
3896 }
3897 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3898 (ctxt->sax->attributeDecl != NULL))
3899 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3900 type, def, defaultValue, tree);
3901 if (attrName != NULL)
3902 xmlFree(attrName);
3903 if (defaultValue != NULL)
3904 xmlFree(defaultValue);
3905 GROW;
3906 }
3907 if (RAW == '>') {
3908 if (input != ctxt->input) {
3909 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3910 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3911 ctxt->sax->error(ctxt->userData,
3912"Attribute list declaration doesn't start and stop in the same entity\n");
3913 ctxt->wellFormed = 0;
3914 ctxt->disableSAX = 1;
3915 }
3916 NEXT;
3917 }
3918
3919 xmlFree(elemName);
3920 }
3921}
3922
3923/**
3924 * xmlParseElementMixedContentDecl:
3925 * @ctxt: an XML parser context
3926 *
3927 * parse the declaration for a Mixed Element content
3928 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3929 *
3930 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3931 * '(' S? '#PCDATA' S? ')'
3932 *
3933 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3934 *
3935 * [ VC: No Duplicate Types ]
3936 * The same name must not appear more than once in a single
3937 * mixed-content declaration.
3938 *
3939 * returns: the list of the xmlElementContentPtr describing the element choices
3940 */
3941xmlElementContentPtr
3942xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3943 xmlElementContentPtr ret = NULL, cur = NULL, n;
3944 xmlChar *elem = NULL;
3945
3946 GROW;
3947 if ((RAW == '#') && (NXT(1) == 'P') &&
3948 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3949 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3950 (NXT(6) == 'A')) {
3951 SKIP(7);
3952 SKIP_BLANKS;
3953 SHRINK;
3954 if (RAW == ')') {
3955 ctxt->entity = ctxt->input;
3956 NEXT;
3957 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3958 if (RAW == '*') {
3959 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3960 NEXT;
3961 }
3962 return(ret);
3963 }
3964 if ((RAW == '(') || (RAW == '|')) {
3965 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3966 if (ret == NULL) return(NULL);
3967 }
3968 while (RAW == '|') {
3969 NEXT;
3970 if (elem == NULL) {
3971 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3972 if (ret == NULL) return(NULL);
3973 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003974 if (cur != NULL)
3975 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003976 cur = ret;
3977 } else {
3978 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3979 if (n == NULL) return(NULL);
3980 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003981 if (n->c1 != NULL)
3982 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00003983 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003984 if (n != NULL)
3985 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003986 cur = n;
3987 xmlFree(elem);
3988 }
3989 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003990 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003991 if (elem == NULL) {
3992 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3994 ctxt->sax->error(ctxt->userData,
3995 "xmlParseElementMixedContentDecl : Name expected\n");
3996 ctxt->wellFormed = 0;
3997 ctxt->disableSAX = 1;
3998 xmlFreeElementContent(cur);
3999 return(NULL);
4000 }
4001 SKIP_BLANKS;
4002 GROW;
4003 }
4004 if ((RAW == ')') && (NXT(1) == '*')) {
4005 if (elem != NULL) {
4006 cur->c2 = xmlNewElementContent(elem,
4007 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004008 if (cur->c2 != NULL)
4009 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004010 xmlFree(elem);
4011 }
4012 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4013 ctxt->entity = ctxt->input;
4014 SKIP(2);
4015 } else {
4016 if (elem != NULL) xmlFree(elem);
4017 xmlFreeElementContent(ret);
4018 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4020 ctxt->sax->error(ctxt->userData,
4021 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4022 ctxt->wellFormed = 0;
4023 ctxt->disableSAX = 1;
4024 return(NULL);
4025 }
4026
4027 } else {
4028 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4030 ctxt->sax->error(ctxt->userData,
4031 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4032 ctxt->wellFormed = 0;
4033 ctxt->disableSAX = 1;
4034 }
4035 return(ret);
4036}
4037
4038/**
4039 * xmlParseElementChildrenContentDecl:
4040 * @ctxt: an XML parser context
4041 *
4042 * parse the declaration for a Mixed Element content
4043 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4044 *
4045 *
4046 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4047 *
4048 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4049 *
4050 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4051 *
4052 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4053 *
4054 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4055 * TODO Parameter-entity replacement text must be properly nested
4056 * with parenthetized groups. That is to say, if either of the
4057 * opening or closing parentheses in a choice, seq, or Mixed
4058 * construct is contained in the replacement text for a parameter
4059 * entity, both must be contained in the same replacement text. For
4060 * interoperability, if a parameter-entity reference appears in a
4061 * choice, seq, or Mixed construct, its replacement text should not
4062 * be empty, and neither the first nor last non-blank character of
4063 * the replacement text should be a connector (| or ,).
4064 *
4065 * returns: the tree of xmlElementContentPtr describing the element
4066 * hierarchy.
4067 */
4068xmlElementContentPtr
4069#ifdef VMS
4070xmlParseElementChildrenContentD
4071#else
4072xmlParseElementChildrenContentDecl
4073#endif
4074(xmlParserCtxtPtr ctxt) {
4075 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4076 xmlChar *elem;
4077 xmlChar type = 0;
4078
4079 SKIP_BLANKS;
4080 GROW;
4081 if (RAW == '(') {
4082 /* Recurse on first child */
4083 NEXT;
4084 SKIP_BLANKS;
4085 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4086 SKIP_BLANKS;
4087 GROW;
4088 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004089 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004090 if (elem == NULL) {
4091 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4093 ctxt->sax->error(ctxt->userData,
4094 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4095 ctxt->wellFormed = 0;
4096 ctxt->disableSAX = 1;
4097 return(NULL);
4098 }
4099 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4100 GROW;
4101 if (RAW == '?') {
4102 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4103 NEXT;
4104 } else if (RAW == '*') {
4105 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4106 NEXT;
4107 } else if (RAW == '+') {
4108 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4109 NEXT;
4110 } else {
4111 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4112 }
4113 xmlFree(elem);
4114 GROW;
4115 }
4116 SKIP_BLANKS;
4117 SHRINK;
4118 while (RAW != ')') {
4119 /*
4120 * Each loop we parse one separator and one element.
4121 */
4122 if (RAW == ',') {
4123 if (type == 0) type = CUR;
4124
4125 /*
4126 * Detect "Name | Name , Name" error
4127 */
4128 else if (type != CUR) {
4129 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4131 ctxt->sax->error(ctxt->userData,
4132 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4133 type);
4134 ctxt->wellFormed = 0;
4135 ctxt->disableSAX = 1;
4136 if ((op != NULL) && (op != ret))
4137 xmlFreeElementContent(op);
4138 if ((last != NULL) && (last != ret) &&
4139 (last != ret->c1) && (last != ret->c2))
4140 xmlFreeElementContent(last);
4141 if (ret != NULL)
4142 xmlFreeElementContent(ret);
4143 return(NULL);
4144 }
4145 NEXT;
4146
4147 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4148 if (op == NULL) {
4149 xmlFreeElementContent(ret);
4150 return(NULL);
4151 }
4152 if (last == NULL) {
4153 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004154 if (ret != NULL)
4155 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004156 ret = cur = op;
4157 } else {
4158 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004159 if (op != NULL)
4160 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004161 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004162 if (last != NULL)
4163 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004164 cur =op;
4165 last = NULL;
4166 }
4167 } else if (RAW == '|') {
4168 if (type == 0) type = CUR;
4169
4170 /*
4171 * Detect "Name , Name | Name" error
4172 */
4173 else if (type != CUR) {
4174 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4176 ctxt->sax->error(ctxt->userData,
4177 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4178 type);
4179 ctxt->wellFormed = 0;
4180 ctxt->disableSAX = 1;
4181 if ((op != NULL) && (op != ret) && (op != last))
4182 xmlFreeElementContent(op);
4183 if ((last != NULL) && (last != ret) &&
4184 (last != ret->c1) && (last != ret->c2))
4185 xmlFreeElementContent(last);
4186 if (ret != NULL)
4187 xmlFreeElementContent(ret);
4188 return(NULL);
4189 }
4190 NEXT;
4191
4192 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4193 if (op == NULL) {
4194 if ((op != NULL) && (op != ret))
4195 xmlFreeElementContent(op);
4196 if ((last != NULL) && (last != ret) &&
4197 (last != ret->c1) && (last != ret->c2))
4198 xmlFreeElementContent(last);
4199 if (ret != NULL)
4200 xmlFreeElementContent(ret);
4201 return(NULL);
4202 }
4203 if (last == NULL) {
4204 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004205 if (ret != NULL)
4206 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004207 ret = cur = op;
4208 } else {
4209 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004210 if (op != NULL)
4211 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004212 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004213 if (last != NULL)
4214 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004215 cur =op;
4216 last = NULL;
4217 }
4218 } else {
4219 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4221 ctxt->sax->error(ctxt->userData,
4222 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4223 ctxt->wellFormed = 0;
4224 ctxt->disableSAX = 1;
4225 if ((op != NULL) && (op != ret))
4226 xmlFreeElementContent(op);
4227 if ((last != NULL) && (last != ret) &&
4228 (last != ret->c1) && (last != ret->c2))
4229 xmlFreeElementContent(last);
4230 if (ret != NULL)
4231 xmlFreeElementContent(ret);
4232 return(NULL);
4233 }
4234 GROW;
4235 SKIP_BLANKS;
4236 GROW;
4237 if (RAW == '(') {
4238 /* Recurse on second child */
4239 NEXT;
4240 SKIP_BLANKS;
4241 last = xmlParseElementChildrenContentDecl(ctxt);
4242 SKIP_BLANKS;
4243 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004244 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004245 if (elem == NULL) {
4246 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4248 ctxt->sax->error(ctxt->userData,
4249 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4250 ctxt->wellFormed = 0;
4251 ctxt->disableSAX = 1;
4252 if ((op != NULL) && (op != ret))
4253 xmlFreeElementContent(op);
4254 if ((last != NULL) && (last != ret) &&
4255 (last != ret->c1) && (last != ret->c2))
4256 xmlFreeElementContent(last);
4257 if (ret != NULL)
4258 xmlFreeElementContent(ret);
4259 return(NULL);
4260 }
4261 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4262 xmlFree(elem);
4263 if (RAW == '?') {
4264 last->ocur = XML_ELEMENT_CONTENT_OPT;
4265 NEXT;
4266 } else if (RAW == '*') {
4267 last->ocur = XML_ELEMENT_CONTENT_MULT;
4268 NEXT;
4269 } else if (RAW == '+') {
4270 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4271 NEXT;
4272 } else {
4273 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4274 }
4275 }
4276 SKIP_BLANKS;
4277 GROW;
4278 }
4279 if ((cur != NULL) && (last != NULL)) {
4280 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004281 if (last != NULL)
4282 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004283 }
4284 ctxt->entity = ctxt->input;
4285 NEXT;
4286 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004287 if (ret != NULL)
4288 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004289 NEXT;
4290 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004291 if (ret != NULL)
4292 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004293 NEXT;
4294 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004295 if (ret != NULL)
4296 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004297 NEXT;
4298 }
4299 return(ret);
4300}
4301
4302/**
4303 * xmlParseElementContentDecl:
4304 * @ctxt: an XML parser context
4305 * @name: the name of the element being defined.
4306 * @result: the Element Content pointer will be stored here if any
4307 *
4308 * parse the declaration for an Element content either Mixed or Children,
4309 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4310 *
4311 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4312 *
4313 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4314 */
4315
4316int
4317xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4318 xmlElementContentPtr *result) {
4319
4320 xmlElementContentPtr tree = NULL;
4321 xmlParserInputPtr input = ctxt->input;
4322 int res;
4323
4324 *result = NULL;
4325
4326 if (RAW != '(') {
4327 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4329 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004330 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004331 ctxt->wellFormed = 0;
4332 ctxt->disableSAX = 1;
4333 return(-1);
4334 }
4335 NEXT;
4336 GROW;
4337 SKIP_BLANKS;
4338 if ((RAW == '#') && (NXT(1) == 'P') &&
4339 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4340 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4341 (NXT(6) == 'A')) {
4342 tree = xmlParseElementMixedContentDecl(ctxt);
4343 res = XML_ELEMENT_TYPE_MIXED;
4344 } else {
4345 tree = xmlParseElementChildrenContentDecl(ctxt);
4346 res = XML_ELEMENT_TYPE_ELEMENT;
4347 }
4348 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4349 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4351 ctxt->sax->error(ctxt->userData,
4352"Element content declaration doesn't start and stop in the same entity\n");
4353 ctxt->wellFormed = 0;
4354 ctxt->disableSAX = 1;
4355 }
4356 SKIP_BLANKS;
4357 *result = tree;
4358 return(res);
4359}
4360
4361/**
4362 * xmlParseElementDecl:
4363 * @ctxt: an XML parser context
4364 *
4365 * parse an Element declaration.
4366 *
4367 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4368 *
4369 * [ VC: Unique Element Type Declaration ]
4370 * No element type may be declared more than once
4371 *
4372 * Returns the type of the element, or -1 in case of error
4373 */
4374int
4375xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4376 xmlChar *name;
4377 int ret = -1;
4378 xmlElementContentPtr content = NULL;
4379
4380 GROW;
4381 if ((RAW == '<') && (NXT(1) == '!') &&
4382 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4383 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4384 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4385 (NXT(8) == 'T')) {
4386 xmlParserInputPtr input = ctxt->input;
4387
4388 SKIP(9);
4389 if (!IS_BLANK(CUR)) {
4390 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4392 ctxt->sax->error(ctxt->userData,
4393 "Space required after 'ELEMENT'\n");
4394 ctxt->wellFormed = 0;
4395 ctxt->disableSAX = 1;
4396 }
4397 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004398 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004399 if (name == NULL) {
4400 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4402 ctxt->sax->error(ctxt->userData,
4403 "xmlParseElementDecl: no name for Element\n");
4404 ctxt->wellFormed = 0;
4405 ctxt->disableSAX = 1;
4406 return(-1);
4407 }
4408 while ((RAW == 0) && (ctxt->inputNr > 1))
4409 xmlPopInput(ctxt);
4410 if (!IS_BLANK(CUR)) {
4411 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4413 ctxt->sax->error(ctxt->userData,
4414 "Space required after the element name\n");
4415 ctxt->wellFormed = 0;
4416 ctxt->disableSAX = 1;
4417 }
4418 SKIP_BLANKS;
4419 if ((RAW == 'E') && (NXT(1) == 'M') &&
4420 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4421 (NXT(4) == 'Y')) {
4422 SKIP(5);
4423 /*
4424 * Element must always be empty.
4425 */
4426 ret = XML_ELEMENT_TYPE_EMPTY;
4427 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4428 (NXT(2) == 'Y')) {
4429 SKIP(3);
4430 /*
4431 * Element is a generic container.
4432 */
4433 ret = XML_ELEMENT_TYPE_ANY;
4434 } else if (RAW == '(') {
4435 ret = xmlParseElementContentDecl(ctxt, name, &content);
4436 } else {
4437 /*
4438 * [ WFC: PEs in Internal Subset ] error handling.
4439 */
4440 if ((RAW == '%') && (ctxt->external == 0) &&
4441 (ctxt->inputNr == 1)) {
4442 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4444 ctxt->sax->error(ctxt->userData,
4445 "PEReference: forbidden within markup decl in internal subset\n");
4446 } else {
4447 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4449 ctxt->sax->error(ctxt->userData,
4450 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4451 }
4452 ctxt->wellFormed = 0;
4453 ctxt->disableSAX = 1;
4454 if (name != NULL) xmlFree(name);
4455 return(-1);
4456 }
4457
4458 SKIP_BLANKS;
4459 /*
4460 * Pop-up of finished entities.
4461 */
4462 while ((RAW == 0) && (ctxt->inputNr > 1))
4463 xmlPopInput(ctxt);
4464 SKIP_BLANKS;
4465
4466 if (RAW != '>') {
4467 ctxt->errNo = XML_ERR_GT_REQUIRED;
4468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4469 ctxt->sax->error(ctxt->userData,
4470 "xmlParseElementDecl: expected '>' at the end\n");
4471 ctxt->wellFormed = 0;
4472 ctxt->disableSAX = 1;
4473 } else {
4474 if (input != ctxt->input) {
4475 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4477 ctxt->sax->error(ctxt->userData,
4478"Element declaration doesn't start and stop in the same entity\n");
4479 ctxt->wellFormed = 0;
4480 ctxt->disableSAX = 1;
4481 }
4482
4483 NEXT;
4484 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4485 (ctxt->sax->elementDecl != NULL))
4486 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4487 content);
4488 }
4489 if (content != NULL) {
4490 xmlFreeElementContent(content);
4491 }
4492 if (name != NULL) {
4493 xmlFree(name);
4494 }
4495 }
4496 return(ret);
4497}
4498
4499/**
4500 * xmlParseMarkupDecl:
4501 * @ctxt: an XML parser context
4502 *
4503 * parse Markup declarations
4504 *
4505 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4506 * NotationDecl | PI | Comment
4507 *
4508 * [ VC: Proper Declaration/PE Nesting ]
4509 * Parameter-entity replacement text must be properly nested with
4510 * markup declarations. That is to say, if either the first character
4511 * or the last character of a markup declaration (markupdecl above) is
4512 * contained in the replacement text for a parameter-entity reference,
4513 * both must be contained in the same replacement text.
4514 *
4515 * [ WFC: PEs in Internal Subset ]
4516 * In the internal DTD subset, parameter-entity references can occur
4517 * only where markup declarations can occur, not within markup declarations.
4518 * (This does not apply to references that occur in external parameter
4519 * entities or to the external subset.)
4520 */
4521void
4522xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4523 GROW;
4524 xmlParseElementDecl(ctxt);
4525 xmlParseAttributeListDecl(ctxt);
4526 xmlParseEntityDecl(ctxt);
4527 xmlParseNotationDecl(ctxt);
4528 xmlParsePI(ctxt);
4529 xmlParseComment(ctxt);
4530 /*
4531 * This is only for internal subset. On external entities,
4532 * the replacement is done before parsing stage
4533 */
4534 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4535 xmlParsePEReference(ctxt);
4536 ctxt->instate = XML_PARSER_DTD;
4537}
4538
4539/**
4540 * xmlParseTextDecl:
4541 * @ctxt: an XML parser context
4542 *
4543 * parse an XML declaration header for external entities
4544 *
4545 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4546 *
4547 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4548 */
4549
4550void
4551xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4552 xmlChar *version;
4553
4554 /*
4555 * We know that '<?xml' is here.
4556 */
4557 if ((RAW == '<') && (NXT(1) == '?') &&
4558 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4559 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4560 SKIP(5);
4561 } else {
4562 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4564 ctxt->sax->error(ctxt->userData,
4565 "Text declaration '<?xml' required\n");
4566 ctxt->wellFormed = 0;
4567 ctxt->disableSAX = 1;
4568
4569 return;
4570 }
4571
4572 if (!IS_BLANK(CUR)) {
4573 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4575 ctxt->sax->error(ctxt->userData,
4576 "Space needed after '<?xml'\n");
4577 ctxt->wellFormed = 0;
4578 ctxt->disableSAX = 1;
4579 }
4580 SKIP_BLANKS;
4581
4582 /*
4583 * We may have the VersionInfo here.
4584 */
4585 version = xmlParseVersionInfo(ctxt);
4586 if (version == NULL)
4587 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4588 ctxt->input->version = version;
4589
4590 /*
4591 * We must have the encoding declaration
4592 */
4593 if (!IS_BLANK(CUR)) {
4594 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4596 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4597 ctxt->wellFormed = 0;
4598 ctxt->disableSAX = 1;
4599 }
4600 xmlParseEncodingDecl(ctxt);
4601 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4602 /*
4603 * The XML REC instructs us to stop parsing right here
4604 */
4605 return;
4606 }
4607
4608 SKIP_BLANKS;
4609 if ((RAW == '?') && (NXT(1) == '>')) {
4610 SKIP(2);
4611 } else if (RAW == '>') {
4612 /* Deprecated old WD ... */
4613 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4615 ctxt->sax->error(ctxt->userData,
4616 "XML declaration must end-up with '?>'\n");
4617 ctxt->wellFormed = 0;
4618 ctxt->disableSAX = 1;
4619 NEXT;
4620 } else {
4621 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4623 ctxt->sax->error(ctxt->userData,
4624 "parsing XML declaration: '?>' expected\n");
4625 ctxt->wellFormed = 0;
4626 ctxt->disableSAX = 1;
4627 MOVETO_ENDTAG(CUR_PTR);
4628 NEXT;
4629 }
4630}
4631
4632/*
4633 * xmlParseConditionalSections
4634 * @ctxt: an XML parser context
4635 *
4636 * [61] conditionalSect ::= includeSect | ignoreSect
4637 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4638 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4639 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4640 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4641 */
4642
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004643static void
Owen Taylor3473f882001-02-23 17:55:21 +00004644xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4645 SKIP(3);
4646 SKIP_BLANKS;
4647 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4648 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4649 (NXT(6) == 'E')) {
4650 SKIP(7);
4651 SKIP_BLANKS;
4652 if (RAW != '[') {
4653 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4655 ctxt->sax->error(ctxt->userData,
4656 "XML conditional section '[' expected\n");
4657 ctxt->wellFormed = 0;
4658 ctxt->disableSAX = 1;
4659 } else {
4660 NEXT;
4661 }
4662 if (xmlParserDebugEntities) {
4663 if ((ctxt->input != NULL) && (ctxt->input->filename))
4664 xmlGenericError(xmlGenericErrorContext,
4665 "%s(%d): ", ctxt->input->filename,
4666 ctxt->input->line);
4667 xmlGenericError(xmlGenericErrorContext,
4668 "Entering INCLUDE Conditional Section\n");
4669 }
4670
4671 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4672 (NXT(2) != '>'))) {
4673 const xmlChar *check = CUR_PTR;
4674 int cons = ctxt->input->consumed;
4675 int tok = ctxt->token;
4676
4677 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4678 xmlParseConditionalSections(ctxt);
4679 } else if (IS_BLANK(CUR)) {
4680 NEXT;
4681 } else if (RAW == '%') {
4682 xmlParsePEReference(ctxt);
4683 } else
4684 xmlParseMarkupDecl(ctxt);
4685
4686 /*
4687 * Pop-up of finished entities.
4688 */
4689 while ((RAW == 0) && (ctxt->inputNr > 1))
4690 xmlPopInput(ctxt);
4691
4692 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4693 (tok == ctxt->token)) {
4694 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4696 ctxt->sax->error(ctxt->userData,
4697 "Content error in the external subset\n");
4698 ctxt->wellFormed = 0;
4699 ctxt->disableSAX = 1;
4700 break;
4701 }
4702 }
4703 if (xmlParserDebugEntities) {
4704 if ((ctxt->input != NULL) && (ctxt->input->filename))
4705 xmlGenericError(xmlGenericErrorContext,
4706 "%s(%d): ", ctxt->input->filename,
4707 ctxt->input->line);
4708 xmlGenericError(xmlGenericErrorContext,
4709 "Leaving INCLUDE Conditional Section\n");
4710 }
4711
4712 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4713 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4714 int state;
4715 int instate;
4716 int depth = 0;
4717
4718 SKIP(6);
4719 SKIP_BLANKS;
4720 if (RAW != '[') {
4721 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4723 ctxt->sax->error(ctxt->userData,
4724 "XML conditional section '[' expected\n");
4725 ctxt->wellFormed = 0;
4726 ctxt->disableSAX = 1;
4727 } else {
4728 NEXT;
4729 }
4730 if (xmlParserDebugEntities) {
4731 if ((ctxt->input != NULL) && (ctxt->input->filename))
4732 xmlGenericError(xmlGenericErrorContext,
4733 "%s(%d): ", ctxt->input->filename,
4734 ctxt->input->line);
4735 xmlGenericError(xmlGenericErrorContext,
4736 "Entering IGNORE Conditional Section\n");
4737 }
4738
4739 /*
4740 * Parse up to the end of the conditionnal section
4741 * But disable SAX event generating DTD building in the meantime
4742 */
4743 state = ctxt->disableSAX;
4744 instate = ctxt->instate;
4745 ctxt->disableSAX = 1;
4746 ctxt->instate = XML_PARSER_IGNORE;
4747
4748 while (depth >= 0) {
4749 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4750 depth++;
4751 SKIP(3);
4752 continue;
4753 }
4754 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4755 if (--depth >= 0) SKIP(3);
4756 continue;
4757 }
4758 NEXT;
4759 continue;
4760 }
4761
4762 ctxt->disableSAX = state;
4763 ctxt->instate = instate;
4764
4765 if (xmlParserDebugEntities) {
4766 if ((ctxt->input != NULL) && (ctxt->input->filename))
4767 xmlGenericError(xmlGenericErrorContext,
4768 "%s(%d): ", ctxt->input->filename,
4769 ctxt->input->line);
4770 xmlGenericError(xmlGenericErrorContext,
4771 "Leaving IGNORE Conditional Section\n");
4772 }
4773
4774 } else {
4775 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4777 ctxt->sax->error(ctxt->userData,
4778 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4779 ctxt->wellFormed = 0;
4780 ctxt->disableSAX = 1;
4781 }
4782
4783 if (RAW == 0)
4784 SHRINK;
4785
4786 if (RAW == 0) {
4787 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4789 ctxt->sax->error(ctxt->userData,
4790 "XML conditional section not closed\n");
4791 ctxt->wellFormed = 0;
4792 ctxt->disableSAX = 1;
4793 } else {
4794 SKIP(3);
4795 }
4796}
4797
4798/**
4799 * xmlParseExternalSubset:
4800 * @ctxt: an XML parser context
4801 * @ExternalID: the external identifier
4802 * @SystemID: the system identifier (or URL)
4803 *
4804 * parse Markup declarations from an external subset
4805 *
4806 * [30] extSubset ::= textDecl? extSubsetDecl
4807 *
4808 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4809 */
4810void
4811xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4812 const xmlChar *SystemID) {
4813 GROW;
4814 if ((RAW == '<') && (NXT(1) == '?') &&
4815 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4816 (NXT(4) == 'l')) {
4817 xmlParseTextDecl(ctxt);
4818 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4819 /*
4820 * The XML REC instructs us to stop parsing right here
4821 */
4822 ctxt->instate = XML_PARSER_EOF;
4823 return;
4824 }
4825 }
4826 if (ctxt->myDoc == NULL) {
4827 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4828 }
4829 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4830 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4831
4832 ctxt->instate = XML_PARSER_DTD;
4833 ctxt->external = 1;
4834 while (((RAW == '<') && (NXT(1) == '?')) ||
4835 ((RAW == '<') && (NXT(1) == '!')) ||
4836 IS_BLANK(CUR)) {
4837 const xmlChar *check = CUR_PTR;
4838 int cons = ctxt->input->consumed;
4839 int tok = ctxt->token;
4840
4841 GROW;
4842 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4843 xmlParseConditionalSections(ctxt);
4844 } else if (IS_BLANK(CUR)) {
4845 NEXT;
4846 } else if (RAW == '%') {
4847 xmlParsePEReference(ctxt);
4848 } else
4849 xmlParseMarkupDecl(ctxt);
4850
4851 /*
4852 * Pop-up of finished entities.
4853 */
4854 while ((RAW == 0) && (ctxt->inputNr > 1))
4855 xmlPopInput(ctxt);
4856
4857 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4858 (tok == ctxt->token)) {
4859 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4861 ctxt->sax->error(ctxt->userData,
4862 "Content error in the external subset\n");
4863 ctxt->wellFormed = 0;
4864 ctxt->disableSAX = 1;
4865 break;
4866 }
4867 }
4868
4869 if (RAW != 0) {
4870 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4872 ctxt->sax->error(ctxt->userData,
4873 "Extra content at the end of the document\n");
4874 ctxt->wellFormed = 0;
4875 ctxt->disableSAX = 1;
4876 }
4877
4878}
4879
4880/**
4881 * xmlParseReference:
4882 * @ctxt: an XML parser context
4883 *
4884 * parse and handle entity references in content, depending on the SAX
4885 * interface, this may end-up in a call to character() if this is a
4886 * CharRef, a predefined entity, if there is no reference() callback.
4887 * or if the parser was asked to switch to that mode.
4888 *
4889 * [67] Reference ::= EntityRef | CharRef
4890 */
4891void
4892xmlParseReference(xmlParserCtxtPtr ctxt) {
4893 xmlEntityPtr ent;
4894 xmlChar *val;
4895 if (RAW != '&') return;
4896
4897 if (NXT(1) == '#') {
4898 int i = 0;
4899 xmlChar out[10];
4900 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004901 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004902
4903 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4904 /*
4905 * So we are using non-UTF-8 buffers
4906 * Check that the char fit on 8bits, if not
4907 * generate a CharRef.
4908 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004909 if (value <= 0xFF) {
4910 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004911 out[1] = 0;
4912 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4913 (!ctxt->disableSAX))
4914 ctxt->sax->characters(ctxt->userData, out, 1);
4915 } else {
4916 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004917 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004918 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004919 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004920 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4921 (!ctxt->disableSAX))
4922 ctxt->sax->reference(ctxt->userData, out);
4923 }
4924 } else {
4925 /*
4926 * Just encode the value in UTF-8
4927 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004928 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004929 out[i] = 0;
4930 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4931 (!ctxt->disableSAX))
4932 ctxt->sax->characters(ctxt->userData, out, i);
4933 }
4934 } else {
4935 ent = xmlParseEntityRef(ctxt);
4936 if (ent == NULL) return;
4937 if ((ent->name != NULL) &&
4938 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4939 xmlNodePtr list = NULL;
4940 int ret;
4941
4942
4943 /*
4944 * The first reference to the entity trigger a parsing phase
4945 * where the ent->children is filled with the result from
4946 * the parsing.
4947 */
4948 if (ent->children == NULL) {
4949 xmlChar *value;
4950 value = ent->content;
4951
4952 /*
4953 * Check that this entity is well formed
4954 */
4955 if ((value != NULL) &&
4956 (value[1] == 0) && (value[0] == '<') &&
4957 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4958 /*
4959 * DONE: get definite answer on this !!!
4960 * Lots of entity decls are used to declare a single
4961 * char
4962 * <!ENTITY lt "<">
4963 * Which seems to be valid since
4964 * 2.4: The ampersand character (&) and the left angle
4965 * bracket (<) may appear in their literal form only
4966 * when used ... They are also legal within the literal
4967 * entity value of an internal entity declaration;i
4968 * see "4.3.2 Well-Formed Parsed Entities".
4969 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4970 * Looking at the OASIS test suite and James Clark
4971 * tests, this is broken. However the XML REC uses
4972 * it. Is the XML REC not well-formed ????
4973 * This is a hack to avoid this problem
4974 *
4975 * ANSWER: since lt gt amp .. are already defined,
4976 * this is a redefinition and hence the fact that the
4977 * contentis not well balanced is not a Wf error, this
4978 * is lousy but acceptable.
4979 */
4980 list = xmlNewDocText(ctxt->myDoc, value);
4981 if (list != NULL) {
4982 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4983 (ent->children == NULL)) {
4984 ent->children = list;
4985 ent->last = list;
4986 list->parent = (xmlNodePtr) ent;
4987 } else {
4988 xmlFreeNodeList(list);
4989 }
4990 } else if (list != NULL) {
4991 xmlFreeNodeList(list);
4992 }
4993 } else {
4994 /*
4995 * 4.3.2: An internal general parsed entity is well-formed
4996 * if its replacement text matches the production labeled
4997 * content.
4998 */
4999 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5000 ctxt->depth++;
5001 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5002 ctxt->sax, NULL, ctxt->depth,
5003 value, &list);
5004 ctxt->depth--;
5005 } else if (ent->etype ==
5006 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5007 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005008 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005009 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005010 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005011 ctxt->depth--;
5012 } else {
5013 ret = -1;
5014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5015 ctxt->sax->error(ctxt->userData,
5016 "Internal: invalid entity type\n");
5017 }
5018 if (ret == XML_ERR_ENTITY_LOOP) {
5019 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5021 ctxt->sax->error(ctxt->userData,
5022 "Detected entity reference loop\n");
5023 ctxt->wellFormed = 0;
5024 ctxt->disableSAX = 1;
5025 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005026 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5027 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005028 (ent->children == NULL)) {
5029 ent->children = list;
5030 while (list != NULL) {
5031 list->parent = (xmlNodePtr) ent;
5032 if (list->next == NULL)
5033 ent->last = list;
5034 list = list->next;
5035 }
5036 } else {
5037 xmlFreeNodeList(list);
5038 }
5039 } else if (ret > 0) {
5040 ctxt->errNo = ret;
5041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5042 ctxt->sax->error(ctxt->userData,
5043 "Entity value required\n");
5044 ctxt->wellFormed = 0;
5045 ctxt->disableSAX = 1;
5046 } else if (list != NULL) {
5047 xmlFreeNodeList(list);
5048 }
5049 }
5050 }
5051 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5052 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5053 /*
5054 * Create a node.
5055 */
5056 ctxt->sax->reference(ctxt->userData, ent->name);
5057 return;
5058 } else if (ctxt->replaceEntities) {
5059 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5060 /*
5061 * Seems we are generating the DOM content, do
5062 * a simple tree copy
5063 */
5064 xmlNodePtr new;
5065 new = xmlCopyNodeList(ent->children);
5066
5067 xmlAddChildList(ctxt->node, new);
5068 /*
5069 * This is to avoid a nasty side effect, see
5070 * characters() in SAX.c
5071 */
5072 ctxt->nodemem = 0;
5073 ctxt->nodelen = 0;
5074 return;
5075 } else {
5076 /*
5077 * Probably running in SAX mode
5078 */
5079 xmlParserInputPtr input;
5080
5081 input = xmlNewEntityInputStream(ctxt, ent);
5082 xmlPushInput(ctxt, input);
5083 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5084 (RAW == '<') && (NXT(1) == '?') &&
5085 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5086 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5087 xmlParseTextDecl(ctxt);
5088 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5089 /*
5090 * The XML REC instructs us to stop parsing right here
5091 */
5092 ctxt->instate = XML_PARSER_EOF;
5093 return;
5094 }
5095 if (input->standalone == 1) {
5096 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5098 ctxt->sax->error(ctxt->userData,
5099 "external parsed entities cannot be standalone\n");
5100 ctxt->wellFormed = 0;
5101 ctxt->disableSAX = 1;
5102 }
5103 }
5104 return;
5105 }
5106 }
5107 } else {
5108 val = ent->content;
5109 if (val == NULL) return;
5110 /*
5111 * inline the entity.
5112 */
5113 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5114 (!ctxt->disableSAX))
5115 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5116 }
5117 }
5118}
5119
5120/**
5121 * xmlParseEntityRef:
5122 * @ctxt: an XML parser context
5123 *
5124 * parse ENTITY references declarations
5125 *
5126 * [68] EntityRef ::= '&' Name ';'
5127 *
5128 * [ WFC: Entity Declared ]
5129 * In a document without any DTD, a document with only an internal DTD
5130 * subset which contains no parameter entity references, or a document
5131 * with "standalone='yes'", the Name given in the entity reference
5132 * must match that in an entity declaration, except that well-formed
5133 * documents need not declare any of the following entities: amp, lt,
5134 * gt, apos, quot. The declaration of a parameter entity must precede
5135 * any reference to it. Similarly, the declaration of a general entity
5136 * must precede any reference to it which appears in a default value in an
5137 * attribute-list declaration. Note that if entities are declared in the
5138 * external subset or in external parameter entities, a non-validating
5139 * processor is not obligated to read and process their declarations;
5140 * for such documents, the rule that an entity must be declared is a
5141 * well-formedness constraint only if standalone='yes'.
5142 *
5143 * [ WFC: Parsed Entity ]
5144 * An entity reference must not contain the name of an unparsed entity
5145 *
5146 * Returns the xmlEntityPtr if found, or NULL otherwise.
5147 */
5148xmlEntityPtr
5149xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5150 xmlChar *name;
5151 xmlEntityPtr ent = NULL;
5152
5153 GROW;
5154
5155 if (RAW == '&') {
5156 NEXT;
5157 name = xmlParseName(ctxt);
5158 if (name == NULL) {
5159 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5161 ctxt->sax->error(ctxt->userData,
5162 "xmlParseEntityRef: no name\n");
5163 ctxt->wellFormed = 0;
5164 ctxt->disableSAX = 1;
5165 } else {
5166 if (RAW == ';') {
5167 NEXT;
5168 /*
5169 * Ask first SAX for entity resolution, otherwise try the
5170 * predefined set.
5171 */
5172 if (ctxt->sax != NULL) {
5173 if (ctxt->sax->getEntity != NULL)
5174 ent = ctxt->sax->getEntity(ctxt->userData, name);
5175 if (ent == NULL)
5176 ent = xmlGetPredefinedEntity(name);
5177 }
5178 /*
5179 * [ WFC: Entity Declared ]
5180 * In a document without any DTD, a document with only an
5181 * internal DTD subset which contains no parameter entity
5182 * references, or a document with "standalone='yes'", the
5183 * Name given in the entity reference must match that in an
5184 * entity declaration, except that well-formed documents
5185 * need not declare any of the following entities: amp, lt,
5186 * gt, apos, quot.
5187 * The declaration of a parameter entity must precede any
5188 * reference to it.
5189 * Similarly, the declaration of a general entity must
5190 * precede any reference to it which appears in a default
5191 * value in an attribute-list declaration. Note that if
5192 * entities are declared in the external subset or in
5193 * external parameter entities, a non-validating processor
5194 * is not obligated to read and process their declarations;
5195 * for such documents, the rule that an entity must be
5196 * declared is a well-formedness constraint only if
5197 * standalone='yes'.
5198 */
5199 if (ent == NULL) {
5200 if ((ctxt->standalone == 1) ||
5201 ((ctxt->hasExternalSubset == 0) &&
5202 (ctxt->hasPErefs == 0))) {
5203 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5205 ctxt->sax->error(ctxt->userData,
5206 "Entity '%s' not defined\n", name);
5207 ctxt->wellFormed = 0;
5208 ctxt->disableSAX = 1;
5209 } else {
5210 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5211 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5212 ctxt->sax->warning(ctxt->userData,
5213 "Entity '%s' not defined\n", name);
5214 }
5215 }
5216
5217 /*
5218 * [ WFC: Parsed Entity ]
5219 * An entity reference must not contain the name of an
5220 * unparsed entity
5221 */
5222 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5223 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5225 ctxt->sax->error(ctxt->userData,
5226 "Entity reference to unparsed entity %s\n", name);
5227 ctxt->wellFormed = 0;
5228 ctxt->disableSAX = 1;
5229 }
5230
5231 /*
5232 * [ WFC: No External Entity References ]
5233 * Attribute values cannot contain direct or indirect
5234 * entity references to external entities.
5235 */
5236 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5237 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5238 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5240 ctxt->sax->error(ctxt->userData,
5241 "Attribute references external entity '%s'\n", name);
5242 ctxt->wellFormed = 0;
5243 ctxt->disableSAX = 1;
5244 }
5245 /*
5246 * [ WFC: No < in Attribute Values ]
5247 * The replacement text of any entity referred to directly or
5248 * indirectly in an attribute value (other than "&lt;") must
5249 * not contain a <.
5250 */
5251 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5252 (ent != NULL) &&
5253 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5254 (ent->content != NULL) &&
5255 (xmlStrchr(ent->content, '<'))) {
5256 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5258 ctxt->sax->error(ctxt->userData,
5259 "'<' in entity '%s' is not allowed in attributes values\n", name);
5260 ctxt->wellFormed = 0;
5261 ctxt->disableSAX = 1;
5262 }
5263
5264 /*
5265 * Internal check, no parameter entities here ...
5266 */
5267 else {
5268 switch (ent->etype) {
5269 case XML_INTERNAL_PARAMETER_ENTITY:
5270 case XML_EXTERNAL_PARAMETER_ENTITY:
5271 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5273 ctxt->sax->error(ctxt->userData,
5274 "Attempt to reference the parameter entity '%s'\n", name);
5275 ctxt->wellFormed = 0;
5276 ctxt->disableSAX = 1;
5277 break;
5278 default:
5279 break;
5280 }
5281 }
5282
5283 /*
5284 * [ WFC: No Recursion ]
5285 * A parsed entity must not contain a recursive reference
5286 * to itself, either directly or indirectly.
5287 * Done somewhere else
5288 */
5289
5290 } else {
5291 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5293 ctxt->sax->error(ctxt->userData,
5294 "xmlParseEntityRef: expecting ';'\n");
5295 ctxt->wellFormed = 0;
5296 ctxt->disableSAX = 1;
5297 }
5298 xmlFree(name);
5299 }
5300 }
5301 return(ent);
5302}
5303
5304/**
5305 * xmlParseStringEntityRef:
5306 * @ctxt: an XML parser context
5307 * @str: a pointer to an index in the string
5308 *
5309 * parse ENTITY references declarations, but this version parses it from
5310 * a string value.
5311 *
5312 * [68] EntityRef ::= '&' Name ';'
5313 *
5314 * [ WFC: Entity Declared ]
5315 * In a document without any DTD, a document with only an internal DTD
5316 * subset which contains no parameter entity references, or a document
5317 * with "standalone='yes'", the Name given in the entity reference
5318 * must match that in an entity declaration, except that well-formed
5319 * documents need not declare any of the following entities: amp, lt,
5320 * gt, apos, quot. The declaration of a parameter entity must precede
5321 * any reference to it. Similarly, the declaration of a general entity
5322 * must precede any reference to it which appears in a default value in an
5323 * attribute-list declaration. Note that if entities are declared in the
5324 * external subset or in external parameter entities, a non-validating
5325 * processor is not obligated to read and process their declarations;
5326 * for such documents, the rule that an entity must be declared is a
5327 * well-formedness constraint only if standalone='yes'.
5328 *
5329 * [ WFC: Parsed Entity ]
5330 * An entity reference must not contain the name of an unparsed entity
5331 *
5332 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5333 * is updated to the current location in the string.
5334 */
5335xmlEntityPtr
5336xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5337 xmlChar *name;
5338 const xmlChar *ptr;
5339 xmlChar cur;
5340 xmlEntityPtr ent = NULL;
5341
5342 if ((str == NULL) || (*str == NULL))
5343 return(NULL);
5344 ptr = *str;
5345 cur = *ptr;
5346 if (cur == '&') {
5347 ptr++;
5348 cur = *ptr;
5349 name = xmlParseStringName(ctxt, &ptr);
5350 if (name == NULL) {
5351 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5353 ctxt->sax->error(ctxt->userData,
5354 "xmlParseEntityRef: no name\n");
5355 ctxt->wellFormed = 0;
5356 ctxt->disableSAX = 1;
5357 } else {
5358 if (*ptr == ';') {
5359 ptr++;
5360 /*
5361 * Ask first SAX for entity resolution, otherwise try the
5362 * predefined set.
5363 */
5364 if (ctxt->sax != NULL) {
5365 if (ctxt->sax->getEntity != NULL)
5366 ent = ctxt->sax->getEntity(ctxt->userData, name);
5367 if (ent == NULL)
5368 ent = xmlGetPredefinedEntity(name);
5369 }
5370 /*
5371 * [ WFC: Entity Declared ]
5372 * In a document without any DTD, a document with only an
5373 * internal DTD subset which contains no parameter entity
5374 * references, or a document with "standalone='yes'", the
5375 * Name given in the entity reference must match that in an
5376 * entity declaration, except that well-formed documents
5377 * need not declare any of the following entities: amp, lt,
5378 * gt, apos, quot.
5379 * The declaration of a parameter entity must precede any
5380 * reference to it.
5381 * Similarly, the declaration of a general entity must
5382 * precede any reference to it which appears in a default
5383 * value in an attribute-list declaration. Note that if
5384 * entities are declared in the external subset or in
5385 * external parameter entities, a non-validating processor
5386 * is not obligated to read and process their declarations;
5387 * for such documents, the rule that an entity must be
5388 * declared is a well-formedness constraint only if
5389 * standalone='yes'.
5390 */
5391 if (ent == NULL) {
5392 if ((ctxt->standalone == 1) ||
5393 ((ctxt->hasExternalSubset == 0) &&
5394 (ctxt->hasPErefs == 0))) {
5395 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5397 ctxt->sax->error(ctxt->userData,
5398 "Entity '%s' not defined\n", name);
5399 ctxt->wellFormed = 0;
5400 ctxt->disableSAX = 1;
5401 } else {
5402 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5403 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5404 ctxt->sax->warning(ctxt->userData,
5405 "Entity '%s' not defined\n", name);
5406 }
5407 }
5408
5409 /*
5410 * [ WFC: Parsed Entity ]
5411 * An entity reference must not contain the name of an
5412 * unparsed entity
5413 */
5414 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5415 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5417 ctxt->sax->error(ctxt->userData,
5418 "Entity reference to unparsed entity %s\n", name);
5419 ctxt->wellFormed = 0;
5420 ctxt->disableSAX = 1;
5421 }
5422
5423 /*
5424 * [ WFC: No External Entity References ]
5425 * Attribute values cannot contain direct or indirect
5426 * entity references to external entities.
5427 */
5428 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5429 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5430 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5432 ctxt->sax->error(ctxt->userData,
5433 "Attribute references external entity '%s'\n", name);
5434 ctxt->wellFormed = 0;
5435 ctxt->disableSAX = 1;
5436 }
5437 /*
5438 * [ WFC: No < in Attribute Values ]
5439 * The replacement text of any entity referred to directly or
5440 * indirectly in an attribute value (other than "&lt;") must
5441 * not contain a <.
5442 */
5443 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5444 (ent != NULL) &&
5445 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5446 (ent->content != NULL) &&
5447 (xmlStrchr(ent->content, '<'))) {
5448 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5450 ctxt->sax->error(ctxt->userData,
5451 "'<' in entity '%s' is not allowed in attributes values\n", name);
5452 ctxt->wellFormed = 0;
5453 ctxt->disableSAX = 1;
5454 }
5455
5456 /*
5457 * Internal check, no parameter entities here ...
5458 */
5459 else {
5460 switch (ent->etype) {
5461 case XML_INTERNAL_PARAMETER_ENTITY:
5462 case XML_EXTERNAL_PARAMETER_ENTITY:
5463 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5465 ctxt->sax->error(ctxt->userData,
5466 "Attempt to reference the parameter entity '%s'\n", name);
5467 ctxt->wellFormed = 0;
5468 ctxt->disableSAX = 1;
5469 break;
5470 default:
5471 break;
5472 }
5473 }
5474
5475 /*
5476 * [ WFC: No Recursion ]
5477 * A parsed entity must not contain a recursive reference
5478 * to itself, either directly or indirectly.
5479 * Done somewhwere else
5480 */
5481
5482 } else {
5483 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5485 ctxt->sax->error(ctxt->userData,
5486 "xmlParseEntityRef: expecting ';'\n");
5487 ctxt->wellFormed = 0;
5488 ctxt->disableSAX = 1;
5489 }
5490 xmlFree(name);
5491 }
5492 }
5493 *str = ptr;
5494 return(ent);
5495}
5496
5497/**
5498 * xmlParsePEReference:
5499 * @ctxt: an XML parser context
5500 *
5501 * parse PEReference declarations
5502 * The entity content is handled directly by pushing it's content as
5503 * a new input stream.
5504 *
5505 * [69] PEReference ::= '%' Name ';'
5506 *
5507 * [ WFC: No Recursion ]
5508 * A parsed entity must not contain a recursive
5509 * reference to itself, either directly or indirectly.
5510 *
5511 * [ WFC: Entity Declared ]
5512 * In a document without any DTD, a document with only an internal DTD
5513 * subset which contains no parameter entity references, or a document
5514 * with "standalone='yes'", ... ... The declaration of a parameter
5515 * entity must precede any reference to it...
5516 *
5517 * [ VC: Entity Declared ]
5518 * In a document with an external subset or external parameter entities
5519 * with "standalone='no'", ... ... The declaration of a parameter entity
5520 * must precede any reference to it...
5521 *
5522 * [ WFC: In DTD ]
5523 * Parameter-entity references may only appear in the DTD.
5524 * NOTE: misleading but this is handled.
5525 */
5526void
5527xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5528 xmlChar *name;
5529 xmlEntityPtr entity = NULL;
5530 xmlParserInputPtr input;
5531
5532 if (RAW == '%') {
5533 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005534 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005535 if (name == NULL) {
5536 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5538 ctxt->sax->error(ctxt->userData,
5539 "xmlParsePEReference: no name\n");
5540 ctxt->wellFormed = 0;
5541 ctxt->disableSAX = 1;
5542 } else {
5543 if (RAW == ';') {
5544 NEXT;
5545 if ((ctxt->sax != NULL) &&
5546 (ctxt->sax->getParameterEntity != NULL))
5547 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5548 name);
5549 if (entity == NULL) {
5550 /*
5551 * [ WFC: Entity Declared ]
5552 * In a document without any DTD, a document with only an
5553 * internal DTD subset which contains no parameter entity
5554 * references, or a document with "standalone='yes'", ...
5555 * ... The declaration of a parameter entity must precede
5556 * any reference to it...
5557 */
5558 if ((ctxt->standalone == 1) ||
5559 ((ctxt->hasExternalSubset == 0) &&
5560 (ctxt->hasPErefs == 0))) {
5561 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5562 if ((!ctxt->disableSAX) &&
5563 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5564 ctxt->sax->error(ctxt->userData,
5565 "PEReference: %%%s; not found\n", name);
5566 ctxt->wellFormed = 0;
5567 ctxt->disableSAX = 1;
5568 } else {
5569 /*
5570 * [ VC: Entity Declared ]
5571 * In a document with an external subset or external
5572 * parameter entities with "standalone='no'", ...
5573 * ... The declaration of a parameter entity must precede
5574 * any reference to it...
5575 */
5576 if ((!ctxt->disableSAX) &&
5577 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5578 ctxt->sax->warning(ctxt->userData,
5579 "PEReference: %%%s; not found\n", name);
5580 ctxt->valid = 0;
5581 }
5582 } else {
5583 /*
5584 * Internal checking in case the entity quest barfed
5585 */
5586 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5587 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5588 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5589 ctxt->sax->warning(ctxt->userData,
5590 "Internal: %%%s; is not a parameter entity\n", name);
5591 } else {
5592 /*
5593 * TODO !!!
5594 * handle the extra spaces added before and after
5595 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5596 */
5597 input = xmlNewEntityInputStream(ctxt, entity);
5598 xmlPushInput(ctxt, input);
5599 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5600 (RAW == '<') && (NXT(1) == '?') &&
5601 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5602 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5603 xmlParseTextDecl(ctxt);
5604 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5605 /*
5606 * The XML REC instructs us to stop parsing
5607 * right here
5608 */
5609 ctxt->instate = XML_PARSER_EOF;
5610 xmlFree(name);
5611 return;
5612 }
5613 }
5614 if (ctxt->token == 0)
5615 ctxt->token = ' ';
5616 }
5617 }
5618 ctxt->hasPErefs = 1;
5619 } else {
5620 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5622 ctxt->sax->error(ctxt->userData,
5623 "xmlParsePEReference: expecting ';'\n");
5624 ctxt->wellFormed = 0;
5625 ctxt->disableSAX = 1;
5626 }
5627 xmlFree(name);
5628 }
5629 }
5630}
5631
5632/**
5633 * xmlParseStringPEReference:
5634 * @ctxt: an XML parser context
5635 * @str: a pointer to an index in the string
5636 *
5637 * parse PEReference declarations
5638 *
5639 * [69] PEReference ::= '%' Name ';'
5640 *
5641 * [ WFC: No Recursion ]
5642 * A parsed entity must not contain a recursive
5643 * reference to itself, either directly or indirectly.
5644 *
5645 * [ WFC: Entity Declared ]
5646 * In a document without any DTD, a document with only an internal DTD
5647 * subset which contains no parameter entity references, or a document
5648 * with "standalone='yes'", ... ... The declaration of a parameter
5649 * entity must precede any reference to it...
5650 *
5651 * [ VC: Entity Declared ]
5652 * In a document with an external subset or external parameter entities
5653 * with "standalone='no'", ... ... The declaration of a parameter entity
5654 * must precede any reference to it...
5655 *
5656 * [ WFC: In DTD ]
5657 * Parameter-entity references may only appear in the DTD.
5658 * NOTE: misleading but this is handled.
5659 *
5660 * Returns the string of the entity content.
5661 * str is updated to the current value of the index
5662 */
5663xmlEntityPtr
5664xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5665 const xmlChar *ptr;
5666 xmlChar cur;
5667 xmlChar *name;
5668 xmlEntityPtr entity = NULL;
5669
5670 if ((str == NULL) || (*str == NULL)) return(NULL);
5671 ptr = *str;
5672 cur = *ptr;
5673 if (cur == '%') {
5674 ptr++;
5675 cur = *ptr;
5676 name = xmlParseStringName(ctxt, &ptr);
5677 if (name == NULL) {
5678 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5680 ctxt->sax->error(ctxt->userData,
5681 "xmlParseStringPEReference: no name\n");
5682 ctxt->wellFormed = 0;
5683 ctxt->disableSAX = 1;
5684 } else {
5685 cur = *ptr;
5686 if (cur == ';') {
5687 ptr++;
5688 cur = *ptr;
5689 if ((ctxt->sax != NULL) &&
5690 (ctxt->sax->getParameterEntity != NULL))
5691 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5692 name);
5693 if (entity == NULL) {
5694 /*
5695 * [ WFC: Entity Declared ]
5696 * In a document without any DTD, a document with only an
5697 * internal DTD subset which contains no parameter entity
5698 * references, or a document with "standalone='yes'", ...
5699 * ... The declaration of a parameter entity must precede
5700 * any reference to it...
5701 */
5702 if ((ctxt->standalone == 1) ||
5703 ((ctxt->hasExternalSubset == 0) &&
5704 (ctxt->hasPErefs == 0))) {
5705 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5707 ctxt->sax->error(ctxt->userData,
5708 "PEReference: %%%s; not found\n", name);
5709 ctxt->wellFormed = 0;
5710 ctxt->disableSAX = 1;
5711 } else {
5712 /*
5713 * [ VC: Entity Declared ]
5714 * In a document with an external subset or external
5715 * parameter entities with "standalone='no'", ...
5716 * ... The declaration of a parameter entity must
5717 * precede any reference to it...
5718 */
5719 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5720 ctxt->sax->warning(ctxt->userData,
5721 "PEReference: %%%s; not found\n", name);
5722 ctxt->valid = 0;
5723 }
5724 } else {
5725 /*
5726 * Internal checking in case the entity quest barfed
5727 */
5728 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5729 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5730 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5731 ctxt->sax->warning(ctxt->userData,
5732 "Internal: %%%s; is not a parameter entity\n", name);
5733 }
5734 }
5735 ctxt->hasPErefs = 1;
5736 } else {
5737 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5739 ctxt->sax->error(ctxt->userData,
5740 "xmlParseStringPEReference: expecting ';'\n");
5741 ctxt->wellFormed = 0;
5742 ctxt->disableSAX = 1;
5743 }
5744 xmlFree(name);
5745 }
5746 }
5747 *str = ptr;
5748 return(entity);
5749}
5750
5751/**
5752 * xmlParseDocTypeDecl:
5753 * @ctxt: an XML parser context
5754 *
5755 * parse a DOCTYPE declaration
5756 *
5757 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5758 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5759 *
5760 * [ VC: Root Element Type ]
5761 * The Name in the document type declaration must match the element
5762 * type of the root element.
5763 */
5764
5765void
5766xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5767 xmlChar *name = NULL;
5768 xmlChar *ExternalID = NULL;
5769 xmlChar *URI = NULL;
5770
5771 /*
5772 * We know that '<!DOCTYPE' has been detected.
5773 */
5774 SKIP(9);
5775
5776 SKIP_BLANKS;
5777
5778 /*
5779 * Parse the DOCTYPE name.
5780 */
5781 name = xmlParseName(ctxt);
5782 if (name == NULL) {
5783 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5785 ctxt->sax->error(ctxt->userData,
5786 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5787 ctxt->wellFormed = 0;
5788 ctxt->disableSAX = 1;
5789 }
5790 ctxt->intSubName = name;
5791
5792 SKIP_BLANKS;
5793
5794 /*
5795 * Check for SystemID and ExternalID
5796 */
5797 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5798
5799 if ((URI != NULL) || (ExternalID != NULL)) {
5800 ctxt->hasExternalSubset = 1;
5801 }
5802 ctxt->extSubURI = URI;
5803 ctxt->extSubSystem = ExternalID;
5804
5805 SKIP_BLANKS;
5806
5807 /*
5808 * Create and update the internal subset.
5809 */
5810 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5811 (!ctxt->disableSAX))
5812 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5813
5814 /*
5815 * Is there any internal subset declarations ?
5816 * they are handled separately in xmlParseInternalSubset()
5817 */
5818 if (RAW == '[')
5819 return;
5820
5821 /*
5822 * We should be at the end of the DOCTYPE declaration.
5823 */
5824 if (RAW != '>') {
5825 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5827 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5828 ctxt->wellFormed = 0;
5829 ctxt->disableSAX = 1;
5830 }
5831 NEXT;
5832}
5833
5834/**
5835 * xmlParseInternalsubset:
5836 * @ctxt: an XML parser context
5837 *
5838 * parse the internal subset declaration
5839 *
5840 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5841 */
5842
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005843static void
Owen Taylor3473f882001-02-23 17:55:21 +00005844xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5845 /*
5846 * Is there any DTD definition ?
5847 */
5848 if (RAW == '[') {
5849 ctxt->instate = XML_PARSER_DTD;
5850 NEXT;
5851 /*
5852 * Parse the succession of Markup declarations and
5853 * PEReferences.
5854 * Subsequence (markupdecl | PEReference | S)*
5855 */
5856 while (RAW != ']') {
5857 const xmlChar *check = CUR_PTR;
5858 int cons = ctxt->input->consumed;
5859
5860 SKIP_BLANKS;
5861 xmlParseMarkupDecl(ctxt);
5862 xmlParsePEReference(ctxt);
5863
5864 /*
5865 * Pop-up of finished entities.
5866 */
5867 while ((RAW == 0) && (ctxt->inputNr > 1))
5868 xmlPopInput(ctxt);
5869
5870 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5871 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5873 ctxt->sax->error(ctxt->userData,
5874 "xmlParseInternalSubset: error detected in Markup declaration\n");
5875 ctxt->wellFormed = 0;
5876 ctxt->disableSAX = 1;
5877 break;
5878 }
5879 }
5880 if (RAW == ']') {
5881 NEXT;
5882 SKIP_BLANKS;
5883 }
5884 }
5885
5886 /*
5887 * We should be at the end of the DOCTYPE declaration.
5888 */
5889 if (RAW != '>') {
5890 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5892 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5893 ctxt->wellFormed = 0;
5894 ctxt->disableSAX = 1;
5895 }
5896 NEXT;
5897}
5898
5899/**
5900 * xmlParseAttribute:
5901 * @ctxt: an XML parser context
5902 * @value: a xmlChar ** used to store the value of the attribute
5903 *
5904 * parse an attribute
5905 *
5906 * [41] Attribute ::= Name Eq AttValue
5907 *
5908 * [ WFC: No External Entity References ]
5909 * Attribute values cannot contain direct or indirect entity references
5910 * to external entities.
5911 *
5912 * [ WFC: No < in Attribute Values ]
5913 * The replacement text of any entity referred to directly or indirectly in
5914 * an attribute value (other than "&lt;") must not contain a <.
5915 *
5916 * [ VC: Attribute Value Type ]
5917 * The attribute must have been declared; the value must be of the type
5918 * declared for it.
5919 *
5920 * [25] Eq ::= S? '=' S?
5921 *
5922 * With namespace:
5923 *
5924 * [NS 11] Attribute ::= QName Eq AttValue
5925 *
5926 * Also the case QName == xmlns:??? is handled independently as a namespace
5927 * definition.
5928 *
5929 * Returns the attribute name, and the value in *value.
5930 */
5931
5932xmlChar *
5933xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5934 xmlChar *name, *val;
5935
5936 *value = NULL;
5937 name = xmlParseName(ctxt);
5938 if (name == NULL) {
5939 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5941 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5942 ctxt->wellFormed = 0;
5943 ctxt->disableSAX = 1;
5944 return(NULL);
5945 }
5946
5947 /*
5948 * read the value
5949 */
5950 SKIP_BLANKS;
5951 if (RAW == '=') {
5952 NEXT;
5953 SKIP_BLANKS;
5954 val = xmlParseAttValue(ctxt);
5955 ctxt->instate = XML_PARSER_CONTENT;
5956 } else {
5957 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5959 ctxt->sax->error(ctxt->userData,
5960 "Specification mandate value for attribute %s\n", name);
5961 ctxt->wellFormed = 0;
5962 ctxt->disableSAX = 1;
5963 xmlFree(name);
5964 return(NULL);
5965 }
5966
5967 /*
5968 * Check that xml:lang conforms to the specification
5969 * No more registered as an error, just generate a warning now
5970 * since this was deprecated in XML second edition
5971 */
5972 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5973 if (!xmlCheckLanguageID(val)) {
5974 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5975 ctxt->sax->warning(ctxt->userData,
5976 "Malformed value for xml:lang : %s\n", val);
5977 }
5978 }
5979
5980 /*
5981 * Check that xml:space conforms to the specification
5982 */
5983 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5984 if (xmlStrEqual(val, BAD_CAST "default"))
5985 *(ctxt->space) = 0;
5986 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5987 *(ctxt->space) = 1;
5988 else {
5989 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5991 ctxt->sax->error(ctxt->userData,
5992"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5993 val);
5994 ctxt->wellFormed = 0;
5995 ctxt->disableSAX = 1;
5996 }
5997 }
5998
5999 *value = val;
6000 return(name);
6001}
6002
6003/**
6004 * xmlParseStartTag:
6005 * @ctxt: an XML parser context
6006 *
6007 * parse a start of tag either for rule element or
6008 * EmptyElement. In both case we don't parse the tag closing chars.
6009 *
6010 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6011 *
6012 * [ WFC: Unique Att Spec ]
6013 * No attribute name may appear more than once in the same start-tag or
6014 * empty-element tag.
6015 *
6016 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6017 *
6018 * [ WFC: Unique Att Spec ]
6019 * No attribute name may appear more than once in the same start-tag or
6020 * empty-element tag.
6021 *
6022 * With namespace:
6023 *
6024 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6025 *
6026 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6027 *
6028 * Returns the element name parsed
6029 */
6030
6031xmlChar *
6032xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6033 xmlChar *name;
6034 xmlChar *attname;
6035 xmlChar *attvalue;
6036 const xmlChar **atts = NULL;
6037 int nbatts = 0;
6038 int maxatts = 0;
6039 int i;
6040
6041 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006042 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006043
6044 name = xmlParseName(ctxt);
6045 if (name == NULL) {
6046 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6048 ctxt->sax->error(ctxt->userData,
6049 "xmlParseStartTag: invalid element name\n");
6050 ctxt->wellFormed = 0;
6051 ctxt->disableSAX = 1;
6052 return(NULL);
6053 }
6054
6055 /*
6056 * Now parse the attributes, it ends up with the ending
6057 *
6058 * (S Attribute)* S?
6059 */
6060 SKIP_BLANKS;
6061 GROW;
6062
Daniel Veillard21a0f912001-02-25 19:54:14 +00006063 while ((RAW != '>') &&
6064 ((RAW != '/') || (NXT(1) != '>')) &&
6065 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006066 const xmlChar *q = CUR_PTR;
6067 int cons = ctxt->input->consumed;
6068
6069 attname = xmlParseAttribute(ctxt, &attvalue);
6070 if ((attname != NULL) && (attvalue != NULL)) {
6071 /*
6072 * [ WFC: Unique Att Spec ]
6073 * No attribute name may appear more than once in the same
6074 * start-tag or empty-element tag.
6075 */
6076 for (i = 0; i < nbatts;i += 2) {
6077 if (xmlStrEqual(atts[i], attname)) {
6078 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6080 ctxt->sax->error(ctxt->userData,
6081 "Attribute %s redefined\n",
6082 attname);
6083 ctxt->wellFormed = 0;
6084 ctxt->disableSAX = 1;
6085 xmlFree(attname);
6086 xmlFree(attvalue);
6087 goto failed;
6088 }
6089 }
6090
6091 /*
6092 * Add the pair to atts
6093 */
6094 if (atts == NULL) {
6095 maxatts = 10;
6096 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6097 if (atts == NULL) {
6098 xmlGenericError(xmlGenericErrorContext,
6099 "malloc of %ld byte failed\n",
6100 maxatts * (long)sizeof(xmlChar *));
6101 return(NULL);
6102 }
6103 } else if (nbatts + 4 > maxatts) {
6104 maxatts *= 2;
6105 atts = (const xmlChar **) xmlRealloc((void *) atts,
6106 maxatts * sizeof(xmlChar *));
6107 if (atts == NULL) {
6108 xmlGenericError(xmlGenericErrorContext,
6109 "realloc of %ld byte failed\n",
6110 maxatts * (long)sizeof(xmlChar *));
6111 return(NULL);
6112 }
6113 }
6114 atts[nbatts++] = attname;
6115 atts[nbatts++] = attvalue;
6116 atts[nbatts] = NULL;
6117 atts[nbatts + 1] = NULL;
6118 } else {
6119 if (attname != NULL)
6120 xmlFree(attname);
6121 if (attvalue != NULL)
6122 xmlFree(attvalue);
6123 }
6124
6125failed:
6126
6127 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6128 break;
6129 if (!IS_BLANK(RAW)) {
6130 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6132 ctxt->sax->error(ctxt->userData,
6133 "attributes construct error\n");
6134 ctxt->wellFormed = 0;
6135 ctxt->disableSAX = 1;
6136 }
6137 SKIP_BLANKS;
6138 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6139 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6141 ctxt->sax->error(ctxt->userData,
6142 "xmlParseStartTag: problem parsing attributes\n");
6143 ctxt->wellFormed = 0;
6144 ctxt->disableSAX = 1;
6145 break;
6146 }
6147 GROW;
6148 }
6149
6150 /*
6151 * SAX: Start of Element !
6152 */
6153 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6154 (!ctxt->disableSAX))
6155 ctxt->sax->startElement(ctxt->userData, name, atts);
6156
6157 if (atts != NULL) {
6158 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6159 xmlFree((void *) atts);
6160 }
6161 return(name);
6162}
6163
6164/**
6165 * xmlParseEndTag:
6166 * @ctxt: an XML parser context
6167 *
6168 * parse an end of tag
6169 *
6170 * [42] ETag ::= '</' Name S? '>'
6171 *
6172 * With namespace
6173 *
6174 * [NS 9] ETag ::= '</' QName S? '>'
6175 */
6176
6177void
6178xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6179 xmlChar *name;
6180 xmlChar *oldname;
6181
6182 GROW;
6183 if ((RAW != '<') || (NXT(1) != '/')) {
6184 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6186 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6187 ctxt->wellFormed = 0;
6188 ctxt->disableSAX = 1;
6189 return;
6190 }
6191 SKIP(2);
6192
6193 name = xmlParseName(ctxt);
6194
6195 /*
6196 * We should definitely be at the ending "S? '>'" part
6197 */
6198 GROW;
6199 SKIP_BLANKS;
6200 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6201 ctxt->errNo = XML_ERR_GT_REQUIRED;
6202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6203 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6204 ctxt->wellFormed = 0;
6205 ctxt->disableSAX = 1;
6206 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006207 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006208
6209 /*
6210 * [ WFC: Element Type Match ]
6211 * The Name in an element's end-tag must match the element type in the
6212 * start-tag.
6213 *
6214 */
6215 if ((name == NULL) || (ctxt->name == NULL) ||
6216 (!xmlStrEqual(name, ctxt->name))) {
6217 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6219 if ((name != NULL) && (ctxt->name != NULL)) {
6220 ctxt->sax->error(ctxt->userData,
6221 "Opening and ending tag mismatch: %s and %s\n",
6222 ctxt->name, name);
6223 } else if (ctxt->name != NULL) {
6224 ctxt->sax->error(ctxt->userData,
6225 "Ending tag eror for: %s\n", ctxt->name);
6226 } else {
6227 ctxt->sax->error(ctxt->userData,
6228 "Ending tag error: internal error ???\n");
6229 }
6230
6231 }
6232 ctxt->wellFormed = 0;
6233 ctxt->disableSAX = 1;
6234 }
6235
6236 /*
6237 * SAX: End of Tag
6238 */
6239 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6240 (!ctxt->disableSAX))
6241 ctxt->sax->endElement(ctxt->userData, name);
6242
6243 if (name != NULL)
6244 xmlFree(name);
6245 oldname = namePop(ctxt);
6246 spacePop(ctxt);
6247 if (oldname != NULL) {
6248#ifdef DEBUG_STACK
6249 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6250#endif
6251 xmlFree(oldname);
6252 }
6253 return;
6254}
6255
6256/**
6257 * xmlParseCDSect:
6258 * @ctxt: an XML parser context
6259 *
6260 * Parse escaped pure raw content.
6261 *
6262 * [18] CDSect ::= CDStart CData CDEnd
6263 *
6264 * [19] CDStart ::= '<![CDATA['
6265 *
6266 * [20] CData ::= (Char* - (Char* ']]>' Char*))
6267 *
6268 * [21] CDEnd ::= ']]>'
6269 */
6270void
6271xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6272 xmlChar *buf = NULL;
6273 int len = 0;
6274 int size = XML_PARSER_BUFFER_SIZE;
6275 int r, rl;
6276 int s, sl;
6277 int cur, l;
6278 int count = 0;
6279
6280 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6281 (NXT(2) == '[') && (NXT(3) == 'C') &&
6282 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6283 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6284 (NXT(8) == '[')) {
6285 SKIP(9);
6286 } else
6287 return;
6288
6289 ctxt->instate = XML_PARSER_CDATA_SECTION;
6290 r = CUR_CHAR(rl);
6291 if (!IS_CHAR(r)) {
6292 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6294 ctxt->sax->error(ctxt->userData,
6295 "CData section not finished\n");
6296 ctxt->wellFormed = 0;
6297 ctxt->disableSAX = 1;
6298 ctxt->instate = XML_PARSER_CONTENT;
6299 return;
6300 }
6301 NEXTL(rl);
6302 s = CUR_CHAR(sl);
6303 if (!IS_CHAR(s)) {
6304 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6306 ctxt->sax->error(ctxt->userData,
6307 "CData section not finished\n");
6308 ctxt->wellFormed = 0;
6309 ctxt->disableSAX = 1;
6310 ctxt->instate = XML_PARSER_CONTENT;
6311 return;
6312 }
6313 NEXTL(sl);
6314 cur = CUR_CHAR(l);
6315 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6316 if (buf == NULL) {
6317 xmlGenericError(xmlGenericErrorContext,
6318 "malloc of %d byte failed\n", size);
6319 return;
6320 }
6321 while (IS_CHAR(cur) &&
6322 ((r != ']') || (s != ']') || (cur != '>'))) {
6323 if (len + 5 >= size) {
6324 size *= 2;
6325 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6326 if (buf == NULL) {
6327 xmlGenericError(xmlGenericErrorContext,
6328 "realloc of %d byte failed\n", size);
6329 return;
6330 }
6331 }
6332 COPY_BUF(rl,buf,len,r);
6333 r = s;
6334 rl = sl;
6335 s = cur;
6336 sl = l;
6337 count++;
6338 if (count > 50) {
6339 GROW;
6340 count = 0;
6341 }
6342 NEXTL(l);
6343 cur = CUR_CHAR(l);
6344 }
6345 buf[len] = 0;
6346 ctxt->instate = XML_PARSER_CONTENT;
6347 if (cur != '>') {
6348 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6350 ctxt->sax->error(ctxt->userData,
6351 "CData section not finished\n%.50s\n", buf);
6352 ctxt->wellFormed = 0;
6353 ctxt->disableSAX = 1;
6354 xmlFree(buf);
6355 return;
6356 }
6357 NEXTL(l);
6358
6359 /*
6360 * Ok the buffer is to be consumed as cdata.
6361 */
6362 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6363 if (ctxt->sax->cdataBlock != NULL)
6364 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6365 }
6366 xmlFree(buf);
6367}
6368
6369/**
6370 * xmlParseContent:
6371 * @ctxt: an XML parser context
6372 *
6373 * Parse a content:
6374 *
6375 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6376 */
6377
6378void
6379xmlParseContent(xmlParserCtxtPtr ctxt) {
6380 GROW;
6381 while (((RAW != 0) || (ctxt->token != 0)) &&
6382 ((RAW != '<') || (NXT(1) != '/'))) {
6383 const xmlChar *test = CUR_PTR;
6384 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006385 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006386 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006387
6388 /*
6389 * Handle possible processed charrefs.
6390 */
6391 if (ctxt->token != 0) {
6392 xmlParseCharData(ctxt, 0);
6393 }
6394 /*
6395 * First case : a Processing Instruction.
6396 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006397 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006398 xmlParsePI(ctxt);
6399 }
6400
6401 /*
6402 * Second case : a CDSection
6403 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006404 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006405 (NXT(2) == '[') && (NXT(3) == 'C') &&
6406 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6407 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6408 (NXT(8) == '[')) {
6409 xmlParseCDSect(ctxt);
6410 }
6411
6412 /*
6413 * Third case : a comment
6414 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006415 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006416 (NXT(2) == '-') && (NXT(3) == '-')) {
6417 xmlParseComment(ctxt);
6418 ctxt->instate = XML_PARSER_CONTENT;
6419 }
6420
6421 /*
6422 * Fourth case : a sub-element.
6423 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006424 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006425 xmlParseElement(ctxt);
6426 }
6427
6428 /*
6429 * Fifth case : a reference. If if has not been resolved,
6430 * parsing returns it's Name, create the node
6431 */
6432
Daniel Veillard21a0f912001-02-25 19:54:14 +00006433 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006434 xmlParseReference(ctxt);
6435 }
6436
6437 /*
6438 * Last case, text. Note that References are handled directly.
6439 */
6440 else {
6441 xmlParseCharData(ctxt, 0);
6442 }
6443
6444 GROW;
6445 /*
6446 * Pop-up of finished entities.
6447 */
6448 while ((RAW == 0) && (ctxt->inputNr > 1))
6449 xmlPopInput(ctxt);
6450 SHRINK;
6451
6452 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6453 (tok == ctxt->token)) {
6454 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6456 ctxt->sax->error(ctxt->userData,
6457 "detected an error in element content\n");
6458 ctxt->wellFormed = 0;
6459 ctxt->disableSAX = 1;
6460 ctxt->instate = XML_PARSER_EOF;
6461 break;
6462 }
6463 }
6464}
6465
6466/**
6467 * xmlParseElement:
6468 * @ctxt: an XML parser context
6469 *
6470 * parse an XML element, this is highly recursive
6471 *
6472 * [39] element ::= EmptyElemTag | STag content ETag
6473 *
6474 * [ WFC: Element Type Match ]
6475 * The Name in an element's end-tag must match the element type in the
6476 * start-tag.
6477 *
6478 * [ VC: Element Valid ]
6479 * An element is valid if there is a declaration matching elementdecl
6480 * where the Name matches the element type and one of the following holds:
6481 * - The declaration matches EMPTY and the element has no content.
6482 * - The declaration matches children and the sequence of child elements
6483 * belongs to the language generated by the regular expression in the
6484 * content model, with optional white space (characters matching the
6485 * nonterminal S) between each pair of child elements.
6486 * - The declaration matches Mixed and the content consists of character
6487 * data and child elements whose types match names in the content model.
6488 * - The declaration matches ANY, and the types of any child elements have
6489 * been declared.
6490 */
6491
6492void
6493xmlParseElement(xmlParserCtxtPtr ctxt) {
6494 const xmlChar *openTag = CUR_PTR;
6495 xmlChar *name;
6496 xmlChar *oldname;
6497 xmlParserNodeInfo node_info;
6498 xmlNodePtr ret;
6499
6500 /* Capture start position */
6501 if (ctxt->record_info) {
6502 node_info.begin_pos = ctxt->input->consumed +
6503 (CUR_PTR - ctxt->input->base);
6504 node_info.begin_line = ctxt->input->line;
6505 }
6506
6507 if (ctxt->spaceNr == 0)
6508 spacePush(ctxt, -1);
6509 else
6510 spacePush(ctxt, *ctxt->space);
6511
6512 name = xmlParseStartTag(ctxt);
6513 if (name == NULL) {
6514 spacePop(ctxt);
6515 return;
6516 }
6517 namePush(ctxt, name);
6518 ret = ctxt->node;
6519
6520 /*
6521 * [ VC: Root Element Type ]
6522 * The Name in the document type declaration must match the element
6523 * type of the root element.
6524 */
6525 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6526 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6527 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6528
6529 /*
6530 * Check for an Empty Element.
6531 */
6532 if ((RAW == '/') && (NXT(1) == '>')) {
6533 SKIP(2);
6534 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6535 (!ctxt->disableSAX))
6536 ctxt->sax->endElement(ctxt->userData, name);
6537 oldname = namePop(ctxt);
6538 spacePop(ctxt);
6539 if (oldname != NULL) {
6540#ifdef DEBUG_STACK
6541 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6542#endif
6543 xmlFree(oldname);
6544 }
6545 if ( ret != NULL && ctxt->record_info ) {
6546 node_info.end_pos = ctxt->input->consumed +
6547 (CUR_PTR - ctxt->input->base);
6548 node_info.end_line = ctxt->input->line;
6549 node_info.node = ret;
6550 xmlParserAddNodeInfo(ctxt, &node_info);
6551 }
6552 return;
6553 }
6554 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006555 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006556 } else {
6557 ctxt->errNo = XML_ERR_GT_REQUIRED;
6558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6559 ctxt->sax->error(ctxt->userData,
6560 "Couldn't find end of Start Tag\n%.30s\n",
6561 openTag);
6562 ctxt->wellFormed = 0;
6563 ctxt->disableSAX = 1;
6564
6565 /*
6566 * end of parsing of this node.
6567 */
6568 nodePop(ctxt);
6569 oldname = namePop(ctxt);
6570 spacePop(ctxt);
6571 if (oldname != NULL) {
6572#ifdef DEBUG_STACK
6573 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6574#endif
6575 xmlFree(oldname);
6576 }
6577
6578 /*
6579 * Capture end position and add node
6580 */
6581 if ( ret != NULL && ctxt->record_info ) {
6582 node_info.end_pos = ctxt->input->consumed +
6583 (CUR_PTR - ctxt->input->base);
6584 node_info.end_line = ctxt->input->line;
6585 node_info.node = ret;
6586 xmlParserAddNodeInfo(ctxt, &node_info);
6587 }
6588 return;
6589 }
6590
6591 /*
6592 * Parse the content of the element:
6593 */
6594 xmlParseContent(ctxt);
6595 if (!IS_CHAR(RAW)) {
6596 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6598 ctxt->sax->error(ctxt->userData,
6599 "Premature end of data in tag %.30s\n", openTag);
6600 ctxt->wellFormed = 0;
6601 ctxt->disableSAX = 1;
6602
6603 /*
6604 * end of parsing of this node.
6605 */
6606 nodePop(ctxt);
6607 oldname = namePop(ctxt);
6608 spacePop(ctxt);
6609 if (oldname != NULL) {
6610#ifdef DEBUG_STACK
6611 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6612#endif
6613 xmlFree(oldname);
6614 }
6615 return;
6616 }
6617
6618 /*
6619 * parse the end of tag: '</' should be here.
6620 */
6621 xmlParseEndTag(ctxt);
6622
6623 /*
6624 * Capture end position and add node
6625 */
6626 if ( ret != NULL && ctxt->record_info ) {
6627 node_info.end_pos = ctxt->input->consumed +
6628 (CUR_PTR - ctxt->input->base);
6629 node_info.end_line = ctxt->input->line;
6630 node_info.node = ret;
6631 xmlParserAddNodeInfo(ctxt, &node_info);
6632 }
6633}
6634
6635/**
6636 * xmlParseVersionNum:
6637 * @ctxt: an XML parser context
6638 *
6639 * parse the XML version value.
6640 *
6641 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6642 *
6643 * Returns the string giving the XML version number, or NULL
6644 */
6645xmlChar *
6646xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6647 xmlChar *buf = NULL;
6648 int len = 0;
6649 int size = 10;
6650 xmlChar cur;
6651
6652 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6653 if (buf == NULL) {
6654 xmlGenericError(xmlGenericErrorContext,
6655 "malloc of %d byte failed\n", size);
6656 return(NULL);
6657 }
6658 cur = CUR;
6659 while (((cur >= 'a') && (cur <= 'z')) ||
6660 ((cur >= 'A') && (cur <= 'Z')) ||
6661 ((cur >= '0') && (cur <= '9')) ||
6662 (cur == '_') || (cur == '.') ||
6663 (cur == ':') || (cur == '-')) {
6664 if (len + 1 >= size) {
6665 size *= 2;
6666 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6667 if (buf == NULL) {
6668 xmlGenericError(xmlGenericErrorContext,
6669 "realloc of %d byte failed\n", size);
6670 return(NULL);
6671 }
6672 }
6673 buf[len++] = cur;
6674 NEXT;
6675 cur=CUR;
6676 }
6677 buf[len] = 0;
6678 return(buf);
6679}
6680
6681/**
6682 * xmlParseVersionInfo:
6683 * @ctxt: an XML parser context
6684 *
6685 * parse the XML version.
6686 *
6687 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6688 *
6689 * [25] Eq ::= S? '=' S?
6690 *
6691 * Returns the version string, e.g. "1.0"
6692 */
6693
6694xmlChar *
6695xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6696 xmlChar *version = NULL;
6697 const xmlChar *q;
6698
6699 if ((RAW == 'v') && (NXT(1) == 'e') &&
6700 (NXT(2) == 'r') && (NXT(3) == 's') &&
6701 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6702 (NXT(6) == 'n')) {
6703 SKIP(7);
6704 SKIP_BLANKS;
6705 if (RAW != '=') {
6706 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6708 ctxt->sax->error(ctxt->userData,
6709 "xmlParseVersionInfo : expected '='\n");
6710 ctxt->wellFormed = 0;
6711 ctxt->disableSAX = 1;
6712 return(NULL);
6713 }
6714 NEXT;
6715 SKIP_BLANKS;
6716 if (RAW == '"') {
6717 NEXT;
6718 q = CUR_PTR;
6719 version = xmlParseVersionNum(ctxt);
6720 if (RAW != '"') {
6721 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6723 ctxt->sax->error(ctxt->userData,
6724 "String not closed\n%.50s\n", q);
6725 ctxt->wellFormed = 0;
6726 ctxt->disableSAX = 1;
6727 } else
6728 NEXT;
6729 } else if (RAW == '\''){
6730 NEXT;
6731 q = CUR_PTR;
6732 version = xmlParseVersionNum(ctxt);
6733 if (RAW != '\'') {
6734 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6736 ctxt->sax->error(ctxt->userData,
6737 "String not closed\n%.50s\n", q);
6738 ctxt->wellFormed = 0;
6739 ctxt->disableSAX = 1;
6740 } else
6741 NEXT;
6742 } else {
6743 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6745 ctxt->sax->error(ctxt->userData,
6746 "xmlParseVersionInfo : expected ' or \"\n");
6747 ctxt->wellFormed = 0;
6748 ctxt->disableSAX = 1;
6749 }
6750 }
6751 return(version);
6752}
6753
6754/**
6755 * xmlParseEncName:
6756 * @ctxt: an XML parser context
6757 *
6758 * parse the XML encoding name
6759 *
6760 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6761 *
6762 * Returns the encoding name value or NULL
6763 */
6764xmlChar *
6765xmlParseEncName(xmlParserCtxtPtr ctxt) {
6766 xmlChar *buf = NULL;
6767 int len = 0;
6768 int size = 10;
6769 xmlChar cur;
6770
6771 cur = CUR;
6772 if (((cur >= 'a') && (cur <= 'z')) ||
6773 ((cur >= 'A') && (cur <= 'Z'))) {
6774 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6775 if (buf == NULL) {
6776 xmlGenericError(xmlGenericErrorContext,
6777 "malloc of %d byte failed\n", size);
6778 return(NULL);
6779 }
6780
6781 buf[len++] = cur;
6782 NEXT;
6783 cur = CUR;
6784 while (((cur >= 'a') && (cur <= 'z')) ||
6785 ((cur >= 'A') && (cur <= 'Z')) ||
6786 ((cur >= '0') && (cur <= '9')) ||
6787 (cur == '.') || (cur == '_') ||
6788 (cur == '-')) {
6789 if (len + 1 >= size) {
6790 size *= 2;
6791 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6792 if (buf == NULL) {
6793 xmlGenericError(xmlGenericErrorContext,
6794 "realloc of %d byte failed\n", size);
6795 return(NULL);
6796 }
6797 }
6798 buf[len++] = cur;
6799 NEXT;
6800 cur = CUR;
6801 if (cur == 0) {
6802 SHRINK;
6803 GROW;
6804 cur = CUR;
6805 }
6806 }
6807 buf[len] = 0;
6808 } else {
6809 ctxt->errNo = XML_ERR_ENCODING_NAME;
6810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6811 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6812 ctxt->wellFormed = 0;
6813 ctxt->disableSAX = 1;
6814 }
6815 return(buf);
6816}
6817
6818/**
6819 * xmlParseEncodingDecl:
6820 * @ctxt: an XML parser context
6821 *
6822 * parse the XML encoding declaration
6823 *
6824 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6825 *
6826 * this setups the conversion filters.
6827 *
6828 * Returns the encoding value or NULL
6829 */
6830
6831xmlChar *
6832xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6833 xmlChar *encoding = NULL;
6834 const xmlChar *q;
6835
6836 SKIP_BLANKS;
6837 if ((RAW == 'e') && (NXT(1) == 'n') &&
6838 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6839 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6840 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6841 SKIP(8);
6842 SKIP_BLANKS;
6843 if (RAW != '=') {
6844 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6846 ctxt->sax->error(ctxt->userData,
6847 "xmlParseEncodingDecl : expected '='\n");
6848 ctxt->wellFormed = 0;
6849 ctxt->disableSAX = 1;
6850 return(NULL);
6851 }
6852 NEXT;
6853 SKIP_BLANKS;
6854 if (RAW == '"') {
6855 NEXT;
6856 q = CUR_PTR;
6857 encoding = xmlParseEncName(ctxt);
6858 if (RAW != '"') {
6859 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6861 ctxt->sax->error(ctxt->userData,
6862 "String not closed\n%.50s\n", q);
6863 ctxt->wellFormed = 0;
6864 ctxt->disableSAX = 1;
6865 } else
6866 NEXT;
6867 } else if (RAW == '\''){
6868 NEXT;
6869 q = CUR_PTR;
6870 encoding = xmlParseEncName(ctxt);
6871 if (RAW != '\'') {
6872 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6874 ctxt->sax->error(ctxt->userData,
6875 "String not closed\n%.50s\n", q);
6876 ctxt->wellFormed = 0;
6877 ctxt->disableSAX = 1;
6878 } else
6879 NEXT;
6880 } else if (RAW == '"'){
6881 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6883 ctxt->sax->error(ctxt->userData,
6884 "xmlParseEncodingDecl : expected ' or \"\n");
6885 ctxt->wellFormed = 0;
6886 ctxt->disableSAX = 1;
6887 }
6888 if (encoding != NULL) {
6889 xmlCharEncoding enc;
6890 xmlCharEncodingHandlerPtr handler;
6891
6892 if (ctxt->input->encoding != NULL)
6893 xmlFree((xmlChar *) ctxt->input->encoding);
6894 ctxt->input->encoding = encoding;
6895
6896 enc = xmlParseCharEncoding((const char *) encoding);
6897 /*
6898 * registered set of known encodings
6899 */
6900 if (enc != XML_CHAR_ENCODING_ERROR) {
6901 xmlSwitchEncoding(ctxt, enc);
6902 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6903 xmlFree(encoding);
6904 return(NULL);
6905 }
6906 } else {
6907 /*
6908 * fallback for unknown encodings
6909 */
6910 handler = xmlFindCharEncodingHandler((const char *) encoding);
6911 if (handler != NULL) {
6912 xmlSwitchToEncoding(ctxt, handler);
6913 } else {
6914 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6916 ctxt->sax->error(ctxt->userData,
6917 "Unsupported encoding %s\n", encoding);
6918 return(NULL);
6919 }
6920 }
6921 }
6922 }
6923 return(encoding);
6924}
6925
6926/**
6927 * xmlParseSDDecl:
6928 * @ctxt: an XML parser context
6929 *
6930 * parse the XML standalone declaration
6931 *
6932 * [32] SDDecl ::= S 'standalone' Eq
6933 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6934 *
6935 * [ VC: Standalone Document Declaration ]
6936 * TODO The standalone document declaration must have the value "no"
6937 * if any external markup declarations contain declarations of:
6938 * - attributes with default values, if elements to which these
6939 * attributes apply appear in the document without specifications
6940 * of values for these attributes, or
6941 * - entities (other than amp, lt, gt, apos, quot), if references
6942 * to those entities appear in the document, or
6943 * - attributes with values subject to normalization, where the
6944 * attribute appears in the document with a value which will change
6945 * as a result of normalization, or
6946 * - element types with element content, if white space occurs directly
6947 * within any instance of those types.
6948 *
6949 * Returns 1 if standalone, 0 otherwise
6950 */
6951
6952int
6953xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6954 int standalone = -1;
6955
6956 SKIP_BLANKS;
6957 if ((RAW == 's') && (NXT(1) == 't') &&
6958 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6959 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6960 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6961 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6962 SKIP(10);
6963 SKIP_BLANKS;
6964 if (RAW != '=') {
6965 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6967 ctxt->sax->error(ctxt->userData,
6968 "XML standalone declaration : expected '='\n");
6969 ctxt->wellFormed = 0;
6970 ctxt->disableSAX = 1;
6971 return(standalone);
6972 }
6973 NEXT;
6974 SKIP_BLANKS;
6975 if (RAW == '\''){
6976 NEXT;
6977 if ((RAW == 'n') && (NXT(1) == 'o')) {
6978 standalone = 0;
6979 SKIP(2);
6980 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6981 (NXT(2) == 's')) {
6982 standalone = 1;
6983 SKIP(3);
6984 } else {
6985 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6987 ctxt->sax->error(ctxt->userData,
6988 "standalone accepts only 'yes' or 'no'\n");
6989 ctxt->wellFormed = 0;
6990 ctxt->disableSAX = 1;
6991 }
6992 if (RAW != '\'') {
6993 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6995 ctxt->sax->error(ctxt->userData, "String not closed\n");
6996 ctxt->wellFormed = 0;
6997 ctxt->disableSAX = 1;
6998 } else
6999 NEXT;
7000 } else if (RAW == '"'){
7001 NEXT;
7002 if ((RAW == 'n') && (NXT(1) == 'o')) {
7003 standalone = 0;
7004 SKIP(2);
7005 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7006 (NXT(2) == 's')) {
7007 standalone = 1;
7008 SKIP(3);
7009 } else {
7010 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7012 ctxt->sax->error(ctxt->userData,
7013 "standalone accepts only 'yes' or 'no'\n");
7014 ctxt->wellFormed = 0;
7015 ctxt->disableSAX = 1;
7016 }
7017 if (RAW != '"') {
7018 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7020 ctxt->sax->error(ctxt->userData, "String not closed\n");
7021 ctxt->wellFormed = 0;
7022 ctxt->disableSAX = 1;
7023 } else
7024 NEXT;
7025 } else {
7026 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7028 ctxt->sax->error(ctxt->userData,
7029 "Standalone value not found\n");
7030 ctxt->wellFormed = 0;
7031 ctxt->disableSAX = 1;
7032 }
7033 }
7034 return(standalone);
7035}
7036
7037/**
7038 * xmlParseXMLDecl:
7039 * @ctxt: an XML parser context
7040 *
7041 * parse an XML declaration header
7042 *
7043 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7044 */
7045
7046void
7047xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7048 xmlChar *version;
7049
7050 /*
7051 * We know that '<?xml' is here.
7052 */
7053 SKIP(5);
7054
7055 if (!IS_BLANK(RAW)) {
7056 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7058 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7059 ctxt->wellFormed = 0;
7060 ctxt->disableSAX = 1;
7061 }
7062 SKIP_BLANKS;
7063
7064 /*
7065 * We should have the VersionInfo here.
7066 */
7067 version = xmlParseVersionInfo(ctxt);
7068 if (version == NULL)
7069 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7070 ctxt->version = xmlStrdup(version);
7071 xmlFree(version);
7072
7073 /*
7074 * We may have the encoding declaration
7075 */
7076 if (!IS_BLANK(RAW)) {
7077 if ((RAW == '?') && (NXT(1) == '>')) {
7078 SKIP(2);
7079 return;
7080 }
7081 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7083 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7084 ctxt->wellFormed = 0;
7085 ctxt->disableSAX = 1;
7086 }
7087 xmlParseEncodingDecl(ctxt);
7088 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7089 /*
7090 * The XML REC instructs us to stop parsing right here
7091 */
7092 return;
7093 }
7094
7095 /*
7096 * We may have the standalone status.
7097 */
7098 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7099 if ((RAW == '?') && (NXT(1) == '>')) {
7100 SKIP(2);
7101 return;
7102 }
7103 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7105 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7106 ctxt->wellFormed = 0;
7107 ctxt->disableSAX = 1;
7108 }
7109 SKIP_BLANKS;
7110 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7111
7112 SKIP_BLANKS;
7113 if ((RAW == '?') && (NXT(1) == '>')) {
7114 SKIP(2);
7115 } else if (RAW == '>') {
7116 /* Deprecated old WD ... */
7117 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7119 ctxt->sax->error(ctxt->userData,
7120 "XML declaration must end-up with '?>'\n");
7121 ctxt->wellFormed = 0;
7122 ctxt->disableSAX = 1;
7123 NEXT;
7124 } else {
7125 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7127 ctxt->sax->error(ctxt->userData,
7128 "parsing XML declaration: '?>' expected\n");
7129 ctxt->wellFormed = 0;
7130 ctxt->disableSAX = 1;
7131 MOVETO_ENDTAG(CUR_PTR);
7132 NEXT;
7133 }
7134}
7135
7136/**
7137 * xmlParseMisc:
7138 * @ctxt: an XML parser context
7139 *
7140 * parse an XML Misc* optionnal field.
7141 *
7142 * [27] Misc ::= Comment | PI | S
7143 */
7144
7145void
7146xmlParseMisc(xmlParserCtxtPtr ctxt) {
7147 while (((RAW == '<') && (NXT(1) == '?')) ||
7148 ((RAW == '<') && (NXT(1) == '!') &&
7149 (NXT(2) == '-') && (NXT(3) == '-')) ||
7150 IS_BLANK(CUR)) {
7151 if ((RAW == '<') && (NXT(1) == '?')) {
7152 xmlParsePI(ctxt);
7153 } else if (IS_BLANK(CUR)) {
7154 NEXT;
7155 } else
7156 xmlParseComment(ctxt);
7157 }
7158}
7159
7160/**
7161 * xmlParseDocument:
7162 * @ctxt: an XML parser context
7163 *
7164 * parse an XML document (and build a tree if using the standard SAX
7165 * interface).
7166 *
7167 * [1] document ::= prolog element Misc*
7168 *
7169 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7170 *
7171 * Returns 0, -1 in case of error. the parser context is augmented
7172 * as a result of the parsing.
7173 */
7174
7175int
7176xmlParseDocument(xmlParserCtxtPtr ctxt) {
7177 xmlChar start[4];
7178 xmlCharEncoding enc;
7179
7180 xmlInitParser();
7181
7182 GROW;
7183
7184 /*
7185 * SAX: beginning of the document processing.
7186 */
7187 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7188 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7189
7190 /*
7191 * Get the 4 first bytes and decode the charset
7192 * if enc != XML_CHAR_ENCODING_NONE
7193 * plug some encoding conversion routines.
7194 */
7195 start[0] = RAW;
7196 start[1] = NXT(1);
7197 start[2] = NXT(2);
7198 start[3] = NXT(3);
7199 enc = xmlDetectCharEncoding(start, 4);
7200 if (enc != XML_CHAR_ENCODING_NONE) {
7201 xmlSwitchEncoding(ctxt, enc);
7202 }
7203
7204
7205 if (CUR == 0) {
7206 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7208 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7209 ctxt->wellFormed = 0;
7210 ctxt->disableSAX = 1;
7211 }
7212
7213 /*
7214 * Check for the XMLDecl in the Prolog.
7215 */
7216 GROW;
7217 if ((RAW == '<') && (NXT(1) == '?') &&
7218 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7219 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7220
7221 /*
7222 * Note that we will switch encoding on the fly.
7223 */
7224 xmlParseXMLDecl(ctxt);
7225 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7226 /*
7227 * The XML REC instructs us to stop parsing right here
7228 */
7229 return(-1);
7230 }
7231 ctxt->standalone = ctxt->input->standalone;
7232 SKIP_BLANKS;
7233 } else {
7234 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7235 }
7236 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7237 ctxt->sax->startDocument(ctxt->userData);
7238
7239 /*
7240 * The Misc part of the Prolog
7241 */
7242 GROW;
7243 xmlParseMisc(ctxt);
7244
7245 /*
7246 * Then possibly doc type declaration(s) and more Misc
7247 * (doctypedecl Misc*)?
7248 */
7249 GROW;
7250 if ((RAW == '<') && (NXT(1) == '!') &&
7251 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7252 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7253 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7254 (NXT(8) == 'E')) {
7255
7256 ctxt->inSubset = 1;
7257 xmlParseDocTypeDecl(ctxt);
7258 if (RAW == '[') {
7259 ctxt->instate = XML_PARSER_DTD;
7260 xmlParseInternalSubset(ctxt);
7261 }
7262
7263 /*
7264 * Create and update the external subset.
7265 */
7266 ctxt->inSubset = 2;
7267 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7268 (!ctxt->disableSAX))
7269 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7270 ctxt->extSubSystem, ctxt->extSubURI);
7271 ctxt->inSubset = 0;
7272
7273
7274 ctxt->instate = XML_PARSER_PROLOG;
7275 xmlParseMisc(ctxt);
7276 }
7277
7278 /*
7279 * Time to start parsing the tree itself
7280 */
7281 GROW;
7282 if (RAW != '<') {
7283 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7285 ctxt->sax->error(ctxt->userData,
7286 "Start tag expected, '<' not found\n");
7287 ctxt->wellFormed = 0;
7288 ctxt->disableSAX = 1;
7289 ctxt->instate = XML_PARSER_EOF;
7290 } else {
7291 ctxt->instate = XML_PARSER_CONTENT;
7292 xmlParseElement(ctxt);
7293 ctxt->instate = XML_PARSER_EPILOG;
7294
7295
7296 /*
7297 * The Misc part at the end
7298 */
7299 xmlParseMisc(ctxt);
7300
7301 if (RAW != 0) {
7302 ctxt->errNo = XML_ERR_DOCUMENT_END;
7303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7304 ctxt->sax->error(ctxt->userData,
7305 "Extra content at the end of the document\n");
7306 ctxt->wellFormed = 0;
7307 ctxt->disableSAX = 1;
7308 }
7309 ctxt->instate = XML_PARSER_EOF;
7310 }
7311
7312 /*
7313 * SAX: end of the document processing.
7314 */
7315 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7316 (!ctxt->disableSAX))
7317 ctxt->sax->endDocument(ctxt->userData);
7318
7319 if (! ctxt->wellFormed) return(-1);
7320 return(0);
7321}
7322
7323/**
7324 * xmlParseExtParsedEnt:
7325 * @ctxt: an XML parser context
7326 *
7327 * parse a genreral parsed entity
7328 * An external general parsed entity is well-formed if it matches the
7329 * production labeled extParsedEnt.
7330 *
7331 * [78] extParsedEnt ::= TextDecl? content
7332 *
7333 * Returns 0, -1 in case of error. the parser context is augmented
7334 * as a result of the parsing.
7335 */
7336
7337int
7338xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7339 xmlChar start[4];
7340 xmlCharEncoding enc;
7341
7342 xmlDefaultSAXHandlerInit();
7343
7344 GROW;
7345
7346 /*
7347 * SAX: beginning of the document processing.
7348 */
7349 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7350 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7351
7352 /*
7353 * Get the 4 first bytes and decode the charset
7354 * if enc != XML_CHAR_ENCODING_NONE
7355 * plug some encoding conversion routines.
7356 */
7357 start[0] = RAW;
7358 start[1] = NXT(1);
7359 start[2] = NXT(2);
7360 start[3] = NXT(3);
7361 enc = xmlDetectCharEncoding(start, 4);
7362 if (enc != XML_CHAR_ENCODING_NONE) {
7363 xmlSwitchEncoding(ctxt, enc);
7364 }
7365
7366
7367 if (CUR == 0) {
7368 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7370 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7371 ctxt->wellFormed = 0;
7372 ctxt->disableSAX = 1;
7373 }
7374
7375 /*
7376 * Check for the XMLDecl in the Prolog.
7377 */
7378 GROW;
7379 if ((RAW == '<') && (NXT(1) == '?') &&
7380 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7381 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7382
7383 /*
7384 * Note that we will switch encoding on the fly.
7385 */
7386 xmlParseXMLDecl(ctxt);
7387 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7388 /*
7389 * The XML REC instructs us to stop parsing right here
7390 */
7391 return(-1);
7392 }
7393 SKIP_BLANKS;
7394 } else {
7395 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7396 }
7397 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7398 ctxt->sax->startDocument(ctxt->userData);
7399
7400 /*
7401 * Doing validity checking on chunk doesn't make sense
7402 */
7403 ctxt->instate = XML_PARSER_CONTENT;
7404 ctxt->validate = 0;
7405 ctxt->loadsubset = 0;
7406 ctxt->depth = 0;
7407
7408 xmlParseContent(ctxt);
7409
7410 if ((RAW == '<') && (NXT(1) == '/')) {
7411 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7413 ctxt->sax->error(ctxt->userData,
7414 "chunk is not well balanced\n");
7415 ctxt->wellFormed = 0;
7416 ctxt->disableSAX = 1;
7417 } else if (RAW != 0) {
7418 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7419 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7420 ctxt->sax->error(ctxt->userData,
7421 "extra content at the end of well balanced chunk\n");
7422 ctxt->wellFormed = 0;
7423 ctxt->disableSAX = 1;
7424 }
7425
7426 /*
7427 * SAX: end of the document processing.
7428 */
7429 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7430 (!ctxt->disableSAX))
7431 ctxt->sax->endDocument(ctxt->userData);
7432
7433 if (! ctxt->wellFormed) return(-1);
7434 return(0);
7435}
7436
7437/************************************************************************
7438 * *
7439 * Progressive parsing interfaces *
7440 * *
7441 ************************************************************************/
7442
7443/**
7444 * xmlParseLookupSequence:
7445 * @ctxt: an XML parser context
7446 * @first: the first char to lookup
7447 * @next: the next char to lookup or zero
7448 * @third: the next char to lookup or zero
7449 *
7450 * Try to find if a sequence (first, next, third) or just (first next) or
7451 * (first) is available in the input stream.
7452 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7453 * to avoid rescanning sequences of bytes, it DOES change the state of the
7454 * parser, do not use liberally.
7455 *
7456 * Returns the index to the current parsing point if the full sequence
7457 * is available, -1 otherwise.
7458 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007459static int
Owen Taylor3473f882001-02-23 17:55:21 +00007460xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7461 xmlChar next, xmlChar third) {
7462 int base, len;
7463 xmlParserInputPtr in;
7464 const xmlChar *buf;
7465
7466 in = ctxt->input;
7467 if (in == NULL) return(-1);
7468 base = in->cur - in->base;
7469 if (base < 0) return(-1);
7470 if (ctxt->checkIndex > base)
7471 base = ctxt->checkIndex;
7472 if (in->buf == NULL) {
7473 buf = in->base;
7474 len = in->length;
7475 } else {
7476 buf = in->buf->buffer->content;
7477 len = in->buf->buffer->use;
7478 }
7479 /* take into account the sequence length */
7480 if (third) len -= 2;
7481 else if (next) len --;
7482 for (;base < len;base++) {
7483 if (buf[base] == first) {
7484 if (third != 0) {
7485 if ((buf[base + 1] != next) ||
7486 (buf[base + 2] != third)) continue;
7487 } else if (next != 0) {
7488 if (buf[base + 1] != next) continue;
7489 }
7490 ctxt->checkIndex = 0;
7491#ifdef DEBUG_PUSH
7492 if (next == 0)
7493 xmlGenericError(xmlGenericErrorContext,
7494 "PP: lookup '%c' found at %d\n",
7495 first, base);
7496 else if (third == 0)
7497 xmlGenericError(xmlGenericErrorContext,
7498 "PP: lookup '%c%c' found at %d\n",
7499 first, next, base);
7500 else
7501 xmlGenericError(xmlGenericErrorContext,
7502 "PP: lookup '%c%c%c' found at %d\n",
7503 first, next, third, base);
7504#endif
7505 return(base - (in->cur - in->base));
7506 }
7507 }
7508 ctxt->checkIndex = base;
7509#ifdef DEBUG_PUSH
7510 if (next == 0)
7511 xmlGenericError(xmlGenericErrorContext,
7512 "PP: lookup '%c' failed\n", first);
7513 else if (third == 0)
7514 xmlGenericError(xmlGenericErrorContext,
7515 "PP: lookup '%c%c' failed\n", first, next);
7516 else
7517 xmlGenericError(xmlGenericErrorContext,
7518 "PP: lookup '%c%c%c' failed\n", first, next, third);
7519#endif
7520 return(-1);
7521}
7522
7523/**
7524 * xmlParseTryOrFinish:
7525 * @ctxt: an XML parser context
7526 * @terminate: last chunk indicator
7527 *
7528 * Try to progress on parsing
7529 *
7530 * Returns zero if no parsing was possible
7531 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007532static int
Owen Taylor3473f882001-02-23 17:55:21 +00007533xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7534 int ret = 0;
7535 int avail;
7536 xmlChar cur, next;
7537
7538#ifdef DEBUG_PUSH
7539 switch (ctxt->instate) {
7540 case XML_PARSER_EOF:
7541 xmlGenericError(xmlGenericErrorContext,
7542 "PP: try EOF\n"); break;
7543 case XML_PARSER_START:
7544 xmlGenericError(xmlGenericErrorContext,
7545 "PP: try START\n"); break;
7546 case XML_PARSER_MISC:
7547 xmlGenericError(xmlGenericErrorContext,
7548 "PP: try MISC\n");break;
7549 case XML_PARSER_COMMENT:
7550 xmlGenericError(xmlGenericErrorContext,
7551 "PP: try COMMENT\n");break;
7552 case XML_PARSER_PROLOG:
7553 xmlGenericError(xmlGenericErrorContext,
7554 "PP: try PROLOG\n");break;
7555 case XML_PARSER_START_TAG:
7556 xmlGenericError(xmlGenericErrorContext,
7557 "PP: try START_TAG\n");break;
7558 case XML_PARSER_CONTENT:
7559 xmlGenericError(xmlGenericErrorContext,
7560 "PP: try CONTENT\n");break;
7561 case XML_PARSER_CDATA_SECTION:
7562 xmlGenericError(xmlGenericErrorContext,
7563 "PP: try CDATA_SECTION\n");break;
7564 case XML_PARSER_END_TAG:
7565 xmlGenericError(xmlGenericErrorContext,
7566 "PP: try END_TAG\n");break;
7567 case XML_PARSER_ENTITY_DECL:
7568 xmlGenericError(xmlGenericErrorContext,
7569 "PP: try ENTITY_DECL\n");break;
7570 case XML_PARSER_ENTITY_VALUE:
7571 xmlGenericError(xmlGenericErrorContext,
7572 "PP: try ENTITY_VALUE\n");break;
7573 case XML_PARSER_ATTRIBUTE_VALUE:
7574 xmlGenericError(xmlGenericErrorContext,
7575 "PP: try ATTRIBUTE_VALUE\n");break;
7576 case XML_PARSER_DTD:
7577 xmlGenericError(xmlGenericErrorContext,
7578 "PP: try DTD\n");break;
7579 case XML_PARSER_EPILOG:
7580 xmlGenericError(xmlGenericErrorContext,
7581 "PP: try EPILOG\n");break;
7582 case XML_PARSER_PI:
7583 xmlGenericError(xmlGenericErrorContext,
7584 "PP: try PI\n");break;
7585 case XML_PARSER_IGNORE:
7586 xmlGenericError(xmlGenericErrorContext,
7587 "PP: try IGNORE\n");break;
7588 }
7589#endif
7590
7591 while (1) {
7592 /*
7593 * Pop-up of finished entities.
7594 */
7595 while ((RAW == 0) && (ctxt->inputNr > 1))
7596 xmlPopInput(ctxt);
7597
7598 if (ctxt->input ==NULL) break;
7599 if (ctxt->input->buf == NULL)
7600 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7601 else
7602 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7603 if (avail < 1)
7604 goto done;
7605 switch (ctxt->instate) {
7606 case XML_PARSER_EOF:
7607 /*
7608 * Document parsing is done !
7609 */
7610 goto done;
7611 case XML_PARSER_START:
7612 /*
7613 * Very first chars read from the document flow.
7614 */
Owen Taylor3473f882001-02-23 17:55:21 +00007615 if (avail < 2)
7616 goto done;
7617
7618 cur = ctxt->input->cur[0];
7619 next = ctxt->input->cur[1];
7620 if (cur == 0) {
7621 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7622 ctxt->sax->setDocumentLocator(ctxt->userData,
7623 &xmlDefaultSAXLocator);
7624 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7626 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7627 ctxt->wellFormed = 0;
7628 ctxt->disableSAX = 1;
7629 ctxt->instate = XML_PARSER_EOF;
7630#ifdef DEBUG_PUSH
7631 xmlGenericError(xmlGenericErrorContext,
7632 "PP: entering EOF\n");
7633#endif
7634 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7635 ctxt->sax->endDocument(ctxt->userData);
7636 goto done;
7637 }
7638 if ((cur == '<') && (next == '?')) {
7639 /* PI or XML decl */
7640 if (avail < 5) return(ret);
7641 if ((!terminate) &&
7642 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7643 return(ret);
7644 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7645 ctxt->sax->setDocumentLocator(ctxt->userData,
7646 &xmlDefaultSAXLocator);
7647 if ((ctxt->input->cur[2] == 'x') &&
7648 (ctxt->input->cur[3] == 'm') &&
7649 (ctxt->input->cur[4] == 'l') &&
7650 (IS_BLANK(ctxt->input->cur[5]))) {
7651 ret += 5;
7652#ifdef DEBUG_PUSH
7653 xmlGenericError(xmlGenericErrorContext,
7654 "PP: Parsing XML Decl\n");
7655#endif
7656 xmlParseXMLDecl(ctxt);
7657 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7658 /*
7659 * The XML REC instructs us to stop parsing right
7660 * here
7661 */
7662 ctxt->instate = XML_PARSER_EOF;
7663 return(0);
7664 }
7665 ctxt->standalone = ctxt->input->standalone;
7666 if ((ctxt->encoding == NULL) &&
7667 (ctxt->input->encoding != NULL))
7668 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7669 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7670 (!ctxt->disableSAX))
7671 ctxt->sax->startDocument(ctxt->userData);
7672 ctxt->instate = XML_PARSER_MISC;
7673#ifdef DEBUG_PUSH
7674 xmlGenericError(xmlGenericErrorContext,
7675 "PP: entering MISC\n");
7676#endif
7677 } else {
7678 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7679 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7680 (!ctxt->disableSAX))
7681 ctxt->sax->startDocument(ctxt->userData);
7682 ctxt->instate = XML_PARSER_MISC;
7683#ifdef DEBUG_PUSH
7684 xmlGenericError(xmlGenericErrorContext,
7685 "PP: entering MISC\n");
7686#endif
7687 }
7688 } else {
7689 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7690 ctxt->sax->setDocumentLocator(ctxt->userData,
7691 &xmlDefaultSAXLocator);
7692 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7693 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7694 (!ctxt->disableSAX))
7695 ctxt->sax->startDocument(ctxt->userData);
7696 ctxt->instate = XML_PARSER_MISC;
7697#ifdef DEBUG_PUSH
7698 xmlGenericError(xmlGenericErrorContext,
7699 "PP: entering MISC\n");
7700#endif
7701 }
7702 break;
7703 case XML_PARSER_MISC:
7704 SKIP_BLANKS;
7705 if (ctxt->input->buf == NULL)
7706 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7707 else
7708 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7709 if (avail < 2)
7710 goto done;
7711 cur = ctxt->input->cur[0];
7712 next = ctxt->input->cur[1];
7713 if ((cur == '<') && (next == '?')) {
7714 if ((!terminate) &&
7715 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7716 goto done;
7717#ifdef DEBUG_PUSH
7718 xmlGenericError(xmlGenericErrorContext,
7719 "PP: Parsing PI\n");
7720#endif
7721 xmlParsePI(ctxt);
7722 } else if ((cur == '<') && (next == '!') &&
7723 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7724 if ((!terminate) &&
7725 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7726 goto done;
7727#ifdef DEBUG_PUSH
7728 xmlGenericError(xmlGenericErrorContext,
7729 "PP: Parsing Comment\n");
7730#endif
7731 xmlParseComment(ctxt);
7732 ctxt->instate = XML_PARSER_MISC;
7733 } else if ((cur == '<') && (next == '!') &&
7734 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7735 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7736 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7737 (ctxt->input->cur[8] == 'E')) {
7738 if ((!terminate) &&
7739 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7740 goto done;
7741#ifdef DEBUG_PUSH
7742 xmlGenericError(xmlGenericErrorContext,
7743 "PP: Parsing internal subset\n");
7744#endif
7745 ctxt->inSubset = 1;
7746 xmlParseDocTypeDecl(ctxt);
7747 if (RAW == '[') {
7748 ctxt->instate = XML_PARSER_DTD;
7749#ifdef DEBUG_PUSH
7750 xmlGenericError(xmlGenericErrorContext,
7751 "PP: entering DTD\n");
7752#endif
7753 } else {
7754 /*
7755 * Create and update the external subset.
7756 */
7757 ctxt->inSubset = 2;
7758 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7759 (ctxt->sax->externalSubset != NULL))
7760 ctxt->sax->externalSubset(ctxt->userData,
7761 ctxt->intSubName, ctxt->extSubSystem,
7762 ctxt->extSubURI);
7763 ctxt->inSubset = 0;
7764 ctxt->instate = XML_PARSER_PROLOG;
7765#ifdef DEBUG_PUSH
7766 xmlGenericError(xmlGenericErrorContext,
7767 "PP: entering PROLOG\n");
7768#endif
7769 }
7770 } else if ((cur == '<') && (next == '!') &&
7771 (avail < 9)) {
7772 goto done;
7773 } else {
7774 ctxt->instate = XML_PARSER_START_TAG;
7775#ifdef DEBUG_PUSH
7776 xmlGenericError(xmlGenericErrorContext,
7777 "PP: entering START_TAG\n");
7778#endif
7779 }
7780 break;
7781 case XML_PARSER_IGNORE:
7782 xmlGenericError(xmlGenericErrorContext,
7783 "PP: internal error, state == IGNORE");
7784 ctxt->instate = XML_PARSER_DTD;
7785#ifdef DEBUG_PUSH
7786 xmlGenericError(xmlGenericErrorContext,
7787 "PP: entering DTD\n");
7788#endif
7789 break;
7790 case XML_PARSER_PROLOG:
7791 SKIP_BLANKS;
7792 if (ctxt->input->buf == NULL)
7793 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7794 else
7795 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7796 if (avail < 2)
7797 goto done;
7798 cur = ctxt->input->cur[0];
7799 next = ctxt->input->cur[1];
7800 if ((cur == '<') && (next == '?')) {
7801 if ((!terminate) &&
7802 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7803 goto done;
7804#ifdef DEBUG_PUSH
7805 xmlGenericError(xmlGenericErrorContext,
7806 "PP: Parsing PI\n");
7807#endif
7808 xmlParsePI(ctxt);
7809 } else if ((cur == '<') && (next == '!') &&
7810 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7811 if ((!terminate) &&
7812 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7813 goto done;
7814#ifdef DEBUG_PUSH
7815 xmlGenericError(xmlGenericErrorContext,
7816 "PP: Parsing Comment\n");
7817#endif
7818 xmlParseComment(ctxt);
7819 ctxt->instate = XML_PARSER_PROLOG;
7820 } else if ((cur == '<') && (next == '!') &&
7821 (avail < 4)) {
7822 goto done;
7823 } else {
7824 ctxt->instate = XML_PARSER_START_TAG;
7825#ifdef DEBUG_PUSH
7826 xmlGenericError(xmlGenericErrorContext,
7827 "PP: entering START_TAG\n");
7828#endif
7829 }
7830 break;
7831 case XML_PARSER_EPILOG:
7832 SKIP_BLANKS;
7833 if (ctxt->input->buf == NULL)
7834 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7835 else
7836 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7837 if (avail < 2)
7838 goto done;
7839 cur = ctxt->input->cur[0];
7840 next = ctxt->input->cur[1];
7841 if ((cur == '<') && (next == '?')) {
7842 if ((!terminate) &&
7843 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7844 goto done;
7845#ifdef DEBUG_PUSH
7846 xmlGenericError(xmlGenericErrorContext,
7847 "PP: Parsing PI\n");
7848#endif
7849 xmlParsePI(ctxt);
7850 ctxt->instate = XML_PARSER_EPILOG;
7851 } else if ((cur == '<') && (next == '!') &&
7852 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7853 if ((!terminate) &&
7854 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7855 goto done;
7856#ifdef DEBUG_PUSH
7857 xmlGenericError(xmlGenericErrorContext,
7858 "PP: Parsing Comment\n");
7859#endif
7860 xmlParseComment(ctxt);
7861 ctxt->instate = XML_PARSER_EPILOG;
7862 } else if ((cur == '<') && (next == '!') &&
7863 (avail < 4)) {
7864 goto done;
7865 } else {
7866 ctxt->errNo = XML_ERR_DOCUMENT_END;
7867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7868 ctxt->sax->error(ctxt->userData,
7869 "Extra content at the end of the document\n");
7870 ctxt->wellFormed = 0;
7871 ctxt->disableSAX = 1;
7872 ctxt->instate = XML_PARSER_EOF;
7873#ifdef DEBUG_PUSH
7874 xmlGenericError(xmlGenericErrorContext,
7875 "PP: entering EOF\n");
7876#endif
7877 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7878 (!ctxt->disableSAX))
7879 ctxt->sax->endDocument(ctxt->userData);
7880 goto done;
7881 }
7882 break;
7883 case XML_PARSER_START_TAG: {
7884 xmlChar *name, *oldname;
7885
7886 if ((avail < 2) && (ctxt->inputNr == 1))
7887 goto done;
7888 cur = ctxt->input->cur[0];
7889 if (cur != '<') {
7890 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7892 ctxt->sax->error(ctxt->userData,
7893 "Start tag expect, '<' not found\n");
7894 ctxt->wellFormed = 0;
7895 ctxt->disableSAX = 1;
7896 ctxt->instate = XML_PARSER_EOF;
7897#ifdef DEBUG_PUSH
7898 xmlGenericError(xmlGenericErrorContext,
7899 "PP: entering EOF\n");
7900#endif
7901 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7902 (!ctxt->disableSAX))
7903 ctxt->sax->endDocument(ctxt->userData);
7904 goto done;
7905 }
7906 if ((!terminate) &&
7907 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7908 goto done;
7909 if (ctxt->spaceNr == 0)
7910 spacePush(ctxt, -1);
7911 else
7912 spacePush(ctxt, *ctxt->space);
7913 name = xmlParseStartTag(ctxt);
7914 if (name == NULL) {
7915 spacePop(ctxt);
7916 ctxt->instate = XML_PARSER_EOF;
7917#ifdef DEBUG_PUSH
7918 xmlGenericError(xmlGenericErrorContext,
7919 "PP: entering EOF\n");
7920#endif
7921 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7922 (!ctxt->disableSAX))
7923 ctxt->sax->endDocument(ctxt->userData);
7924 goto done;
7925 }
7926 namePush(ctxt, xmlStrdup(name));
7927
7928 /*
7929 * [ VC: Root Element Type ]
7930 * The Name in the document type declaration must match
7931 * the element type of the root element.
7932 */
7933 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7934 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7935 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7936
7937 /*
7938 * Check for an Empty Element.
7939 */
7940 if ((RAW == '/') && (NXT(1) == '>')) {
7941 SKIP(2);
7942 if ((ctxt->sax != NULL) &&
7943 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7944 ctxt->sax->endElement(ctxt->userData, name);
7945 xmlFree(name);
7946 oldname = namePop(ctxt);
7947 spacePop(ctxt);
7948 if (oldname != NULL) {
7949#ifdef DEBUG_STACK
7950 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7951#endif
7952 xmlFree(oldname);
7953 }
7954 if (ctxt->name == NULL) {
7955 ctxt->instate = XML_PARSER_EPILOG;
7956#ifdef DEBUG_PUSH
7957 xmlGenericError(xmlGenericErrorContext,
7958 "PP: entering EPILOG\n");
7959#endif
7960 } else {
7961 ctxt->instate = XML_PARSER_CONTENT;
7962#ifdef DEBUG_PUSH
7963 xmlGenericError(xmlGenericErrorContext,
7964 "PP: entering CONTENT\n");
7965#endif
7966 }
7967 break;
7968 }
7969 if (RAW == '>') {
7970 NEXT;
7971 } else {
7972 ctxt->errNo = XML_ERR_GT_REQUIRED;
7973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7974 ctxt->sax->error(ctxt->userData,
7975 "Couldn't find end of Start Tag %s\n",
7976 name);
7977 ctxt->wellFormed = 0;
7978 ctxt->disableSAX = 1;
7979
7980 /*
7981 * end of parsing of this node.
7982 */
7983 nodePop(ctxt);
7984 oldname = namePop(ctxt);
7985 spacePop(ctxt);
7986 if (oldname != NULL) {
7987#ifdef DEBUG_STACK
7988 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7989#endif
7990 xmlFree(oldname);
7991 }
7992 }
7993 xmlFree(name);
7994 ctxt->instate = XML_PARSER_CONTENT;
7995#ifdef DEBUG_PUSH
7996 xmlGenericError(xmlGenericErrorContext,
7997 "PP: entering CONTENT\n");
7998#endif
7999 break;
8000 }
8001 case XML_PARSER_CONTENT: {
8002 const xmlChar *test;
8003 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008004 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008005
8006 /*
8007 * Handle preparsed entities and charRef
8008 */
8009 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008010 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008011
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008012 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008013 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8014 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008015 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008016 ctxt->token = 0;
8017 }
8018 if ((avail < 2) && (ctxt->inputNr == 1))
8019 goto done;
8020 cur = ctxt->input->cur[0];
8021 next = ctxt->input->cur[1];
8022
8023 test = CUR_PTR;
8024 cons = ctxt->input->consumed;
8025 tok = ctxt->token;
8026 if ((cur == '<') && (next == '?')) {
8027 if ((!terminate) &&
8028 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8029 goto done;
8030#ifdef DEBUG_PUSH
8031 xmlGenericError(xmlGenericErrorContext,
8032 "PP: Parsing PI\n");
8033#endif
8034 xmlParsePI(ctxt);
8035 } else if ((cur == '<') && (next == '!') &&
8036 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8037 if ((!terminate) &&
8038 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8039 goto done;
8040#ifdef DEBUG_PUSH
8041 xmlGenericError(xmlGenericErrorContext,
8042 "PP: Parsing Comment\n");
8043#endif
8044 xmlParseComment(ctxt);
8045 ctxt->instate = XML_PARSER_CONTENT;
8046 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8047 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8048 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8049 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8050 (ctxt->input->cur[8] == '[')) {
8051 SKIP(9);
8052 ctxt->instate = XML_PARSER_CDATA_SECTION;
8053#ifdef DEBUG_PUSH
8054 xmlGenericError(xmlGenericErrorContext,
8055 "PP: entering CDATA_SECTION\n");
8056#endif
8057 break;
8058 } else if ((cur == '<') && (next == '!') &&
8059 (avail < 9)) {
8060 goto done;
8061 } else if ((cur == '<') && (next == '/')) {
8062 ctxt->instate = XML_PARSER_END_TAG;
8063#ifdef DEBUG_PUSH
8064 xmlGenericError(xmlGenericErrorContext,
8065 "PP: entering END_TAG\n");
8066#endif
8067 break;
8068 } else if (cur == '<') {
8069 ctxt->instate = XML_PARSER_START_TAG;
8070#ifdef DEBUG_PUSH
8071 xmlGenericError(xmlGenericErrorContext,
8072 "PP: entering START_TAG\n");
8073#endif
8074 break;
8075 } else if (cur == '&') {
8076 if ((!terminate) &&
8077 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8078 goto done;
8079#ifdef DEBUG_PUSH
8080 xmlGenericError(xmlGenericErrorContext,
8081 "PP: Parsing Reference\n");
8082#endif
8083 xmlParseReference(ctxt);
8084 } else {
8085 /* TODO Avoid the extra copy, handle directly !!! */
8086 /*
8087 * Goal of the following test is:
8088 * - minimize calls to the SAX 'character' callback
8089 * when they are mergeable
8090 * - handle an problem for isBlank when we only parse
8091 * a sequence of blank chars and the next one is
8092 * not available to check against '<' presence.
8093 * - tries to homogenize the differences in SAX
8094 * callbacks beween the push and pull versions
8095 * of the parser.
8096 */
8097 if ((ctxt->inputNr == 1) &&
8098 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8099 if ((!terminate) &&
8100 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8101 goto done;
8102 }
8103 ctxt->checkIndex = 0;
8104#ifdef DEBUG_PUSH
8105 xmlGenericError(xmlGenericErrorContext,
8106 "PP: Parsing char data\n");
8107#endif
8108 xmlParseCharData(ctxt, 0);
8109 }
8110 /*
8111 * Pop-up of finished entities.
8112 */
8113 while ((RAW == 0) && (ctxt->inputNr > 1))
8114 xmlPopInput(ctxt);
8115 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8116 (tok == ctxt->token)) {
8117 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8119 ctxt->sax->error(ctxt->userData,
8120 "detected an error in element content\n");
8121 ctxt->wellFormed = 0;
8122 ctxt->disableSAX = 1;
8123 ctxt->instate = XML_PARSER_EOF;
8124 break;
8125 }
8126 break;
8127 }
8128 case XML_PARSER_CDATA_SECTION: {
8129 /*
8130 * The Push mode need to have the SAX callback for
8131 * cdataBlock merge back contiguous callbacks.
8132 */
8133 int base;
8134
8135 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8136 if (base < 0) {
8137 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8138 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8139 if (ctxt->sax->cdataBlock != NULL)
8140 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8141 XML_PARSER_BIG_BUFFER_SIZE);
8142 }
8143 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8144 ctxt->checkIndex = 0;
8145 }
8146 goto done;
8147 } else {
8148 if ((ctxt->sax != NULL) && (base > 0) &&
8149 (!ctxt->disableSAX)) {
8150 if (ctxt->sax->cdataBlock != NULL)
8151 ctxt->sax->cdataBlock(ctxt->userData,
8152 ctxt->input->cur, base);
8153 }
8154 SKIP(base + 3);
8155 ctxt->checkIndex = 0;
8156 ctxt->instate = XML_PARSER_CONTENT;
8157#ifdef DEBUG_PUSH
8158 xmlGenericError(xmlGenericErrorContext,
8159 "PP: entering CONTENT\n");
8160#endif
8161 }
8162 break;
8163 }
8164 case XML_PARSER_END_TAG:
8165 if (avail < 2)
8166 goto done;
8167 if ((!terminate) &&
8168 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8169 goto done;
8170 xmlParseEndTag(ctxt);
8171 if (ctxt->name == NULL) {
8172 ctxt->instate = XML_PARSER_EPILOG;
8173#ifdef DEBUG_PUSH
8174 xmlGenericError(xmlGenericErrorContext,
8175 "PP: entering EPILOG\n");
8176#endif
8177 } else {
8178 ctxt->instate = XML_PARSER_CONTENT;
8179#ifdef DEBUG_PUSH
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: entering CONTENT\n");
8182#endif
8183 }
8184 break;
8185 case XML_PARSER_DTD: {
8186 /*
8187 * Sorry but progressive parsing of the internal subset
8188 * is not expected to be supported. We first check that
8189 * the full content of the internal subset is available and
8190 * the parsing is launched only at that point.
8191 * Internal subset ends up with "']' S? '>'" in an unescaped
8192 * section and not in a ']]>' sequence which are conditional
8193 * sections (whoever argued to keep that crap in XML deserve
8194 * a place in hell !).
8195 */
8196 int base, i;
8197 xmlChar *buf;
8198 xmlChar quote = 0;
8199
8200 base = ctxt->input->cur - ctxt->input->base;
8201 if (base < 0) return(0);
8202 if (ctxt->checkIndex > base)
8203 base = ctxt->checkIndex;
8204 buf = ctxt->input->buf->buffer->content;
8205 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8206 base++) {
8207 if (quote != 0) {
8208 if (buf[base] == quote)
8209 quote = 0;
8210 continue;
8211 }
8212 if (buf[base] == '"') {
8213 quote = '"';
8214 continue;
8215 }
8216 if (buf[base] == '\'') {
8217 quote = '\'';
8218 continue;
8219 }
8220 if (buf[base] == ']') {
8221 if ((unsigned int) base +1 >=
8222 ctxt->input->buf->buffer->use)
8223 break;
8224 if (buf[base + 1] == ']') {
8225 /* conditional crap, skip both ']' ! */
8226 base++;
8227 continue;
8228 }
8229 for (i = 0;
8230 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8231 i++) {
8232 if (buf[base + i] == '>')
8233 goto found_end_int_subset;
8234 }
8235 break;
8236 }
8237 }
8238 /*
8239 * We didn't found the end of the Internal subset
8240 */
8241 if (quote == 0)
8242 ctxt->checkIndex = base;
8243#ifdef DEBUG_PUSH
8244 if (next == 0)
8245 xmlGenericError(xmlGenericErrorContext,
8246 "PP: lookup of int subset end filed\n");
8247#endif
8248 goto done;
8249
8250found_end_int_subset:
8251 xmlParseInternalSubset(ctxt);
8252 ctxt->inSubset = 2;
8253 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8254 (ctxt->sax->externalSubset != NULL))
8255 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8256 ctxt->extSubSystem, ctxt->extSubURI);
8257 ctxt->inSubset = 0;
8258 ctxt->instate = XML_PARSER_PROLOG;
8259 ctxt->checkIndex = 0;
8260#ifdef DEBUG_PUSH
8261 xmlGenericError(xmlGenericErrorContext,
8262 "PP: entering PROLOG\n");
8263#endif
8264 break;
8265 }
8266 case XML_PARSER_COMMENT:
8267 xmlGenericError(xmlGenericErrorContext,
8268 "PP: internal error, state == COMMENT\n");
8269 ctxt->instate = XML_PARSER_CONTENT;
8270#ifdef DEBUG_PUSH
8271 xmlGenericError(xmlGenericErrorContext,
8272 "PP: entering CONTENT\n");
8273#endif
8274 break;
8275 case XML_PARSER_PI:
8276 xmlGenericError(xmlGenericErrorContext,
8277 "PP: internal error, state == PI\n");
8278 ctxt->instate = XML_PARSER_CONTENT;
8279#ifdef DEBUG_PUSH
8280 xmlGenericError(xmlGenericErrorContext,
8281 "PP: entering CONTENT\n");
8282#endif
8283 break;
8284 case XML_PARSER_ENTITY_DECL:
8285 xmlGenericError(xmlGenericErrorContext,
8286 "PP: internal error, state == ENTITY_DECL\n");
8287 ctxt->instate = XML_PARSER_DTD;
8288#ifdef DEBUG_PUSH
8289 xmlGenericError(xmlGenericErrorContext,
8290 "PP: entering DTD\n");
8291#endif
8292 break;
8293 case XML_PARSER_ENTITY_VALUE:
8294 xmlGenericError(xmlGenericErrorContext,
8295 "PP: internal error, state == ENTITY_VALUE\n");
8296 ctxt->instate = XML_PARSER_CONTENT;
8297#ifdef DEBUG_PUSH
8298 xmlGenericError(xmlGenericErrorContext,
8299 "PP: entering DTD\n");
8300#endif
8301 break;
8302 case XML_PARSER_ATTRIBUTE_VALUE:
8303 xmlGenericError(xmlGenericErrorContext,
8304 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8305 ctxt->instate = XML_PARSER_START_TAG;
8306#ifdef DEBUG_PUSH
8307 xmlGenericError(xmlGenericErrorContext,
8308 "PP: entering START_TAG\n");
8309#endif
8310 break;
8311 case XML_PARSER_SYSTEM_LITERAL:
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: internal error, state == SYSTEM_LITERAL\n");
8314 ctxt->instate = XML_PARSER_START_TAG;
8315#ifdef DEBUG_PUSH
8316 xmlGenericError(xmlGenericErrorContext,
8317 "PP: entering START_TAG\n");
8318#endif
8319 break;
8320 }
8321 }
8322done:
8323#ifdef DEBUG_PUSH
8324 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8325#endif
8326 return(ret);
8327}
8328
8329/**
Owen Taylor3473f882001-02-23 17:55:21 +00008330 * xmlParseChunk:
8331 * @ctxt: an XML parser context
8332 * @chunk: an char array
8333 * @size: the size in byte of the chunk
8334 * @terminate: last chunk indicator
8335 *
8336 * Parse a Chunk of memory
8337 *
8338 * Returns zero if no error, the xmlParserErrors otherwise.
8339 */
8340int
8341xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8342 int terminate) {
8343 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8344 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8345 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8346 int cur = ctxt->input->cur - ctxt->input->base;
8347
8348 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8349 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8350 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008351 ctxt->input->end =
8352 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008353#ifdef DEBUG_PUSH
8354 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8355#endif
8356
8357 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8358 xmlParseTryOrFinish(ctxt, terminate);
8359 } else if (ctxt->instate != XML_PARSER_EOF) {
8360 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8361 xmlParserInputBufferPtr in = ctxt->input->buf;
8362 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8363 (in->raw != NULL)) {
8364 int nbchars;
8365
8366 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8367 if (nbchars < 0) {
8368 xmlGenericError(xmlGenericErrorContext,
8369 "xmlParseChunk: encoder error\n");
8370 return(XML_ERR_INVALID_ENCODING);
8371 }
8372 }
8373 }
8374 }
8375 xmlParseTryOrFinish(ctxt, terminate);
8376 if (terminate) {
8377 /*
8378 * Check for termination
8379 */
8380 if ((ctxt->instate != XML_PARSER_EOF) &&
8381 (ctxt->instate != XML_PARSER_EPILOG)) {
8382 ctxt->errNo = XML_ERR_DOCUMENT_END;
8383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8384 ctxt->sax->error(ctxt->userData,
8385 "Extra content at the end of the document\n");
8386 ctxt->wellFormed = 0;
8387 ctxt->disableSAX = 1;
8388 }
8389 if (ctxt->instate != XML_PARSER_EOF) {
8390 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8391 (!ctxt->disableSAX))
8392 ctxt->sax->endDocument(ctxt->userData);
8393 }
8394 ctxt->instate = XML_PARSER_EOF;
8395 }
8396 return((xmlParserErrors) ctxt->errNo);
8397}
8398
8399/************************************************************************
8400 * *
8401 * I/O front end functions to the parser *
8402 * *
8403 ************************************************************************/
8404
8405/**
8406 * xmlStopParser:
8407 * @ctxt: an XML parser context
8408 *
8409 * Blocks further parser processing
8410 */
8411void
8412xmlStopParser(xmlParserCtxtPtr ctxt) {
8413 ctxt->instate = XML_PARSER_EOF;
8414 if (ctxt->input != NULL)
8415 ctxt->input->cur = BAD_CAST"";
8416}
8417
8418/**
8419 * xmlCreatePushParserCtxt:
8420 * @sax: a SAX handler
8421 * @user_data: The user data returned on SAX callbacks
8422 * @chunk: a pointer to an array of chars
8423 * @size: number of chars in the array
8424 * @filename: an optional file name or URI
8425 *
8426 * Create a parser context for using the XML parser in push mode
8427 * To allow content encoding detection, @size should be >= 4
8428 * The value of @filename is used for fetching external entities
8429 * and error/warning reports.
8430 *
8431 * Returns the new parser context or NULL
8432 */
8433xmlParserCtxtPtr
8434xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8435 const char *chunk, int size, const char *filename) {
8436 xmlParserCtxtPtr ctxt;
8437 xmlParserInputPtr inputStream;
8438 xmlParserInputBufferPtr buf;
8439 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8440
8441 /*
8442 * plug some encoding conversion routines
8443 */
8444 if ((chunk != NULL) && (size >= 4))
8445 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8446
8447 buf = xmlAllocParserInputBuffer(enc);
8448 if (buf == NULL) return(NULL);
8449
8450 ctxt = xmlNewParserCtxt();
8451 if (ctxt == NULL) {
8452 xmlFree(buf);
8453 return(NULL);
8454 }
8455 if (sax != NULL) {
8456 if (ctxt->sax != &xmlDefaultSAXHandler)
8457 xmlFree(ctxt->sax);
8458 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8459 if (ctxt->sax == NULL) {
8460 xmlFree(buf);
8461 xmlFree(ctxt);
8462 return(NULL);
8463 }
8464 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8465 if (user_data != NULL)
8466 ctxt->userData = user_data;
8467 }
8468 if (filename == NULL) {
8469 ctxt->directory = NULL;
8470 } else {
8471 ctxt->directory = xmlParserGetDirectory(filename);
8472 }
8473
8474 inputStream = xmlNewInputStream(ctxt);
8475 if (inputStream == NULL) {
8476 xmlFreeParserCtxt(ctxt);
8477 return(NULL);
8478 }
8479
8480 if (filename == NULL)
8481 inputStream->filename = NULL;
8482 else
8483 inputStream->filename = xmlMemStrdup(filename);
8484 inputStream->buf = buf;
8485 inputStream->base = inputStream->buf->buffer->content;
8486 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008487 inputStream->end =
8488 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008489 if (enc != XML_CHAR_ENCODING_NONE) {
8490 xmlSwitchEncoding(ctxt, enc);
8491 }
8492
8493 inputPush(ctxt, inputStream);
8494
8495 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8496 (ctxt->input->buf != NULL)) {
8497 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8498#ifdef DEBUG_PUSH
8499 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8500#endif
8501 }
8502
8503 return(ctxt);
8504}
8505
8506/**
8507 * xmlCreateIOParserCtxt:
8508 * @sax: a SAX handler
8509 * @user_data: The user data returned on SAX callbacks
8510 * @ioread: an I/O read function
8511 * @ioclose: an I/O close function
8512 * @ioctx: an I/O handler
8513 * @enc: the charset encoding if known
8514 *
8515 * Create a parser context for using the XML parser with an existing
8516 * I/O stream
8517 *
8518 * Returns the new parser context or NULL
8519 */
8520xmlParserCtxtPtr
8521xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8522 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8523 void *ioctx, xmlCharEncoding enc) {
8524 xmlParserCtxtPtr ctxt;
8525 xmlParserInputPtr inputStream;
8526 xmlParserInputBufferPtr buf;
8527
8528 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8529 if (buf == NULL) return(NULL);
8530
8531 ctxt = xmlNewParserCtxt();
8532 if (ctxt == NULL) {
8533 xmlFree(buf);
8534 return(NULL);
8535 }
8536 if (sax != NULL) {
8537 if (ctxt->sax != &xmlDefaultSAXHandler)
8538 xmlFree(ctxt->sax);
8539 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8540 if (ctxt->sax == NULL) {
8541 xmlFree(buf);
8542 xmlFree(ctxt);
8543 return(NULL);
8544 }
8545 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8546 if (user_data != NULL)
8547 ctxt->userData = user_data;
8548 }
8549
8550 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8551 if (inputStream == NULL) {
8552 xmlFreeParserCtxt(ctxt);
8553 return(NULL);
8554 }
8555 inputPush(ctxt, inputStream);
8556
8557 return(ctxt);
8558}
8559
8560/************************************************************************
8561 * *
8562 * Front ends when parsing a Dtd *
8563 * *
8564 ************************************************************************/
8565
8566/**
8567 * xmlIOParseDTD:
8568 * @sax: the SAX handler block or NULL
8569 * @input: an Input Buffer
8570 * @enc: the charset encoding if known
8571 *
8572 * Load and parse a DTD
8573 *
8574 * Returns the resulting xmlDtdPtr or NULL in case of error.
8575 * @input will be freed at parsing end.
8576 */
8577
8578xmlDtdPtr
8579xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8580 xmlCharEncoding enc) {
8581 xmlDtdPtr ret = NULL;
8582 xmlParserCtxtPtr ctxt;
8583 xmlParserInputPtr pinput = NULL;
8584
8585 if (input == NULL)
8586 return(NULL);
8587
8588 ctxt = xmlNewParserCtxt();
8589 if (ctxt == NULL) {
8590 return(NULL);
8591 }
8592
8593 /*
8594 * Set-up the SAX context
8595 */
8596 if (sax != NULL) {
8597 if (ctxt->sax != NULL)
8598 xmlFree(ctxt->sax);
8599 ctxt->sax = sax;
8600 ctxt->userData = NULL;
8601 }
8602
8603 /*
8604 * generate a parser input from the I/O handler
8605 */
8606
8607 pinput = xmlNewIOInputStream(ctxt, input, enc);
8608 if (pinput == NULL) {
8609 if (sax != NULL) ctxt->sax = NULL;
8610 xmlFreeParserCtxt(ctxt);
8611 return(NULL);
8612 }
8613
8614 /*
8615 * plug some encoding conversion routines here.
8616 */
8617 xmlPushInput(ctxt, pinput);
8618
8619 pinput->filename = NULL;
8620 pinput->line = 1;
8621 pinput->col = 1;
8622 pinput->base = ctxt->input->cur;
8623 pinput->cur = ctxt->input->cur;
8624 pinput->free = NULL;
8625
8626 /*
8627 * let's parse that entity knowing it's an external subset.
8628 */
8629 ctxt->inSubset = 2;
8630 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8631 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8632 BAD_CAST "none", BAD_CAST "none");
8633 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8634
8635 if (ctxt->myDoc != NULL) {
8636 if (ctxt->wellFormed) {
8637 ret = ctxt->myDoc->extSubset;
8638 ctxt->myDoc->extSubset = NULL;
8639 } else {
8640 ret = NULL;
8641 }
8642 xmlFreeDoc(ctxt->myDoc);
8643 ctxt->myDoc = NULL;
8644 }
8645 if (sax != NULL) ctxt->sax = NULL;
8646 xmlFreeParserCtxt(ctxt);
8647
8648 return(ret);
8649}
8650
8651/**
8652 * xmlSAXParseDTD:
8653 * @sax: the SAX handler block
8654 * @ExternalID: a NAME* containing the External ID of the DTD
8655 * @SystemID: a NAME* containing the URL to the DTD
8656 *
8657 * Load and parse an external subset.
8658 *
8659 * Returns the resulting xmlDtdPtr or NULL in case of error.
8660 */
8661
8662xmlDtdPtr
8663xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8664 const xmlChar *SystemID) {
8665 xmlDtdPtr ret = NULL;
8666 xmlParserCtxtPtr ctxt;
8667 xmlParserInputPtr input = NULL;
8668 xmlCharEncoding enc;
8669
8670 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8671
8672 ctxt = xmlNewParserCtxt();
8673 if (ctxt == NULL) {
8674 return(NULL);
8675 }
8676
8677 /*
8678 * Set-up the SAX context
8679 */
8680 if (sax != NULL) {
8681 if (ctxt->sax != NULL)
8682 xmlFree(ctxt->sax);
8683 ctxt->sax = sax;
8684 ctxt->userData = NULL;
8685 }
8686
8687 /*
8688 * Ask the Entity resolver to load the damn thing
8689 */
8690
8691 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8692 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8693 if (input == NULL) {
8694 if (sax != NULL) ctxt->sax = NULL;
8695 xmlFreeParserCtxt(ctxt);
8696 return(NULL);
8697 }
8698
8699 /*
8700 * plug some encoding conversion routines here.
8701 */
8702 xmlPushInput(ctxt, input);
8703 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8704 xmlSwitchEncoding(ctxt, enc);
8705
8706 if (input->filename == NULL)
8707 input->filename = (char *) xmlStrdup(SystemID);
8708 input->line = 1;
8709 input->col = 1;
8710 input->base = ctxt->input->cur;
8711 input->cur = ctxt->input->cur;
8712 input->free = NULL;
8713
8714 /*
8715 * let's parse that entity knowing it's an external subset.
8716 */
8717 ctxt->inSubset = 2;
8718 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8719 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8720 ExternalID, SystemID);
8721 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8722
8723 if (ctxt->myDoc != NULL) {
8724 if (ctxt->wellFormed) {
8725 ret = ctxt->myDoc->extSubset;
8726 ctxt->myDoc->extSubset = NULL;
8727 } else {
8728 ret = NULL;
8729 }
8730 xmlFreeDoc(ctxt->myDoc);
8731 ctxt->myDoc = NULL;
8732 }
8733 if (sax != NULL) ctxt->sax = NULL;
8734 xmlFreeParserCtxt(ctxt);
8735
8736 return(ret);
8737}
8738
8739/**
8740 * xmlParseDTD:
8741 * @ExternalID: a NAME* containing the External ID of the DTD
8742 * @SystemID: a NAME* containing the URL to the DTD
8743 *
8744 * Load and parse an external subset.
8745 *
8746 * Returns the resulting xmlDtdPtr or NULL in case of error.
8747 */
8748
8749xmlDtdPtr
8750xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8751 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8752}
8753
8754/************************************************************************
8755 * *
8756 * Front ends when parsing an Entity *
8757 * *
8758 ************************************************************************/
8759
8760/**
Owen Taylor3473f882001-02-23 17:55:21 +00008761 * xmlParseCtxtExternalEntity:
8762 * @ctx: the existing parsing context
8763 * @URL: the URL for the entity to load
8764 * @ID: the System ID for the entity to load
8765 * @list: the return value for the set of parsed nodes
8766 *
8767 * Parse an external general entity within an existing parsing context
8768 * An external general parsed entity is well-formed if it matches the
8769 * production labeled extParsedEnt.
8770 *
8771 * [78] extParsedEnt ::= TextDecl? content
8772 *
8773 * Returns 0 if the entity is well formed, -1 in case of args problem and
8774 * the parser error code otherwise
8775 */
8776
8777int
8778xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8779 const xmlChar *ID, xmlNodePtr *list) {
8780 xmlParserCtxtPtr ctxt;
8781 xmlDocPtr newDoc;
8782 xmlSAXHandlerPtr oldsax = NULL;
8783 int ret = 0;
8784
8785 if (ctx->depth > 40) {
8786 return(XML_ERR_ENTITY_LOOP);
8787 }
8788
8789 if (list != NULL)
8790 *list = NULL;
8791 if ((URL == NULL) && (ID == NULL))
8792 return(-1);
8793 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8794 return(-1);
8795
8796
8797 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8798 if (ctxt == NULL) return(-1);
8799 ctxt->userData = ctxt;
8800 oldsax = ctxt->sax;
8801 ctxt->sax = ctx->sax;
8802 newDoc = xmlNewDoc(BAD_CAST "1.0");
8803 if (newDoc == NULL) {
8804 xmlFreeParserCtxt(ctxt);
8805 return(-1);
8806 }
8807 if (ctx->myDoc != NULL) {
8808 newDoc->intSubset = ctx->myDoc->intSubset;
8809 newDoc->extSubset = ctx->myDoc->extSubset;
8810 }
8811 if (ctx->myDoc->URL != NULL) {
8812 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8813 }
8814 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8815 if (newDoc->children == NULL) {
8816 ctxt->sax = oldsax;
8817 xmlFreeParserCtxt(ctxt);
8818 newDoc->intSubset = NULL;
8819 newDoc->extSubset = NULL;
8820 xmlFreeDoc(newDoc);
8821 return(-1);
8822 }
8823 nodePush(ctxt, newDoc->children);
8824 if (ctx->myDoc == NULL) {
8825 ctxt->myDoc = newDoc;
8826 } else {
8827 ctxt->myDoc = ctx->myDoc;
8828 newDoc->children->doc = ctx->myDoc;
8829 }
8830
8831 /*
8832 * Parse a possible text declaration first
8833 */
8834 GROW;
8835 if ((RAW == '<') && (NXT(1) == '?') &&
8836 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8837 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8838 xmlParseTextDecl(ctxt);
8839 }
8840
8841 /*
8842 * Doing validity checking on chunk doesn't make sense
8843 */
8844 ctxt->instate = XML_PARSER_CONTENT;
8845 ctxt->validate = ctx->validate;
8846 ctxt->loadsubset = ctx->loadsubset;
8847 ctxt->depth = ctx->depth + 1;
8848 ctxt->replaceEntities = ctx->replaceEntities;
8849 if (ctxt->validate) {
8850 ctxt->vctxt.error = ctx->vctxt.error;
8851 ctxt->vctxt.warning = ctx->vctxt.warning;
8852 /* Allocate the Node stack */
8853 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8854 if (ctxt->vctxt.nodeTab == NULL) {
8855 xmlGenericError(xmlGenericErrorContext,
8856 "xmlParseCtxtExternalEntity: out of memory\n");
8857 ctxt->validate = 0;
8858 ctxt->vctxt.error = NULL;
8859 ctxt->vctxt.warning = NULL;
8860 } else {
8861 ctxt->vctxt.nodeNr = 0;
8862 ctxt->vctxt.nodeMax = 4;
8863 ctxt->vctxt.node = NULL;
8864 }
8865 } else {
8866 ctxt->vctxt.error = NULL;
8867 ctxt->vctxt.warning = NULL;
8868 }
8869
8870 xmlParseContent(ctxt);
8871
8872 if ((RAW == '<') && (NXT(1) == '/')) {
8873 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8875 ctxt->sax->error(ctxt->userData,
8876 "chunk is not well balanced\n");
8877 ctxt->wellFormed = 0;
8878 ctxt->disableSAX = 1;
8879 } else if (RAW != 0) {
8880 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8882 ctxt->sax->error(ctxt->userData,
8883 "extra content at the end of well balanced chunk\n");
8884 ctxt->wellFormed = 0;
8885 ctxt->disableSAX = 1;
8886 }
8887 if (ctxt->node != newDoc->children) {
8888 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8890 ctxt->sax->error(ctxt->userData,
8891 "chunk is not well balanced\n");
8892 ctxt->wellFormed = 0;
8893 ctxt->disableSAX = 1;
8894 }
8895
8896 if (!ctxt->wellFormed) {
8897 if (ctxt->errNo == 0)
8898 ret = 1;
8899 else
8900 ret = ctxt->errNo;
8901 } else {
8902 if (list != NULL) {
8903 xmlNodePtr cur;
8904
8905 /*
8906 * Return the newly created nodeset after unlinking it from
8907 * they pseudo parent.
8908 */
8909 cur = newDoc->children->children;
8910 *list = cur;
8911 while (cur != NULL) {
8912 cur->parent = NULL;
8913 cur = cur->next;
8914 }
8915 newDoc->children->children = NULL;
8916 }
8917 ret = 0;
8918 }
8919 ctxt->sax = oldsax;
8920 xmlFreeParserCtxt(ctxt);
8921 newDoc->intSubset = NULL;
8922 newDoc->extSubset = NULL;
8923 xmlFreeDoc(newDoc);
8924
8925 return(ret);
8926}
8927
8928/**
Daniel Veillard257d9102001-05-08 10:41:44 +00008929 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00008930 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008931 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00008932 * @sax: the SAX handler bloc (possibly NULL)
8933 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8934 * @depth: Used for loop detection, use 0
8935 * @URL: the URL for the entity to load
8936 * @ID: the System ID for the entity to load
8937 * @list: the return value for the set of parsed nodes
8938 *
Daniel Veillard257d9102001-05-08 10:41:44 +00008939 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00008940 *
8941 * Returns 0 if the entity is well formed, -1 in case of args problem and
8942 * the parser error code otherwise
8943 */
8944
Daniel Veillard257d9102001-05-08 10:41:44 +00008945static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008946xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
8947 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00008948 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008949 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00008950 xmlParserCtxtPtr ctxt;
8951 xmlDocPtr newDoc;
8952 xmlSAXHandlerPtr oldsax = NULL;
8953 int ret = 0;
8954
8955 if (depth > 40) {
8956 return(XML_ERR_ENTITY_LOOP);
8957 }
8958
8959
8960
8961 if (list != NULL)
8962 *list = NULL;
8963 if ((URL == NULL) && (ID == NULL))
8964 return(-1);
8965 if (doc == NULL) /* @@ relax but check for dereferences */
8966 return(-1);
8967
8968
8969 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8970 if (ctxt == NULL) return(-1);
8971 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008972 if (oldctxt != NULL) {
8973 ctxt->_private = oldctxt->_private;
8974 ctxt->loadsubset = oldctxt->loadsubset;
8975 ctxt->validate = oldctxt->validate;
8976 ctxt->external = oldctxt->external;
8977 } else {
8978 /*
8979 * Doing validity checking on chunk without context
8980 * doesn't make sense
8981 */
8982 ctxt->_private = NULL;
8983 ctxt->validate = 0;
8984 ctxt->external = 2;
8985 ctxt->loadsubset = 0;
8986 }
Owen Taylor3473f882001-02-23 17:55:21 +00008987 if (sax != NULL) {
8988 oldsax = ctxt->sax;
8989 ctxt->sax = sax;
8990 if (user_data != NULL)
8991 ctxt->userData = user_data;
8992 }
8993 newDoc = xmlNewDoc(BAD_CAST "1.0");
8994 if (newDoc == NULL) {
8995 xmlFreeParserCtxt(ctxt);
8996 return(-1);
8997 }
8998 if (doc != NULL) {
8999 newDoc->intSubset = doc->intSubset;
9000 newDoc->extSubset = doc->extSubset;
9001 }
9002 if (doc->URL != NULL) {
9003 newDoc->URL = xmlStrdup(doc->URL);
9004 }
9005 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9006 if (newDoc->children == NULL) {
9007 if (sax != NULL)
9008 ctxt->sax = oldsax;
9009 xmlFreeParserCtxt(ctxt);
9010 newDoc->intSubset = NULL;
9011 newDoc->extSubset = NULL;
9012 xmlFreeDoc(newDoc);
9013 return(-1);
9014 }
9015 nodePush(ctxt, newDoc->children);
9016 if (doc == NULL) {
9017 ctxt->myDoc = newDoc;
9018 } else {
9019 ctxt->myDoc = doc;
9020 newDoc->children->doc = doc;
9021 }
9022
9023 /*
9024 * Parse a possible text declaration first
9025 */
9026 GROW;
9027 if ((RAW == '<') && (NXT(1) == '?') &&
9028 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9029 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9030 xmlParseTextDecl(ctxt);
9031 }
9032
Owen Taylor3473f882001-02-23 17:55:21 +00009033 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009034 ctxt->depth = depth;
9035
9036 xmlParseContent(ctxt);
9037
9038 if ((RAW == '<') && (NXT(1) == '/')) {
9039 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9041 ctxt->sax->error(ctxt->userData,
9042 "chunk is not well balanced\n");
9043 ctxt->wellFormed = 0;
9044 ctxt->disableSAX = 1;
9045 } else if (RAW != 0) {
9046 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9048 ctxt->sax->error(ctxt->userData,
9049 "extra content at the end of well balanced chunk\n");
9050 ctxt->wellFormed = 0;
9051 ctxt->disableSAX = 1;
9052 }
9053 if (ctxt->node != newDoc->children) {
9054 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9056 ctxt->sax->error(ctxt->userData,
9057 "chunk is not well balanced\n");
9058 ctxt->wellFormed = 0;
9059 ctxt->disableSAX = 1;
9060 }
9061
9062 if (!ctxt->wellFormed) {
9063 if (ctxt->errNo == 0)
9064 ret = 1;
9065 else
9066 ret = ctxt->errNo;
9067 } else {
9068 if (list != NULL) {
9069 xmlNodePtr cur;
9070
9071 /*
9072 * Return the newly created nodeset after unlinking it from
9073 * they pseudo parent.
9074 */
9075 cur = newDoc->children->children;
9076 *list = cur;
9077 while (cur != NULL) {
9078 cur->parent = NULL;
9079 cur = cur->next;
9080 }
9081 newDoc->children->children = NULL;
9082 }
9083 ret = 0;
9084 }
9085 if (sax != NULL)
9086 ctxt->sax = oldsax;
9087 xmlFreeParserCtxt(ctxt);
9088 newDoc->intSubset = NULL;
9089 newDoc->extSubset = NULL;
9090 xmlFreeDoc(newDoc);
9091
9092 return(ret);
9093}
9094
9095/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009096 * xmlParseExternalEntity:
9097 * @doc: the document the chunk pertains to
9098 * @sax: the SAX handler bloc (possibly NULL)
9099 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9100 * @depth: Used for loop detection, use 0
9101 * @URL: the URL for the entity to load
9102 * @ID: the System ID for the entity to load
9103 * @list: the return value for the set of parsed nodes
9104 *
9105 * Parse an external general entity
9106 * An external general parsed entity is well-formed if it matches the
9107 * production labeled extParsedEnt.
9108 *
9109 * [78] extParsedEnt ::= TextDecl? content
9110 *
9111 * Returns 0 if the entity is well formed, -1 in case of args problem and
9112 * the parser error code otherwise
9113 */
9114
9115int
9116xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9117 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009118 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9119 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009120}
9121
9122/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009123 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009124 * @doc: the document the chunk pertains to
9125 * @sax: the SAX handler bloc (possibly NULL)
9126 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9127 * @depth: Used for loop detection, use 0
9128 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9129 * @list: the return value for the set of parsed nodes
9130 *
9131 * Parse a well-balanced chunk of an XML document
9132 * called by the parser
9133 * The allowed sequence for the Well Balanced Chunk is the one defined by
9134 * the content production in the XML grammar:
9135 *
9136 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9137 *
9138 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9139 * the parser error code otherwise
9140 */
9141
9142int
9143xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9144 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9145 xmlParserCtxtPtr ctxt;
9146 xmlDocPtr newDoc;
9147 xmlSAXHandlerPtr oldsax = NULL;
9148 int size;
9149 int ret = 0;
9150
9151 if (depth > 40) {
9152 return(XML_ERR_ENTITY_LOOP);
9153 }
9154
9155
9156 if (list != NULL)
9157 *list = NULL;
9158 if (string == NULL)
9159 return(-1);
9160
9161 size = xmlStrlen(string);
9162
9163 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9164 if (ctxt == NULL) return(-1);
9165 ctxt->userData = ctxt;
9166 if (sax != NULL) {
9167 oldsax = ctxt->sax;
9168 ctxt->sax = sax;
9169 if (user_data != NULL)
9170 ctxt->userData = user_data;
9171 }
9172 newDoc = xmlNewDoc(BAD_CAST "1.0");
9173 if (newDoc == NULL) {
9174 xmlFreeParserCtxt(ctxt);
9175 return(-1);
9176 }
9177 if (doc != NULL) {
9178 newDoc->intSubset = doc->intSubset;
9179 newDoc->extSubset = doc->extSubset;
9180 }
9181 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9182 if (newDoc->children == NULL) {
9183 if (sax != NULL)
9184 ctxt->sax = oldsax;
9185 xmlFreeParserCtxt(ctxt);
9186 newDoc->intSubset = NULL;
9187 newDoc->extSubset = NULL;
9188 xmlFreeDoc(newDoc);
9189 return(-1);
9190 }
9191 nodePush(ctxt, newDoc->children);
9192 if (doc == NULL) {
9193 ctxt->myDoc = newDoc;
9194 } else {
9195 ctxt->myDoc = doc;
9196 newDoc->children->doc = doc;
9197 }
9198 ctxt->instate = XML_PARSER_CONTENT;
9199 ctxt->depth = depth;
9200
9201 /*
9202 * Doing validity checking on chunk doesn't make sense
9203 */
9204 ctxt->validate = 0;
9205 ctxt->loadsubset = 0;
9206
9207 xmlParseContent(ctxt);
9208
9209 if ((RAW == '<') && (NXT(1) == '/')) {
9210 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9212 ctxt->sax->error(ctxt->userData,
9213 "chunk is not well balanced\n");
9214 ctxt->wellFormed = 0;
9215 ctxt->disableSAX = 1;
9216 } else if (RAW != 0) {
9217 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9219 ctxt->sax->error(ctxt->userData,
9220 "extra content at the end of well balanced chunk\n");
9221 ctxt->wellFormed = 0;
9222 ctxt->disableSAX = 1;
9223 }
9224 if (ctxt->node != newDoc->children) {
9225 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9227 ctxt->sax->error(ctxt->userData,
9228 "chunk is not well balanced\n");
9229 ctxt->wellFormed = 0;
9230 ctxt->disableSAX = 1;
9231 }
9232
9233 if (!ctxt->wellFormed) {
9234 if (ctxt->errNo == 0)
9235 ret = 1;
9236 else
9237 ret = ctxt->errNo;
9238 } else {
9239 if (list != NULL) {
9240 xmlNodePtr cur;
9241
9242 /*
9243 * Return the newly created nodeset after unlinking it from
9244 * they pseudo parent.
9245 */
9246 cur = newDoc->children->children;
9247 *list = cur;
9248 while (cur != NULL) {
9249 cur->parent = NULL;
9250 cur = cur->next;
9251 }
9252 newDoc->children->children = NULL;
9253 }
9254 ret = 0;
9255 }
9256 if (sax != NULL)
9257 ctxt->sax = oldsax;
9258 xmlFreeParserCtxt(ctxt);
9259 newDoc->intSubset = NULL;
9260 newDoc->extSubset = NULL;
9261 xmlFreeDoc(newDoc);
9262
9263 return(ret);
9264}
9265
9266/**
9267 * xmlSAXParseEntity:
9268 * @sax: the SAX handler block
9269 * @filename: the filename
9270 *
9271 * parse an XML external entity out of context and build a tree.
9272 * It use the given SAX function block to handle the parsing callback.
9273 * If sax is NULL, fallback to the default DOM tree building routines.
9274 *
9275 * [78] extParsedEnt ::= TextDecl? content
9276 *
9277 * This correspond to a "Well Balanced" chunk
9278 *
9279 * Returns the resulting document tree
9280 */
9281
9282xmlDocPtr
9283xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9284 xmlDocPtr ret;
9285 xmlParserCtxtPtr ctxt;
9286 char *directory = NULL;
9287
9288 ctxt = xmlCreateFileParserCtxt(filename);
9289 if (ctxt == NULL) {
9290 return(NULL);
9291 }
9292 if (sax != NULL) {
9293 if (ctxt->sax != NULL)
9294 xmlFree(ctxt->sax);
9295 ctxt->sax = sax;
9296 ctxt->userData = NULL;
9297 }
9298
9299 if ((ctxt->directory == NULL) && (directory == NULL))
9300 directory = xmlParserGetDirectory(filename);
9301
9302 xmlParseExtParsedEnt(ctxt);
9303
9304 if (ctxt->wellFormed)
9305 ret = ctxt->myDoc;
9306 else {
9307 ret = NULL;
9308 xmlFreeDoc(ctxt->myDoc);
9309 ctxt->myDoc = NULL;
9310 }
9311 if (sax != NULL)
9312 ctxt->sax = NULL;
9313 xmlFreeParserCtxt(ctxt);
9314
9315 return(ret);
9316}
9317
9318/**
9319 * xmlParseEntity:
9320 * @filename: the filename
9321 *
9322 * parse an XML external entity out of context and build a tree.
9323 *
9324 * [78] extParsedEnt ::= TextDecl? content
9325 *
9326 * This correspond to a "Well Balanced" chunk
9327 *
9328 * Returns the resulting document tree
9329 */
9330
9331xmlDocPtr
9332xmlParseEntity(const char *filename) {
9333 return(xmlSAXParseEntity(NULL, filename));
9334}
9335
9336/**
9337 * xmlCreateEntityParserCtxt:
9338 * @URL: the entity URL
9339 * @ID: the entity PUBLIC ID
9340 * @base: a posible base for the target URI
9341 *
9342 * Create a parser context for an external entity
9343 * Automatic support for ZLIB/Compress compressed document is provided
9344 * by default if found at compile-time.
9345 *
9346 * Returns the new parser context or NULL
9347 */
9348xmlParserCtxtPtr
9349xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9350 const xmlChar *base) {
9351 xmlParserCtxtPtr ctxt;
9352 xmlParserInputPtr inputStream;
9353 char *directory = NULL;
9354 xmlChar *uri;
9355
9356 ctxt = xmlNewParserCtxt();
9357 if (ctxt == NULL) {
9358 return(NULL);
9359 }
9360
9361 uri = xmlBuildURI(URL, base);
9362
9363 if (uri == NULL) {
9364 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9365 if (inputStream == NULL) {
9366 xmlFreeParserCtxt(ctxt);
9367 return(NULL);
9368 }
9369
9370 inputPush(ctxt, inputStream);
9371
9372 if ((ctxt->directory == NULL) && (directory == NULL))
9373 directory = xmlParserGetDirectory((char *)URL);
9374 if ((ctxt->directory == NULL) && (directory != NULL))
9375 ctxt->directory = directory;
9376 } else {
9377 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9378 if (inputStream == NULL) {
9379 xmlFree(uri);
9380 xmlFreeParserCtxt(ctxt);
9381 return(NULL);
9382 }
9383
9384 inputPush(ctxt, inputStream);
9385
9386 if ((ctxt->directory == NULL) && (directory == NULL))
9387 directory = xmlParserGetDirectory((char *)uri);
9388 if ((ctxt->directory == NULL) && (directory != NULL))
9389 ctxt->directory = directory;
9390 xmlFree(uri);
9391 }
9392
9393 return(ctxt);
9394}
9395
9396/************************************************************************
9397 * *
9398 * Front ends when parsing from a file *
9399 * *
9400 ************************************************************************/
9401
9402/**
9403 * xmlCreateFileParserCtxt:
9404 * @filename: the filename
9405 *
9406 * Create a parser context for a file content.
9407 * Automatic support for ZLIB/Compress compressed document is provided
9408 * by default if found at compile-time.
9409 *
9410 * Returns the new parser context or NULL
9411 */
9412xmlParserCtxtPtr
9413xmlCreateFileParserCtxt(const char *filename)
9414{
9415 xmlParserCtxtPtr ctxt;
9416 xmlParserInputPtr inputStream;
9417 xmlParserInputBufferPtr buf;
9418 char *directory = NULL;
9419
9420 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9421 if (buf == NULL) {
9422 return(NULL);
9423 }
9424
9425 ctxt = xmlNewParserCtxt();
9426 if (ctxt == NULL) {
9427 if (xmlDefaultSAXHandler.error != NULL) {
9428 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9429 }
9430 return(NULL);
9431 }
9432
9433 inputStream = xmlNewInputStream(ctxt);
9434 if (inputStream == NULL) {
9435 xmlFreeParserCtxt(ctxt);
9436 return(NULL);
9437 }
9438
9439 inputStream->filename = xmlMemStrdup(filename);
9440 inputStream->buf = buf;
9441 inputStream->base = inputStream->buf->buffer->content;
9442 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009443 inputStream->end =
9444 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009445
9446 inputPush(ctxt, inputStream);
9447 if ((ctxt->directory == NULL) && (directory == NULL))
9448 directory = xmlParserGetDirectory(filename);
9449 if ((ctxt->directory == NULL) && (directory != NULL))
9450 ctxt->directory = directory;
9451
9452 return(ctxt);
9453}
9454
9455/**
9456 * xmlSAXParseFile:
9457 * @sax: the SAX handler block
9458 * @filename: the filename
9459 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9460 * documents
9461 *
9462 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9463 * compressed document is provided by default if found at compile-time.
9464 * It use the given SAX function block to handle the parsing callback.
9465 * If sax is NULL, fallback to the default DOM tree building routines.
9466 *
9467 * Returns the resulting document tree
9468 */
9469
9470xmlDocPtr
9471xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9472 int recovery) {
9473 xmlDocPtr ret;
9474 xmlParserCtxtPtr ctxt;
9475 char *directory = NULL;
9476
9477 ctxt = xmlCreateFileParserCtxt(filename);
9478 if (ctxt == NULL) {
9479 return(NULL);
9480 }
9481 if (sax != NULL) {
9482 if (ctxt->sax != NULL)
9483 xmlFree(ctxt->sax);
9484 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009485 }
9486
9487 if ((ctxt->directory == NULL) && (directory == NULL))
9488 directory = xmlParserGetDirectory(filename);
9489 if ((ctxt->directory == NULL) && (directory != NULL))
9490 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9491
9492 xmlParseDocument(ctxt);
9493
9494 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9495 else {
9496 ret = NULL;
9497 xmlFreeDoc(ctxt->myDoc);
9498 ctxt->myDoc = NULL;
9499 }
9500 if (sax != NULL)
9501 ctxt->sax = NULL;
9502 xmlFreeParserCtxt(ctxt);
9503
9504 return(ret);
9505}
9506
9507/**
9508 * xmlRecoverDoc:
9509 * @cur: a pointer to an array of xmlChar
9510 *
9511 * parse an XML in-memory document and build a tree.
9512 * In the case the document is not Well Formed, a tree is built anyway
9513 *
9514 * Returns the resulting document tree
9515 */
9516
9517xmlDocPtr
9518xmlRecoverDoc(xmlChar *cur) {
9519 return(xmlSAXParseDoc(NULL, cur, 1));
9520}
9521
9522/**
9523 * xmlParseFile:
9524 * @filename: the filename
9525 *
9526 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9527 * compressed document is provided by default if found at compile-time.
9528 *
9529 * Returns the resulting document tree
9530 */
9531
9532xmlDocPtr
9533xmlParseFile(const char *filename) {
9534 return(xmlSAXParseFile(NULL, filename, 0));
9535}
9536
9537/**
9538 * xmlRecoverFile:
9539 * @filename: the filename
9540 *
9541 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9542 * compressed document is provided by default if found at compile-time.
9543 * In the case the document is not Well Formed, a tree is built anyway
9544 *
9545 * Returns the resulting document tree
9546 */
9547
9548xmlDocPtr
9549xmlRecoverFile(const char *filename) {
9550 return(xmlSAXParseFile(NULL, filename, 1));
9551}
9552
9553
9554/**
9555 * xmlSetupParserForBuffer:
9556 * @ctxt: an XML parser context
9557 * @buffer: a xmlChar * buffer
9558 * @filename: a file name
9559 *
9560 * Setup the parser context to parse a new buffer; Clears any prior
9561 * contents from the parser context. The buffer parameter must not be
9562 * NULL, but the filename parameter can be
9563 */
9564void
9565xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9566 const char* filename)
9567{
9568 xmlParserInputPtr input;
9569
9570 input = xmlNewInputStream(ctxt);
9571 if (input == NULL) {
9572 perror("malloc");
9573 xmlFree(ctxt);
9574 return;
9575 }
9576
9577 xmlClearParserCtxt(ctxt);
9578 if (filename != NULL)
9579 input->filename = xmlMemStrdup(filename);
9580 input->base = buffer;
9581 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009582 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009583 inputPush(ctxt, input);
9584}
9585
9586/**
9587 * xmlSAXUserParseFile:
9588 * @sax: a SAX handler
9589 * @user_data: The user data returned on SAX callbacks
9590 * @filename: a file name
9591 *
9592 * parse an XML file and call the given SAX handler routines.
9593 * Automatic support for ZLIB/Compress compressed document is provided
9594 *
9595 * Returns 0 in case of success or a error number otherwise
9596 */
9597int
9598xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9599 const char *filename) {
9600 int ret = 0;
9601 xmlParserCtxtPtr ctxt;
9602
9603 ctxt = xmlCreateFileParserCtxt(filename);
9604 if (ctxt == NULL) return -1;
9605 if (ctxt->sax != &xmlDefaultSAXHandler)
9606 xmlFree(ctxt->sax);
9607 ctxt->sax = sax;
9608 if (user_data != NULL)
9609 ctxt->userData = user_data;
9610
9611 xmlParseDocument(ctxt);
9612
9613 if (ctxt->wellFormed)
9614 ret = 0;
9615 else {
9616 if (ctxt->errNo != 0)
9617 ret = ctxt->errNo;
9618 else
9619 ret = -1;
9620 }
9621 if (sax != NULL)
9622 ctxt->sax = NULL;
9623 xmlFreeParserCtxt(ctxt);
9624
9625 return ret;
9626}
9627
9628/************************************************************************
9629 * *
9630 * Front ends when parsing from memory *
9631 * *
9632 ************************************************************************/
9633
9634/**
9635 * xmlCreateMemoryParserCtxt:
9636 * @buffer: a pointer to a char array
9637 * @size: the size of the array
9638 *
9639 * Create a parser context for an XML in-memory document.
9640 *
9641 * Returns the new parser context or NULL
9642 */
9643xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009644xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009645 xmlParserCtxtPtr ctxt;
9646 xmlParserInputPtr input;
9647 xmlParserInputBufferPtr buf;
9648
9649 if (buffer == NULL)
9650 return(NULL);
9651 if (size <= 0)
9652 return(NULL);
9653
9654 ctxt = xmlNewParserCtxt();
9655 if (ctxt == NULL)
9656 return(NULL);
9657
9658 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9659 if (buf == NULL) return(NULL);
9660
9661 input = xmlNewInputStream(ctxt);
9662 if (input == NULL) {
9663 xmlFreeParserCtxt(ctxt);
9664 return(NULL);
9665 }
9666
9667 input->filename = NULL;
9668 input->buf = buf;
9669 input->base = input->buf->buffer->content;
9670 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009671 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009672
9673 inputPush(ctxt, input);
9674 return(ctxt);
9675}
9676
9677/**
9678 * xmlSAXParseMemory:
9679 * @sax: the SAX handler block
9680 * @buffer: an pointer to a char array
9681 * @size: the size of the array
9682 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9683 * documents
9684 *
9685 * parse an XML in-memory block and use the given SAX function block
9686 * to handle the parsing callback. If sax is NULL, fallback to the default
9687 * DOM tree building routines.
9688 *
9689 * Returns the resulting document tree
9690 */
9691xmlDocPtr
9692xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9693 xmlDocPtr ret;
9694 xmlParserCtxtPtr ctxt;
9695
9696 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9697 if (ctxt == NULL) return(NULL);
9698 if (sax != NULL) {
9699 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009700 }
9701
9702 xmlParseDocument(ctxt);
9703
9704 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9705 else {
9706 ret = NULL;
9707 xmlFreeDoc(ctxt->myDoc);
9708 ctxt->myDoc = NULL;
9709 }
9710 if (sax != NULL)
9711 ctxt->sax = NULL;
9712 xmlFreeParserCtxt(ctxt);
9713
9714 return(ret);
9715}
9716
9717/**
9718 * xmlParseMemory:
9719 * @buffer: an pointer to a char array
9720 * @size: the size of the array
9721 *
9722 * parse an XML in-memory block and build a tree.
9723 *
9724 * Returns the resulting document tree
9725 */
9726
9727xmlDocPtr xmlParseMemory(char *buffer, int size) {
9728 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9729}
9730
9731/**
9732 * xmlRecoverMemory:
9733 * @buffer: an pointer to a char array
9734 * @size: the size of the array
9735 *
9736 * parse an XML in-memory block and build a tree.
9737 * In the case the document is not Well Formed, a tree is built anyway
9738 *
9739 * Returns the resulting document tree
9740 */
9741
9742xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9743 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9744}
9745
9746/**
9747 * xmlSAXUserParseMemory:
9748 * @sax: a SAX handler
9749 * @user_data: The user data returned on SAX callbacks
9750 * @buffer: an in-memory XML document input
9751 * @size: the length of the XML document in bytes
9752 *
9753 * A better SAX parsing routine.
9754 * parse an XML in-memory buffer and call the given SAX handler routines.
9755 *
9756 * Returns 0 in case of success or a error number otherwise
9757 */
9758int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009759 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009760 int ret = 0;
9761 xmlParserCtxtPtr ctxt;
9762 xmlSAXHandlerPtr oldsax = NULL;
9763
9764 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9765 if (ctxt == NULL) return -1;
9766 if (sax != NULL) {
9767 oldsax = ctxt->sax;
9768 ctxt->sax = sax;
9769 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009770 if (user_data != NULL)
9771 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009772
9773 xmlParseDocument(ctxt);
9774
9775 if (ctxt->wellFormed)
9776 ret = 0;
9777 else {
9778 if (ctxt->errNo != 0)
9779 ret = ctxt->errNo;
9780 else
9781 ret = -1;
9782 }
9783 if (sax != NULL) {
9784 ctxt->sax = oldsax;
9785 }
9786 xmlFreeParserCtxt(ctxt);
9787
9788 return ret;
9789}
9790
9791/**
9792 * xmlCreateDocParserCtxt:
9793 * @cur: a pointer to an array of xmlChar
9794 *
9795 * Creates a parser context for an XML in-memory document.
9796 *
9797 * Returns the new parser context or NULL
9798 */
9799xmlParserCtxtPtr
9800xmlCreateDocParserCtxt(xmlChar *cur) {
9801 int len;
9802
9803 if (cur == NULL)
9804 return(NULL);
9805 len = xmlStrlen(cur);
9806 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9807}
9808
9809/**
9810 * xmlSAXParseDoc:
9811 * @sax: the SAX handler block
9812 * @cur: a pointer to an array of xmlChar
9813 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9814 * documents
9815 *
9816 * parse an XML in-memory document and build a tree.
9817 * It use the given SAX function block to handle the parsing callback.
9818 * If sax is NULL, fallback to the default DOM tree building routines.
9819 *
9820 * Returns the resulting document tree
9821 */
9822
9823xmlDocPtr
9824xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9825 xmlDocPtr ret;
9826 xmlParserCtxtPtr ctxt;
9827
9828 if (cur == NULL) return(NULL);
9829
9830
9831 ctxt = xmlCreateDocParserCtxt(cur);
9832 if (ctxt == NULL) return(NULL);
9833 if (sax != NULL) {
9834 ctxt->sax = sax;
9835 ctxt->userData = NULL;
9836 }
9837
9838 xmlParseDocument(ctxt);
9839 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9840 else {
9841 ret = NULL;
9842 xmlFreeDoc(ctxt->myDoc);
9843 ctxt->myDoc = NULL;
9844 }
9845 if (sax != NULL)
9846 ctxt->sax = NULL;
9847 xmlFreeParserCtxt(ctxt);
9848
9849 return(ret);
9850}
9851
9852/**
9853 * xmlParseDoc:
9854 * @cur: a pointer to an array of xmlChar
9855 *
9856 * parse an XML in-memory document and build a tree.
9857 *
9858 * Returns the resulting document tree
9859 */
9860
9861xmlDocPtr
9862xmlParseDoc(xmlChar *cur) {
9863 return(xmlSAXParseDoc(NULL, cur, 0));
9864}
9865
9866
9867/************************************************************************
9868 * *
9869 * Miscellaneous *
9870 * *
9871 ************************************************************************/
9872
9873#ifdef LIBXML_XPATH_ENABLED
9874#include <libxml/xpath.h>
9875#endif
9876
/* Set to 1 by xmlInitParser() once it has run; reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
9878
9879/**
9880 * xmlInitParser:
9881 *
9882 * Initialization function for the XML parser.
9883 * This is not reentrant. Call once before processing in case of
9884 * use in multithreaded programs.
9885 */
9886
9887void
9888xmlInitParser(void) {
9889 if (xmlParserInitialized) return;
9890
9891 xmlInitCharEncodingHandlers();
9892 xmlInitializePredefinedEntities();
9893 xmlDefaultSAXHandlerInit();
9894 xmlRegisterDefaultInputCallbacks();
9895 xmlRegisterDefaultOutputCallbacks();
9896#ifdef LIBXML_HTML_ENABLED
9897 htmlInitAutoClose();
9898 htmlDefaultSAXHandlerInit();
9899#endif
9900#ifdef LIBXML_XPATH_ENABLED
9901 xmlXPathInit();
9902#endif
9903 xmlParserInitialized = 1;
9904}
9905
9906/**
9907 * xmlCleanupParser:
9908 *
9909 * Cleanup function for the XML parser. It tries to reclaim all
9910 * parsing related global memory allocated for the parser processing.
9911 * It doesn't deallocate any document related memory. Calling this
9912 * function should not prevent reusing the parser.
9913 */
9914
9915void
9916xmlCleanupParser(void) {
9917 xmlParserInitialized = 0;
9918 xmlCleanupCharEncodingHandlers();
9919 xmlCleanupPredefinedEntities();
9920}
9921
9922/**
9923 * xmlPedanticParserDefault:
9924 * @val: int 0 or 1
9925 *
9926 * Set and return the previous value for enabling pedantic warnings.
9927 *
9928 * Returns the last value for 0 for no substitution, 1 for substitution.
9929 */
9930
9931int
9932xmlPedanticParserDefault(int val) {
9933 int old = xmlPedanticParserDefaultValue;
9934
9935 xmlPedanticParserDefaultValue = val;
9936 return(old);
9937}
9938
9939/**
9940 * xmlSubstituteEntitiesDefault:
9941 * @val: int 0 or 1
9942 *
9943 * Set and return the previous value for default entity support.
9944 * Initially the parser always keep entity references instead of substituting
9945 * entity values in the output. This function has to be used to change the
9946 * default parser behaviour
9947 * SAX::subtituteEntities() has to be used for changing that on a file by
9948 * file basis.
9949 *
9950 * Returns the last value for 0 for no substitution, 1 for substitution.
9951 */
9952
9953int
9954xmlSubstituteEntitiesDefault(int val) {
9955 int old = xmlSubstituteEntitiesDefaultValue;
9956
9957 xmlSubstituteEntitiesDefaultValue = val;
9958 return(old);
9959}
9960
9961/**
9962 * xmlKeepBlanksDefault:
9963 * @val: int 0 or 1
9964 *
9965 * Set and return the previous value for default blanks text nodes support.
9966 * The 1.x version of the parser used an heuristic to try to detect
9967 * ignorable white spaces. As a result the SAX callback was generating
9968 * ignorableWhitespace() callbacks instead of characters() one, and when
9969 * using the DOM output text nodes containing those blanks were not generated.
9970 * The 2.x and later version will switch to the XML standard way and
9971 * ignorableWhitespace() are only generated when running the parser in
9972 * validating mode and when the current element doesn't allow CDATA or
9973 * mixed content.
9974 * This function is provided as a way to force the standard behaviour
9975 * on 1.X libs and to switch back to the old mode for compatibility when
9976 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9977 * by using xmlIsBlankNode() commodity function to detect the "empty"
9978 * nodes generated.
9979 * This value also affect autogeneration of indentation when saving code
9980 * if blanks sections are kept, indentation is not generated.
9981 *
9982 * Returns the last value for 0 for no substitution, 1 for substitution.
9983 */
9984
9985int
9986xmlKeepBlanksDefault(int val) {
9987 int old = xmlKeepBlanksDefaultValue;
9988
9989 xmlKeepBlanksDefaultValue = val;
9990 xmlIndentTreeOutput = !val;
9991 return(old);
9992}
9993