blob: 615e293c2eb93f220e527593679068ab9bb18c15 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 * and xmlDoValidityCheckingDefaultValue for VMS
 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
/*
 * XML_DIR_SEP: the character separating path components, chosen at
 * compile time depending on whether WIN32 is defined.
 */
#if defined(WIN32)
#define XML_DIR_SEP '\\'
#else /* !WIN32 */
#define XML_DIR_SEP '/'
#endif /* WIN32 */
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Sizes, counted in xmlChar, used for the parser scratch buffers:
 *   XML_PARSER_BIG_BUFFER_SIZE  initial allocation of large buffers
 *   XML_PARSER_BUFFER_SIZE      smaller size, also used as the free space
 *                               kept in reserve before a buffer is grown
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE     100
79
/*
 * Global default values used for parsing.
 *
 * On VMS the two longest variable names are defined under a truncated
 * name, and a macro maps the full name onto the truncated one.
 */
int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;

#if defined(VMS)
int xmlSubstituteEntitiesDefaultVal = 0;
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else /* !VMS */
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif /* VMS */

int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
97
/*
 * xmlW3CPIs:
 *
 * NULL terminated list of the processing instruction targets starting
 * with "xml" which are allowed by W3C specifications.
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
Daniel Veillard257d9102001-05-08 10:41:44 +0000112static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000113xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
114 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000115 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000116 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000117
118/************************************************************************
119 * *
120 * Parser stacks related functions and macros *
121 * *
122 ************************************************************************/
123
124xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
125 const xmlChar ** str);
126
127/*
128 * Generic function for accessing stacks in the Parser Context
129 */
130
131#define PUSH_AND_POP(scope, type, name) \
132scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
133 if (ctxt->name##Nr >= ctxt->name##Max) { \
134 ctxt->name##Max *= 2; \
135 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
136 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
137 if (ctxt->name##Tab == NULL) { \
138 xmlGenericError(xmlGenericErrorContext, \
139 "realloc failed !\n"); \
140 return(0); \
141 } \
142 } \
143 ctxt->name##Tab[ctxt->name##Nr] = value; \
144 ctxt->name = value; \
145 return(ctxt->name##Nr++); \
146} \
147scope type name##Pop(xmlParserCtxtPtr ctxt) { \
148 type ret; \
149 if (ctxt->name##Nr <= 0) return(0); \
150 ctxt->name##Nr--; \
151 if (ctxt->name##Nr > 0) \
152 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
153 else \
154 ctxt->name = NULL; \
155 ret = ctxt->name##Tab[ctxt->name##Nr]; \
156 ctxt->name##Tab[ctxt->name##Nr] = 0; \
157 return(ret); \
158} \
159
160/*
161 * Those macros actually generate the functions
162 */
163PUSH_AND_POP(extern, xmlParserInputPtr, input)
164PUSH_AND_POP(extern, xmlNodePtr, node)
165PUSH_AND_POP(extern, xmlChar*, name)
166
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000167static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000168 if (ctxt->spaceNr >= ctxt->spaceMax) {
169 ctxt->spaceMax *= 2;
170 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
171 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
172 if (ctxt->spaceTab == NULL) {
173 xmlGenericError(xmlGenericErrorContext,
174 "realloc failed !\n");
175 return(0);
176 }
177 }
178 ctxt->spaceTab[ctxt->spaceNr] = val;
179 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
180 return(ctxt->spaceNr++);
181}
182
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000183static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000184 int ret;
185 if (ctxt->spaceNr <= 0) return(0);
186 ctxt->spaceNr--;
187 if (ctxt->spaceNr > 0)
188 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
189 else
190 ctxt->space = NULL;
191 ret = ctxt->spaceTab[ctxt->spaceNr];
192 ctxt->spaceTab[ctxt->spaceNr] = -1;
193 return(ret);
194}
195
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt holding the
 * parser context to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending token if there is one, otherwise the
 *           current xmlChar value. This should be used internally by the
 *           parser only to compare to ASCII values otherwise it would
 *           break when running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done: it evaluates to -1 while
 *           a token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *   NEXT1   Skip exactly one xmlChar and try to grow the input when the
 *           end of the buffer is reached.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW, NEXT1 and COPY_BUF are not wrapped in do { } while (0):
 * use them only as a full statement followed by a semicolon, never as the
 * unbraced body of an if which has an else branch.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000282
283/**
284 * xmlSkipBlankChars:
285 * @ctxt: the XML parser context
286 *
287 * skip all blanks character found at that point in the input streams.
288 * It pops up finished entities in the process if allowable at that point.
289 *
290 * Returns the number of space chars skipped
291 */
292
293int
294xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000295 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000296
Daniel Veillard02141ea2001-04-30 11:46:40 +0000297 if (ctxt->token != 0) {
298 if (!IS_BLANK(ctxt->token))
299 return(0);
300 ctxt->token = 0;
301 res++;
302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303 /*
304 * It's Okay to use CUR/NEXT here since all the blanks are on
305 * the ASCII range.
306 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000307 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
308 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000309 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000310 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000311 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000312 cur = ctxt->input->cur;
313 while (IS_BLANK(*cur)) {
314 if (*cur == '\n') {
315 ctxt->input->line++; ctxt->input->col = 1;
316 }
317 cur++;
318 res++;
319 if (*cur == 0) {
320 ctxt->input->cur = cur;
321 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
322 cur = ctxt->input->cur;
323 }
324 }
325 ctxt->input->cur = cur;
326 } else {
327 int cur;
328 do {
329 cur = CUR;
330 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
331 NEXT;
332 cur = CUR;
333 res++;
334 }
335 while ((cur == 0) && (ctxt->inputNr > 1) &&
336 (ctxt->instate != XML_PARSER_COMMENT)) {
337 xmlPopInput(ctxt);
338 cur = CUR;
339 }
340 /*
341 * Need to handle support of entities branching here
342 */
343 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
344 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
345 }
Owen Taylor3473f882001-02-23 17:55:21 +0000346 return(res);
347}
348
349/************************************************************************
350 * *
351 * Commodity functions to handle entities *
352 * *
353 ************************************************************************/
354
355/**
356 * xmlPopInput:
357 * @ctxt: an XML parser context
358 *
359 * xmlPopInput: the current input pointed by ctxt->input came to an end
360 * pop it and return the next char.
361 *
362 * Returns the current xmlChar in the parser context
363 */
364xmlChar
365xmlPopInput(xmlParserCtxtPtr ctxt) {
366 if (ctxt->inputNr == 1) return(0); /* End of main Input */
367 if (xmlParserDebugEntities)
368 xmlGenericError(xmlGenericErrorContext,
369 "Popping input %d\n", ctxt->inputNr);
370 xmlFreeInputStream(inputPop(ctxt));
371 if ((*ctxt->input->cur == 0) &&
372 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
373 return(xmlPopInput(ctxt));
374 return(CUR);
375}
376
377/**
378 * xmlPushInput:
379 * @ctxt: an XML parser context
380 * @input: an XML parser input fragment (entity, XML fragment ...).
381 *
382 * xmlPushInput: switch to a new input stream which is stacked on top
383 * of the previous one(s).
384 */
385void
386xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
387 if (input == NULL) return;
388
389 if (xmlParserDebugEntities) {
390 if ((ctxt->input != NULL) && (ctxt->input->filename))
391 xmlGenericError(xmlGenericErrorContext,
392 "%s(%d): ", ctxt->input->filename,
393 ctxt->input->line);
394 xmlGenericError(xmlGenericErrorContext,
395 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
396 }
397 inputPush(ctxt, input);
398 GROW;
399}
400
401/**
402 * xmlParseCharRef:
403 * @ctxt: an XML parser context
404 *
405 * parse Reference declarations
406 *
407 * [66] CharRef ::= '&#' [0-9]+ ';' |
408 * '&#x' [0-9a-fA-F]+ ';'
409 *
410 * [ WFC: Legal Character ]
411 * Characters referred to using character references must match the
412 * production for Char.
413 *
414 * Returns the value parsed (as an int), 0 in case of error
415 */
416int
417xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000418 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000419 int count = 0;
420
421 if (ctxt->token != 0) {
422 val = ctxt->token;
423 ctxt->token = 0;
424 return(val);
425 }
426 /*
427 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
428 */
429 if ((RAW == '&') && (NXT(1) == '#') &&
430 (NXT(2) == 'x')) {
431 SKIP(3);
432 GROW;
433 while (RAW != ';') { /* loop blocked by count */
434 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
435 val = val * 16 + (CUR - '0');
436 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
437 val = val * 16 + (CUR - 'a') + 10;
438 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
439 val = val * 16 + (CUR - 'A') + 10;
440 else {
441 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
443 ctxt->sax->error(ctxt->userData,
444 "xmlParseCharRef: invalid hexadecimal value\n");
445 ctxt->wellFormed = 0;
446 ctxt->disableSAX = 1;
447 val = 0;
448 break;
449 }
450 NEXT;
451 count++;
452 }
453 if (RAW == ';') {
454 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
455 ctxt->nbChars ++;
456 ctxt->input->cur++;
457 }
458 } else if ((RAW == '&') && (NXT(1) == '#')) {
459 SKIP(2);
460 GROW;
461 while (RAW != ';') { /* loop blocked by count */
462 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
463 val = val * 10 + (CUR - '0');
464 else {
465 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
467 ctxt->sax->error(ctxt->userData,
468 "xmlParseCharRef: invalid decimal value\n");
469 ctxt->wellFormed = 0;
470 ctxt->disableSAX = 1;
471 val = 0;
472 break;
473 }
474 NEXT;
475 count++;
476 }
477 if (RAW == ';') {
478 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
479 ctxt->nbChars ++;
480 ctxt->input->cur++;
481 }
482 } else {
483 ctxt->errNo = XML_ERR_INVALID_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 }
490
491 /*
492 * [ WFC: Legal Character ]
493 * Characters referred to using character references must match the
494 * production for Char.
495 */
496 if (IS_CHAR(val)) {
497 return(val);
498 } else {
499 ctxt->errNo = XML_ERR_INVALID_CHAR;
500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
501 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
502 val);
503 ctxt->wellFormed = 0;
504 ctxt->disableSAX = 1;
505 }
506 return(0);
507}
508
509/**
510 * xmlParseStringCharRef:
511 * @ctxt: an XML parser context
512 * @str: a pointer to an index in the string
513 *
514 * parse Reference declarations, variant parsing from a string rather
515 * than an an input flow.
516 *
517 * [66] CharRef ::= '&#' [0-9]+ ';' |
518 * '&#x' [0-9a-fA-F]+ ';'
519 *
520 * [ WFC: Legal Character ]
521 * Characters referred to using character references must match the
522 * production for Char.
523 *
524 * Returns the value parsed (as an int), 0 in case of error, str will be
525 * updated to the current value of the index
526 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000527static int
Owen Taylor3473f882001-02-23 17:55:21 +0000528xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
529 const xmlChar *ptr;
530 xmlChar cur;
531 int val = 0;
532
533 if ((str == NULL) || (*str == NULL)) return(0);
534 ptr = *str;
535 cur = *ptr;
536 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
537 ptr += 3;
538 cur = *ptr;
539 while (cur != ';') { /* Non input consuming loop */
540 if ((cur >= '0') && (cur <= '9'))
541 val = val * 16 + (cur - '0');
542 else if ((cur >= 'a') && (cur <= 'f'))
543 val = val * 16 + (cur - 'a') + 10;
544 else if ((cur >= 'A') && (cur <= 'F'))
545 val = val * 16 + (cur - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseStringCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
552 ctxt->disableSAX = 1;
553 val = 0;
554 break;
555 }
556 ptr++;
557 cur = *ptr;
558 }
559 if (cur == ';')
560 ptr++;
561 } else if ((cur == '&') && (ptr[1] == '#')){
562 ptr += 2;
563 cur = *ptr;
564 while (cur != ';') { /* Non input consuming loops */
565 if ((cur >= '0') && (cur <= '9'))
566 val = val * 10 + (cur - '0');
567 else {
568 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
570 ctxt->sax->error(ctxt->userData,
571 "xmlParseStringCharRef: invalid decimal value\n");
572 ctxt->wellFormed = 0;
573 ctxt->disableSAX = 1;
574 val = 0;
575 break;
576 }
577 ptr++;
578 cur = *ptr;
579 }
580 if (cur == ';')
581 ptr++;
582 } else {
583 ctxt->errNo = XML_ERR_INVALID_CHARREF;
584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
585 ctxt->sax->error(ctxt->userData,
586 "xmlParseCharRef: invalid value\n");
587 ctxt->wellFormed = 0;
588 ctxt->disableSAX = 1;
589 return(0);
590 }
591 *str = ptr;
592
593 /*
594 * [ WFC: Legal Character ]
595 * Characters referred to using character references must match the
596 * production for Char.
597 */
598 if (IS_CHAR(val)) {
599 return(val);
600 } else {
601 ctxt->errNo = XML_ERR_INVALID_CHAR;
602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603 ctxt->sax->error(ctxt->userData,
604 "CharRef: invalid xmlChar value %d\n", val);
605 ctxt->wellFormed = 0;
606 ctxt->disableSAX = 1;
607 }
608 return(0);
609}
610
611/**
612 * xmlParserHandlePEReference:
613 * @ctxt: the parser context
614 *
615 * [69] PEReference ::= '%' Name ';'
616 *
617 * [ WFC: No Recursion ]
618 * A parsed entity must not contain a recursive
619 * reference to itself, either directly or indirectly.
620 *
621 * [ WFC: Entity Declared ]
622 * In a document without any DTD, a document with only an internal DTD
623 * subset which contains no parameter entity references, or a document
624 * with "standalone='yes'", ... ... The declaration of a parameter
625 * entity must precede any reference to it...
626 *
627 * [ VC: Entity Declared ]
628 * In a document with an external subset or external parameter entities
629 * with "standalone='no'", ... ... The declaration of a parameter entity
630 * must precede any reference to it...
631 *
632 * [ WFC: In DTD ]
633 * Parameter-entity references may only appear in the DTD.
634 * NOTE: misleading but this is handled.
635 *
636 * A PEReference may have been detected in the current input stream
637 * the handling is done accordingly to
638 * http://www.w3.org/TR/REC-xml#entproc
639 * i.e.
640 * - Included in literal in entity values
641 * - Included as Paraemeter Entity reference within DTDs
642 */
643void
644xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
645 xmlChar *name;
646 xmlEntityPtr entity = NULL;
647 xmlParserInputPtr input;
648
649 if (ctxt->token != 0) {
650 return;
651 }
652 if (RAW != '%') return;
653 switch(ctxt->instate) {
654 case XML_PARSER_CDATA_SECTION:
655 return;
656 case XML_PARSER_COMMENT:
657 return;
658 case XML_PARSER_START_TAG:
659 return;
660 case XML_PARSER_END_TAG:
661 return;
662 case XML_PARSER_EOF:
663 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
665 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
666 ctxt->wellFormed = 0;
667 ctxt->disableSAX = 1;
668 return;
669 case XML_PARSER_PROLOG:
670 case XML_PARSER_START:
671 case XML_PARSER_MISC:
672 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
674 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
675 ctxt->wellFormed = 0;
676 ctxt->disableSAX = 1;
677 return;
678 case XML_PARSER_ENTITY_DECL:
679 case XML_PARSER_CONTENT:
680 case XML_PARSER_ATTRIBUTE_VALUE:
681 case XML_PARSER_PI:
682 case XML_PARSER_SYSTEM_LITERAL:
683 /* we just ignore it there */
684 return;
685 case XML_PARSER_EPILOG:
686 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
688 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
689 ctxt->wellFormed = 0;
690 ctxt->disableSAX = 1;
691 return;
692 case XML_PARSER_ENTITY_VALUE:
693 /*
694 * NOTE: in the case of entity values, we don't do the
695 * substitution here since we need the literal
696 * entity value to be able to save the internal
697 * subset of the document.
698 * This will be handled by xmlStringDecodeEntities
699 */
700 return;
701 case XML_PARSER_DTD:
702 /*
703 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
704 * In the internal DTD subset, parameter-entity references
705 * can occur only where markup declarations can occur, not
706 * within markup declarations.
707 * In that case this is handled in xmlParseMarkupDecl
708 */
709 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
710 return;
711 break;
712 case XML_PARSER_IGNORE:
713 return;
714 }
715
716 NEXT;
717 name = xmlParseName(ctxt);
718 if (xmlParserDebugEntities)
719 xmlGenericError(xmlGenericErrorContext,
720 "PE Reference: %s\n", name);
721 if (name == NULL) {
722 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
724 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
725 ctxt->wellFormed = 0;
726 ctxt->disableSAX = 1;
727 } else {
728 if (RAW == ';') {
729 NEXT;
730 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
731 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
732 if (entity == NULL) {
733
734 /*
735 * [ WFC: Entity Declared ]
736 * In a document without any DTD, a document with only an
737 * internal DTD subset which contains no parameter entity
738 * references, or a document with "standalone='yes'", ...
739 * ... The declaration of a parameter entity must precede
740 * any reference to it...
741 */
742 if ((ctxt->standalone == 1) ||
743 ((ctxt->hasExternalSubset == 0) &&
744 (ctxt->hasPErefs == 0))) {
745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
746 ctxt->sax->error(ctxt->userData,
747 "PEReference: %%%s; not found\n", name);
748 ctxt->wellFormed = 0;
749 ctxt->disableSAX = 1;
750 } else {
751 /*
752 * [ VC: Entity Declared ]
753 * In a document with an external subset or external
754 * parameter entities with "standalone='no'", ...
755 * ... The declaration of a parameter entity must precede
756 * any reference to it...
757 */
758 if ((!ctxt->disableSAX) &&
759 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
760 ctxt->vctxt.error(ctxt->vctxt.userData,
761 "PEReference: %%%s; not found\n", name);
762 } else if ((!ctxt->disableSAX) &&
763 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
764 ctxt->sax->warning(ctxt->userData,
765 "PEReference: %%%s; not found\n", name);
766 ctxt->valid = 0;
767 }
768 } else {
769 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
770 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
771 /*
772 * handle the extra spaces added before and after
773 * c.f. http://www.w3.org/TR/REC-xml#as-PE
774 * this is done independantly.
775 */
776 input = xmlNewEntityInputStream(ctxt, entity);
777 xmlPushInput(ctxt, input);
778 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
779 (RAW == '<') && (NXT(1) == '?') &&
780 (NXT(2) == 'x') && (NXT(3) == 'm') &&
781 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
782 xmlParseTextDecl(ctxt);
783 }
784 if (ctxt->token == 0)
785 ctxt->token = ' ';
786 } else {
787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
788 ctxt->sax->error(ctxt->userData,
789 "xmlHandlePEReference: %s is not a parameter entity\n",
790 name);
791 ctxt->wellFormed = 0;
792 ctxt->disableSAX = 1;
793 }
794 }
795 } else {
796 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
798 ctxt->sax->error(ctxt->userData,
799 "xmlHandlePEReference: expecting ';'\n");
800 ctxt->wellFormed = 0;
801 ctxt->disableSAX = 1;
802 }
803 xmlFree(name);
804 }
805}
806
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the allocation of the xmlChar array named
 * @buffer; the variable holding its size must be named <buffer>_size.
 * If xmlRealloc() fails, an error is printed with perror(), the old
 * buffer is released so that it does not leak, and the ENCLOSING FUNCTION
 * RETURNS NULL: any other resource owned by the caller at that point has
 * to be released by other means.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp = (xmlChar *)				\
        xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
    buffer##_size *= 2;							\
}
819
820/**
821 * xmlStringDecodeEntities:
822 * @ctxt: the parser context
823 * @str: the input string
824 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
825 * @end: an end marker xmlChar, 0 if none
826 * @end2: an end marker xmlChar, 0 if none
827 * @end3: an end marker xmlChar, 0 if none
828 *
829 * Takes a entity string content and process to do the adequate subtitutions.
830 *
831 * [67] Reference ::= EntityRef | CharRef
832 *
833 * [69] PEReference ::= '%' Name ';'
834 *
835 * Returns A newly allocated string with the substitution done. The caller
836 * must deallocate it !
837 */
838xmlChar *
839xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
840 xmlChar end, xmlChar end2, xmlChar end3) {
841 xmlChar *buffer = NULL;
842 int buffer_size = 0;
843
844 xmlChar *current = NULL;
845 xmlEntityPtr ent;
846 int c,l;
847 int nbchars = 0;
848
849 if (str == NULL)
850 return(NULL);
851
852 if (ctxt->depth > 40) {
853 ctxt->errNo = XML_ERR_ENTITY_LOOP;
854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
855 ctxt->sax->error(ctxt->userData,
856 "Detected entity reference loop\n");
857 ctxt->wellFormed = 0;
858 ctxt->disableSAX = 1;
859 return(NULL);
860 }
861
862 /*
863 * allocate a translation buffer.
864 */
865 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
866 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
867 if (buffer == NULL) {
868 perror("xmlDecodeEntities: malloc failed");
869 return(NULL);
870 }
871
872 /*
873 * Ok loop until we reach one of the ending char or a size limit.
874 * we are operating on already parsed values.
875 */
876 c = CUR_SCHAR(str, l);
877 while ((c != 0) && (c != end) && /* non input consuming loop */
878 (c != end2) && (c != end3)) {
879
880 if (c == 0) break;
881 if ((c == '&') && (str[1] == '#')) {
882 int val = xmlParseStringCharRef(ctxt, &str);
883 if (val != 0) {
884 COPY_BUF(0,buffer,nbchars,val);
885 }
886 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
887 if (xmlParserDebugEntities)
888 xmlGenericError(xmlGenericErrorContext,
889 "String decoding Entity Reference: %.30s\n",
890 str);
891 ent = xmlParseStringEntityRef(ctxt, &str);
892 if ((ent != NULL) &&
893 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
894 if (ent->content != NULL) {
895 COPY_BUF(0,buffer,nbchars,ent->content[0]);
896 } else {
897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
898 ctxt->sax->error(ctxt->userData,
899 "internal error entity has no content\n");
900 }
901 } else if ((ent != NULL) && (ent->content != NULL)) {
902 xmlChar *rep;
903
904 ctxt->depth++;
905 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
906 0, 0, 0);
907 ctxt->depth--;
908 if (rep != NULL) {
909 current = rep;
910 while (*current != 0) { /* non input consuming loop */
911 buffer[nbchars++] = *current++;
912 if (nbchars >
913 buffer_size - XML_PARSER_BUFFER_SIZE) {
914 growBuffer(buffer);
915 }
916 }
917 xmlFree(rep);
918 }
919 } else if (ent != NULL) {
920 int i = xmlStrlen(ent->name);
921 const xmlChar *cur = ent->name;
922
923 buffer[nbchars++] = '&';
924 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
925 growBuffer(buffer);
926 }
927 for (;i > 0;i--)
928 buffer[nbchars++] = *cur++;
929 buffer[nbchars++] = ';';
930 }
931 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
932 if (xmlParserDebugEntities)
933 xmlGenericError(xmlGenericErrorContext,
934 "String decoding PE Reference: %.30s\n", str);
935 ent = xmlParseStringPEReference(ctxt, &str);
936 if (ent != NULL) {
937 xmlChar *rep;
938
939 ctxt->depth++;
940 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
941 0, 0, 0);
942 ctxt->depth--;
943 if (rep != NULL) {
944 current = rep;
945 while (*current != 0) { /* non input consuming loop */
946 buffer[nbchars++] = *current++;
947 if (nbchars >
948 buffer_size - XML_PARSER_BUFFER_SIZE) {
949 growBuffer(buffer);
950 }
951 }
952 xmlFree(rep);
953 }
954 }
955 } else {
956 COPY_BUF(l,buffer,nbchars,c);
957 str += l;
958 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
959 growBuffer(buffer);
960 }
961 }
962 c = CUR_SCHAR(str, l);
963 }
964 buffer[nbchars++] = 0;
965 return(buffer);
966}
967
968
969/************************************************************************
970 * *
971 * Commodity functions to handle xmlChars *
972 * *
973 ************************************************************************/
974
975/**
976 * xmlStrndup:
977 * @cur: the input xmlChar *
978 * @len: the len of @cur
979 *
980 * a strndup for array of xmlChar's
981 *
982 * Returns a new xmlChar * or NULL
983 */
984xmlChar *
985xmlStrndup(const xmlChar *cur, int len) {
986 xmlChar *ret;
987
988 if ((cur == NULL) || (len < 0)) return(NULL);
989 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
990 if (ret == NULL) {
991 xmlGenericError(xmlGenericErrorContext,
992 "malloc of %ld byte failed\n",
993 (len + 1) * (long)sizeof(xmlChar));
994 return(NULL);
995 }
996 memcpy(ret, cur, len * sizeof(xmlChar));
997 ret[len] = 0;
998 return(ret);
999}
1000
1001/**
1002 * xmlStrdup:
1003 * @cur: the input xmlChar *
1004 *
1005 * a strdup for array of xmlChar's. Since they are supposed to be
1006 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1007 * a termination mark of '0'.
1008 *
1009 * Returns a new xmlChar * or NULL
1010 */
1011xmlChar *
1012xmlStrdup(const xmlChar *cur) {
1013 const xmlChar *p = cur;
1014
1015 if (cur == NULL) return(NULL);
1016 while (*p != 0) p++; /* non input consuming */
1017 return(xmlStrndup(cur, p - cur));
1018}
1019
1020/**
1021 * xmlCharStrndup:
1022 * @cur: the input char *
1023 * @len: the len of @cur
1024 *
1025 * a strndup for char's to xmlChar's
1026 *
1027 * Returns a new xmlChar * or NULL
1028 */
1029
1030xmlChar *
1031xmlCharStrndup(const char *cur, int len) {
1032 int i;
1033 xmlChar *ret;
1034
1035 if ((cur == NULL) || (len < 0)) return(NULL);
1036 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1037 if (ret == NULL) {
1038 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1039 (len + 1) * (long)sizeof(xmlChar));
1040 return(NULL);
1041 }
1042 for (i = 0;i < len;i++)
1043 ret[i] = (xmlChar) cur[i];
1044 ret[len] = 0;
1045 return(ret);
1046}
1047
1048/**
1049 * xmlCharStrdup:
1050 * @cur: the input char *
1051 * @len: the len of @cur
1052 *
1053 * a strdup for char's to xmlChar's
1054 *
1055 * Returns a new xmlChar * or NULL
1056 */
1057
1058xmlChar *
1059xmlCharStrdup(const char *cur) {
1060 const char *p = cur;
1061
1062 if (cur == NULL) return(NULL);
1063 while (*p != '\0') p++; /* non input consuming */
1064 return(xmlCharStrndup(cur, p - cur));
1065}
1066
1067/**
1068 * xmlStrcmp:
1069 * @str1: the first xmlChar *
1070 * @str2: the second xmlChar *
1071 *
1072 * a strcmp for xmlChar's
1073 *
1074 * Returns the integer result of the comparison
1075 */
1076
1077int
1078xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1079 register int tmp;
1080
1081 if (str1 == str2) return(0);
1082 if (str1 == NULL) return(-1);
1083 if (str2 == NULL) return(1);
1084 do {
1085 tmp = *str1++ - *str2;
1086 if (tmp != 0) return(tmp);
1087 } while (*str2++ != 0);
1088 return 0;
1089}
1090
1091/**
1092 * xmlStrEqual:
1093 * @str1: the first xmlChar *
1094 * @str2: the second xmlChar *
1095 *
1096 * Check if both string are equal of have same content
1097 * Should be a bit more readable and faster than xmlStrEqual()
1098 *
1099 * Returns 1 if they are equal, 0 if they are different
1100 */
1101
1102int
1103xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1104 if (str1 == str2) return(1);
1105 if (str1 == NULL) return(0);
1106 if (str2 == NULL) return(0);
1107 do {
1108 if (*str1++ != *str2) return(0);
1109 } while (*str2++);
1110 return(1);
1111}
1112
1113/**
1114 * xmlStrncmp:
1115 * @str1: the first xmlChar *
1116 * @str2: the second xmlChar *
1117 * @len: the max comparison length
1118 *
1119 * a strncmp for xmlChar's
1120 *
1121 * Returns the integer result of the comparison
1122 */
1123
1124int
1125xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1126 register int tmp;
1127
1128 if (len <= 0) return(0);
1129 if (str1 == str2) return(0);
1130 if (str1 == NULL) return(-1);
1131 if (str2 == NULL) return(1);
1132 do {
1133 tmp = *str1++ - *str2;
1134 if (tmp != 0 || --len == 0) return(tmp);
1135 } while (*str2++ != 0);
1136 return 0;
1137}
1138
1139static xmlChar casemap[256] = {
1140 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1141 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1142 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1143 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1144 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1145 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1146 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1147 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1148 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1149 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1150 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1151 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1152 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1153 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1154 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1155 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1156 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1157 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1158 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1159 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1160 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1161 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1162 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1163 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1164 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1165 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1166 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1167 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1168 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1169 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1170 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1171 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1172};
1173
1174/**
1175 * xmlStrcasecmp:
1176 * @str1: the first xmlChar *
1177 * @str2: the second xmlChar *
1178 *
1179 * a strcasecmp for xmlChar's
1180 *
1181 * Returns the integer result of the comparison
1182 */
1183
1184int
1185xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1186 register int tmp;
1187
1188 if (str1 == str2) return(0);
1189 if (str1 == NULL) return(-1);
1190 if (str2 == NULL) return(1);
1191 do {
1192 tmp = casemap[*str1++] - casemap[*str2];
1193 if (tmp != 0) return(tmp);
1194 } while (*str2++ != 0);
1195 return 0;
1196}
1197
1198/**
1199 * xmlStrncasecmp:
1200 * @str1: the first xmlChar *
1201 * @str2: the second xmlChar *
1202 * @len: the max comparison length
1203 *
1204 * a strncasecmp for xmlChar's
1205 *
1206 * Returns the integer result of the comparison
1207 */
1208
1209int
1210xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1211 register int tmp;
1212
1213 if (len <= 0) return(0);
1214 if (str1 == str2) return(0);
1215 if (str1 == NULL) return(-1);
1216 if (str2 == NULL) return(1);
1217 do {
1218 tmp = casemap[*str1++] - casemap[*str2];
1219 if (tmp != 0 || --len == 0) return(tmp);
1220 } while (*str2++ != 0);
1221 return 0;
1222}
1223
1224/**
1225 * xmlStrchr:
1226 * @str: the xmlChar * array
1227 * @val: the xmlChar to search
1228 *
1229 * a strchr for xmlChar's
1230 *
1231 * Returns the xmlChar * for the first occurence or NULL.
1232 */
1233
1234const xmlChar *
1235xmlStrchr(const xmlChar *str, xmlChar val) {
1236 if (str == NULL) return(NULL);
1237 while (*str != 0) { /* non input consuming */
1238 if (*str == val) return((xmlChar *) str);
1239 str++;
1240 }
1241 return(NULL);
1242}
1243
1244/**
1245 * xmlStrstr:
1246 * @str: the xmlChar * array (haystack)
1247 * @val: the xmlChar to search (needle)
1248 *
1249 * a strstr for xmlChar's
1250 *
1251 * Returns the xmlChar * for the first occurence or NULL.
1252 */
1253
1254const xmlChar *
1255xmlStrstr(const xmlChar *str, xmlChar *val) {
1256 int n;
1257
1258 if (str == NULL) return(NULL);
1259 if (val == NULL) return(NULL);
1260 n = xmlStrlen(val);
1261
1262 if (n == 0) return(str);
1263 while (*str != 0) { /* non input consuming */
1264 if (*str == *val) {
1265 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1266 }
1267 str++;
1268 }
1269 return(NULL);
1270}
1271
1272/**
1273 * xmlStrcasestr:
1274 * @str: the xmlChar * array (haystack)
1275 * @val: the xmlChar to search (needle)
1276 *
1277 * a case-ignoring strstr for xmlChar's
1278 *
1279 * Returns the xmlChar * for the first occurence or NULL.
1280 */
1281
1282const xmlChar *
1283xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1284 int n;
1285
1286 if (str == NULL) return(NULL);
1287 if (val == NULL) return(NULL);
1288 n = xmlStrlen(val);
1289
1290 if (n == 0) return(str);
1291 while (*str != 0) { /* non input consuming */
1292 if (casemap[*str] == casemap[*val])
1293 if (!xmlStrncasecmp(str, val, n)) return(str);
1294 str++;
1295 }
1296 return(NULL);
1297}
1298
1299/**
1300 * xmlStrsub:
1301 * @str: the xmlChar * array (haystack)
1302 * @start: the index of the first char (zero based)
1303 * @len: the length of the substring
1304 *
1305 * Extract a substring of a given string
1306 *
1307 * Returns the xmlChar * for the first occurence or NULL.
1308 */
1309
1310xmlChar *
1311xmlStrsub(const xmlChar *str, int start, int len) {
1312 int i;
1313
1314 if (str == NULL) return(NULL);
1315 if (start < 0) return(NULL);
1316 if (len < 0) return(NULL);
1317
1318 for (i = 0;i < start;i++) {
1319 if (*str == 0) return(NULL);
1320 str++;
1321 }
1322 if (*str == 0) return(NULL);
1323 return(xmlStrndup(str, len));
1324}
1325
1326/**
1327 * xmlStrlen:
1328 * @str: the xmlChar * array
1329 *
1330 * length of a xmlChar's string
1331 *
1332 * Returns the number of xmlChar contained in the ARRAY.
1333 */
1334
1335int
1336xmlStrlen(const xmlChar *str) {
1337 int len = 0;
1338
1339 if (str == NULL) return(0);
1340 while (*str != 0) { /* non input consuming */
1341 str++;
1342 len++;
1343 }
1344 return(len);
1345}
1346
1347/**
1348 * xmlStrncat:
1349 * @cur: the original xmlChar * array
1350 * @add: the xmlChar * array added
1351 * @len: the length of @add
1352 *
1353 * a strncat for array of xmlChar's, it will extend cur with the len
1354 * first bytes of @add.
1355 *
1356 * Returns a new xmlChar *, the original @cur is reallocated if needed
1357 * and should not be freed
1358 */
1359
1360xmlChar *
1361xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1362 int size;
1363 xmlChar *ret;
1364
1365 if ((add == NULL) || (len == 0))
1366 return(cur);
1367 if (cur == NULL)
1368 return(xmlStrndup(add, len));
1369
1370 size = xmlStrlen(cur);
1371 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1372 if (ret == NULL) {
1373 xmlGenericError(xmlGenericErrorContext,
1374 "xmlStrncat: realloc of %ld byte failed\n",
1375 (size + len + 1) * (long)sizeof(xmlChar));
1376 return(cur);
1377 }
1378 memcpy(&ret[size], add, len * sizeof(xmlChar));
1379 ret[size + len] = 0;
1380 return(ret);
1381}
1382
1383/**
1384 * xmlStrcat:
1385 * @cur: the original xmlChar * array
1386 * @add: the xmlChar * array added
1387 *
1388 * a strcat for array of xmlChar's. Since they are supposed to be
1389 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1390 * a termination mark of '0'.
1391 *
1392 * Returns a new xmlChar * containing the concatenated string.
1393 */
1394xmlChar *
1395xmlStrcat(xmlChar *cur, const xmlChar *add) {
1396 const xmlChar *p = add;
1397
1398 if (add == NULL) return(cur);
1399 if (cur == NULL)
1400 return(xmlStrdup(add));
1401
1402 while (*p != 0) p++; /* non input consuming */
1403 return(xmlStrncat(cur, add, p - add));
1404}
1405
1406/************************************************************************
1407 * *
1408 * Commodity functions, cleanup needed ? *
1409 * *
1410 ************************************************************************/
1411
1412/**
1413 * areBlanks:
1414 * @ctxt: an XML parser context
1415 * @str: a xmlChar *
1416 * @len: the size of @str
1417 *
1418 * Is this a sequence of blank chars that one can ignore ?
1419 *
1420 * Returns 1 if ignorable 0 otherwise.
1421 */
1422
1423static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1424 int i, ret;
1425 xmlNodePtr lastChild;
1426
Daniel Veillard2f362242001-03-02 17:36:21 +00001427 if (ctxt->keepBlanks)
1428 return(0);
1429
Owen Taylor3473f882001-02-23 17:55:21 +00001430 /*
1431 * Check for xml:space value.
1432 */
1433 if (*(ctxt->space) == 1)
1434 return(0);
1435
1436 /*
1437 * Check that the string is made of blanks
1438 */
1439 for (i = 0;i < len;i++)
1440 if (!(IS_BLANK(str[i]))) return(0);
1441
1442 /*
1443 * Look if the element is mixed content in the Dtd if available
1444 */
1445 if (ctxt->myDoc != NULL) {
1446 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1447 if (ret == 0) return(1);
1448 if (ret == 1) return(0);
1449 }
1450
1451 /*
1452 * Otherwise, heuristic :-\
1453 */
Owen Taylor3473f882001-02-23 17:55:21 +00001454 if (RAW != '<') return(0);
1455 if (ctxt->node == NULL) return(0);
1456 if ((ctxt->node->children == NULL) &&
1457 (RAW == '<') && (NXT(1) == '/')) return(0);
1458
1459 lastChild = xmlGetLastChild(ctxt->node);
1460 if (lastChild == NULL) {
1461 if (ctxt->node->content != NULL) return(0);
1462 } else if (xmlNodeIsText(lastChild))
1463 return(0);
1464 else if ((ctxt->node->children != NULL) &&
1465 (xmlNodeIsText(ctxt->node->children)))
1466 return(0);
1467 return(1);
1468}
1469
/*
 * Forward declarations for recursive behaviour.
 */
void xmlParsePEReference(xmlParserCtxtPtr ctxt);
void xmlParseReference(xmlParserCtxtPtr ctxt);
1475
1476/************************************************************************
1477 * *
1478 * Extra stuff for namespace support *
1479 * Relates to http://www.w3.org/TR/WD-xml-names *
1480 * *
1481 ************************************************************************/
1482
1483/**
1484 * xmlSplitQName:
1485 * @ctxt: an XML parser context
1486 * @name: an XML parser context
1487 * @prefix: a xmlChar **
1488 *
1489 * parse an UTF8 encoded XML qualified name string
1490 *
1491 * [NS 5] QName ::= (Prefix ':')? LocalPart
1492 *
1493 * [NS 6] Prefix ::= NCName
1494 *
1495 * [NS 7] LocalPart ::= NCName
1496 *
1497 * Returns the local part, and prefix is updated
1498 * to get the Prefix if any.
1499 */
1500
1501xmlChar *
1502xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1503 xmlChar buf[XML_MAX_NAMELEN + 5];
1504 xmlChar *buffer = NULL;
1505 int len = 0;
1506 int max = XML_MAX_NAMELEN;
1507 xmlChar *ret = NULL;
1508 const xmlChar *cur = name;
1509 int c;
1510
1511 *prefix = NULL;
1512
1513#ifndef XML_XML_NAMESPACE
1514 /* xml: prefix is not really a namespace */
1515 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1516 (cur[2] == 'l') && (cur[3] == ':'))
1517 return(xmlStrdup(name));
1518#endif
1519
1520 /* nasty but valid */
1521 if (cur[0] == ':')
1522 return(xmlStrdup(name));
1523
1524 c = *cur++;
1525 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1526 buf[len++] = c;
1527 c = *cur++;
1528 }
1529 if (len >= max) {
1530 /*
1531 * Okay someone managed to make a huge name, so he's ready to pay
1532 * for the processing speed.
1533 */
1534 max = len * 2;
1535
1536 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1537 if (buffer == NULL) {
1538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1539 ctxt->sax->error(ctxt->userData,
1540 "xmlSplitQName: out of memory\n");
1541 return(NULL);
1542 }
1543 memcpy(buffer, buf, len);
1544 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1545 if (len + 10 > max) {
1546 max *= 2;
1547 buffer = (xmlChar *) xmlRealloc(buffer,
1548 max * sizeof(xmlChar));
1549 if (buffer == NULL) {
1550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1551 ctxt->sax->error(ctxt->userData,
1552 "xmlSplitQName: out of memory\n");
1553 return(NULL);
1554 }
1555 }
1556 buffer[len++] = c;
1557 c = *cur++;
1558 }
1559 buffer[len] = 0;
1560 }
1561
1562 if (buffer == NULL)
1563 ret = xmlStrndup(buf, len);
1564 else {
1565 ret = buffer;
1566 buffer = NULL;
1567 max = XML_MAX_NAMELEN;
1568 }
1569
1570
1571 if (c == ':') {
1572 c = *cur++;
1573 if (c == 0) return(ret);
1574 *prefix = ret;
1575 len = 0;
1576
1577 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1578 buf[len++] = c;
1579 c = *cur++;
1580 }
1581 if (len >= max) {
1582 /*
1583 * Okay someone managed to make a huge name, so he's ready to pay
1584 * for the processing speed.
1585 */
1586 max = len * 2;
1587
1588 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1589 if (buffer == NULL) {
1590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1591 ctxt->sax->error(ctxt->userData,
1592 "xmlSplitQName: out of memory\n");
1593 return(NULL);
1594 }
1595 memcpy(buffer, buf, len);
1596 while (c != 0) { /* tested bigname2.xml */
1597 if (len + 10 > max) {
1598 max *= 2;
1599 buffer = (xmlChar *) xmlRealloc(buffer,
1600 max * sizeof(xmlChar));
1601 if (buffer == NULL) {
1602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603 ctxt->sax->error(ctxt->userData,
1604 "xmlSplitQName: out of memory\n");
1605 return(NULL);
1606 }
1607 }
1608 buffer[len++] = c;
1609 c = *cur++;
1610 }
1611 buffer[len] = 0;
1612 }
1613
1614 if (buffer == NULL)
1615 ret = xmlStrndup(buf, len);
1616 else {
1617 ret = buffer;
1618 }
1619 }
1620
1621 return(ret);
1622}
1623
1624/************************************************************************
1625 * *
1626 * The parser itself *
1627 * Relates to http://www.w3.org/TR/REC-xml *
1628 * *
1629 ************************************************************************/
1630
Daniel Veillard76d66f42001-05-16 21:05:17 +00001631static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001632/**
1633 * xmlParseName:
1634 * @ctxt: an XML parser context
1635 *
1636 * parse an XML name.
1637 *
1638 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1639 * CombiningChar | Extender
1640 *
1641 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1642 *
1643 * [6] Names ::= Name (S Name)*
1644 *
1645 * Returns the Name parsed or NULL
1646 */
1647
1648xmlChar *
1649xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001650 const xmlChar *in;
1651 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001652 int count = 0;
1653
1654 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001655
1656 /*
1657 * Accelerator for simple ASCII names
1658 */
1659 in = ctxt->input->cur;
1660 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1661 ((*in >= 0x41) && (*in <= 0x5A)) ||
1662 (*in == '_') || (*in == ':')) {
1663 in++;
1664 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1665 ((*in >= 0x41) && (*in <= 0x5A)) ||
1666 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001667 (*in == '_') || (*in == '-') ||
1668 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001669 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001670 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001671 count = in - ctxt->input->cur;
1672 ret = xmlStrndup(ctxt->input->cur, count);
1673 ctxt->input->cur = in;
1674 return(ret);
1675 }
1676 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001677 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001678}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001679
Daniel Veillard76d66f42001-05-16 21:05:17 +00001680static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001681xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1682 xmlChar buf[XML_MAX_NAMELEN + 5];
1683 int len = 0, l;
1684 int c;
1685 int count = 0;
1686
1687 /*
1688 * Handler for more complex cases
1689 */
1690 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001691 c = CUR_CHAR(l);
1692 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1693 (!IS_LETTER(c) && (c != '_') &&
1694 (c != ':'))) {
1695 return(NULL);
1696 }
1697
1698 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1699 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1700 (c == '.') || (c == '-') ||
1701 (c == '_') || (c == ':') ||
1702 (IS_COMBINING(c)) ||
1703 (IS_EXTENDER(c)))) {
1704 if (count++ > 100) {
1705 count = 0;
1706 GROW;
1707 }
1708 COPY_BUF(l,buf,len,c);
1709 NEXTL(l);
1710 c = CUR_CHAR(l);
1711 if (len >= XML_MAX_NAMELEN) {
1712 /*
1713 * Okay someone managed to make a huge name, so he's ready to pay
1714 * for the processing speed.
1715 */
1716 xmlChar *buffer;
1717 int max = len * 2;
1718
1719 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1720 if (buffer == NULL) {
1721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1722 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001723 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001724 return(NULL);
1725 }
1726 memcpy(buffer, buf, len);
1727 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1728 (c == '.') || (c == '-') ||
1729 (c == '_') || (c == ':') ||
1730 (IS_COMBINING(c)) ||
1731 (IS_EXTENDER(c))) {
1732 if (count++ > 100) {
1733 count = 0;
1734 GROW;
1735 }
1736 if (len + 10 > max) {
1737 max *= 2;
1738 buffer = (xmlChar *) xmlRealloc(buffer,
1739 max * sizeof(xmlChar));
1740 if (buffer == NULL) {
1741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1742 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001743 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001744 return(NULL);
1745 }
1746 }
1747 COPY_BUF(l,buffer,len,c);
1748 NEXTL(l);
1749 c = CUR_CHAR(l);
1750 }
1751 buffer[len] = 0;
1752 return(buffer);
1753 }
1754 }
1755 return(xmlStrndup(buf, len));
1756}
1757
1758/**
1759 * xmlParseStringName:
1760 * @ctxt: an XML parser context
1761 * @str: a pointer to the string pointer (IN/OUT)
1762 *
1763 * parse an XML name.
1764 *
1765 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1766 * CombiningChar | Extender
1767 *
1768 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1769 *
1770 * [6] Names ::= Name (S Name)*
1771 *
1772 * Returns the Name parsed or NULL. The str pointer
1773 * is updated to the current location in the string.
1774 */
1775
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001776static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001777xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1778 xmlChar buf[XML_MAX_NAMELEN + 5];
1779 const xmlChar *cur = *str;
1780 int len = 0, l;
1781 int c;
1782
1783 c = CUR_SCHAR(cur, l);
1784 if (!IS_LETTER(c) && (c != '_') &&
1785 (c != ':')) {
1786 return(NULL);
1787 }
1788
1789 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1790 (c == '.') || (c == '-') ||
1791 (c == '_') || (c == ':') ||
1792 (IS_COMBINING(c)) ||
1793 (IS_EXTENDER(c))) {
1794 COPY_BUF(l,buf,len,c);
1795 cur += l;
1796 c = CUR_SCHAR(cur, l);
1797 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1798 /*
1799 * Okay someone managed to make a huge name, so he's ready to pay
1800 * for the processing speed.
1801 */
1802 xmlChar *buffer;
1803 int max = len * 2;
1804
1805 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1806 if (buffer == NULL) {
1807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1808 ctxt->sax->error(ctxt->userData,
1809 "xmlParseStringName: out of memory\n");
1810 return(NULL);
1811 }
1812 memcpy(buffer, buf, len);
1813 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1814 (c == '.') || (c == '-') ||
1815 (c == '_') || (c == ':') ||
1816 (IS_COMBINING(c)) ||
1817 (IS_EXTENDER(c))) {
1818 if (len + 10 > max) {
1819 max *= 2;
1820 buffer = (xmlChar *) xmlRealloc(buffer,
1821 max * sizeof(xmlChar));
1822 if (buffer == NULL) {
1823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1824 ctxt->sax->error(ctxt->userData,
1825 "xmlParseStringName: out of memory\n");
1826 return(NULL);
1827 }
1828 }
1829 COPY_BUF(l,buffer,len,c);
1830 cur += l;
1831 c = CUR_SCHAR(cur, l);
1832 }
1833 buffer[len] = 0;
1834 *str = cur;
1835 return(buffer);
1836 }
1837 }
1838 *str = cur;
1839 return(xmlStrndup(buf, len));
1840}
1841
1842/**
1843 * xmlParseNmtoken:
1844 * @ctxt: an XML parser context
1845 *
1846 * parse an XML Nmtoken.
1847 *
1848 * [7] Nmtoken ::= (NameChar)+
1849 *
1850 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1851 *
1852 * Returns the Nmtoken parsed or NULL
1853 */
1854
1855xmlChar *
1856xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1857 xmlChar buf[XML_MAX_NAMELEN + 5];
1858 int len = 0, l;
1859 int c;
1860 int count = 0;
1861
1862 GROW;
1863 c = CUR_CHAR(l);
1864
1865 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1866 (c == '.') || (c == '-') ||
1867 (c == '_') || (c == ':') ||
1868 (IS_COMBINING(c)) ||
1869 (IS_EXTENDER(c))) {
1870 if (count++ > 100) {
1871 count = 0;
1872 GROW;
1873 }
1874 COPY_BUF(l,buf,len,c);
1875 NEXTL(l);
1876 c = CUR_CHAR(l);
1877 if (len >= XML_MAX_NAMELEN) {
1878 /*
1879 * Okay someone managed to make a huge token, so he's ready to pay
1880 * for the processing speed.
1881 */
1882 xmlChar *buffer;
1883 int max = len * 2;
1884
1885 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1886 if (buffer == NULL) {
1887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1888 ctxt->sax->error(ctxt->userData,
1889 "xmlParseNmtoken: out of memory\n");
1890 return(NULL);
1891 }
1892 memcpy(buffer, buf, len);
1893 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1894 (c == '.') || (c == '-') ||
1895 (c == '_') || (c == ':') ||
1896 (IS_COMBINING(c)) ||
1897 (IS_EXTENDER(c))) {
1898 if (count++ > 100) {
1899 count = 0;
1900 GROW;
1901 }
1902 if (len + 10 > max) {
1903 max *= 2;
1904 buffer = (xmlChar *) xmlRealloc(buffer,
1905 max * sizeof(xmlChar));
1906 if (buffer == NULL) {
1907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1908 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001909 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001910 return(NULL);
1911 }
1912 }
1913 COPY_BUF(l,buffer,len,c);
1914 NEXTL(l);
1915 c = CUR_CHAR(l);
1916 }
1917 buffer[len] = 0;
1918 return(buffer);
1919 }
1920 }
1921 if (len == 0)
1922 return(NULL);
1923 return(xmlStrndup(buf, len));
1924}
1925
1926/**
1927 * xmlParseEntityValue:
1928 * @ctxt: an XML parser context
1929 * @orig: if non-NULL store a copy of the original entity value
1930 *
1931 * parse a value for ENTITY declarations
1932 *
1933 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1934 * "'" ([^%&'] | PEReference | Reference)* "'"
1935 *
1936 * Returns the EntityValue parsed with reference substitued or NULL
1937 */
1938
1939xmlChar *
1940xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1941 xmlChar *buf = NULL;
1942 int len = 0;
1943 int size = XML_PARSER_BUFFER_SIZE;
1944 int c, l;
1945 xmlChar stop;
1946 xmlChar *ret = NULL;
1947 const xmlChar *cur = NULL;
1948 xmlParserInputPtr input;
1949
1950 if (RAW == '"') stop = '"';
1951 else if (RAW == '\'') stop = '\'';
1952 else {
1953 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1955 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1956 ctxt->wellFormed = 0;
1957 ctxt->disableSAX = 1;
1958 return(NULL);
1959 }
1960 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1961 if (buf == NULL) {
1962 xmlGenericError(xmlGenericErrorContext,
1963 "malloc of %d byte failed\n", size);
1964 return(NULL);
1965 }
1966
1967 /*
1968 * The content of the entity definition is copied in a buffer.
1969 */
1970
1971 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1972 input = ctxt->input;
1973 GROW;
1974 NEXT;
1975 c = CUR_CHAR(l);
1976 /*
1977 * NOTE: 4.4.5 Included in Literal
1978 * When a parameter entity reference appears in a literal entity
1979 * value, ... a single or double quote character in the replacement
1980 * text is always treated as a normal data character and will not
1981 * terminate the literal.
1982 * In practice it means we stop the loop only when back at parsing
1983 * the initial entity and the quote is found
1984 */
1985 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1986 (ctxt->input != input))) {
1987 if (len + 5 >= size) {
1988 size *= 2;
1989 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1990 if (buf == NULL) {
1991 xmlGenericError(xmlGenericErrorContext,
1992 "realloc of %d byte failed\n", size);
1993 return(NULL);
1994 }
1995 }
1996 COPY_BUF(l,buf,len,c);
1997 NEXTL(l);
1998 /*
1999 * Pop-up of finished entities.
2000 */
2001 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2002 xmlPopInput(ctxt);
2003
2004 GROW;
2005 c = CUR_CHAR(l);
2006 if (c == 0) {
2007 GROW;
2008 c = CUR_CHAR(l);
2009 }
2010 }
2011 buf[len] = 0;
2012
2013 /*
2014 * Raise problem w.r.t. '&' and '%' being used in non-entities
2015 * reference constructs. Note Charref will be handled in
2016 * xmlStringDecodeEntities()
2017 */
2018 cur = buf;
2019 while (*cur != 0) { /* non input consuming */
2020 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2021 xmlChar *name;
2022 xmlChar tmp = *cur;
2023
2024 cur++;
2025 name = xmlParseStringName(ctxt, &cur);
2026 if ((name == NULL) || (*cur != ';')) {
2027 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2029 ctxt->sax->error(ctxt->userData,
2030 "EntityValue: '%c' forbidden except for entities references\n",
2031 tmp);
2032 ctxt->wellFormed = 0;
2033 ctxt->disableSAX = 1;
2034 }
2035 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2036 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2038 ctxt->sax->error(ctxt->userData,
2039 "EntityValue: PEReferences forbidden in internal subset\n",
2040 tmp);
2041 ctxt->wellFormed = 0;
2042 ctxt->disableSAX = 1;
2043 }
2044 if (name != NULL)
2045 xmlFree(name);
2046 }
2047 cur++;
2048 }
2049
2050 /*
2051 * Then PEReference entities are substituted.
2052 */
2053 if (c != stop) {
2054 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2056 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2057 ctxt->wellFormed = 0;
2058 ctxt->disableSAX = 1;
2059 xmlFree(buf);
2060 } else {
2061 NEXT;
2062 /*
2063 * NOTE: 4.4.7 Bypassed
2064 * When a general entity reference appears in the EntityValue in
2065 * an entity declaration, it is bypassed and left as is.
2066 * so XML_SUBSTITUTE_REF is not set here.
2067 */
2068 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2069 0, 0, 0);
2070 if (orig != NULL)
2071 *orig = buf;
2072 else
2073 xmlFree(buf);
2074 }
2075
2076 return(ret);
2077}
2078
2079/**
2080 * xmlParseAttValue:
2081 * @ctxt: an XML parser context
2082 *
2083 * parse a value for an attribute
2084 * Note: the parser won't do substitution of entities here, this
2085 * will be handled later in xmlStringGetNodeList
2086 *
2087 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2088 * "'" ([^<&'] | Reference)* "'"
2089 *
2090 * 3.3.3 Attribute-Value Normalization:
2091 * Before the value of an attribute is passed to the application or
2092 * checked for validity, the XML processor must normalize it as follows:
2093 * - a character reference is processed by appending the referenced
2094 * character to the attribute value
2095 * - an entity reference is processed by recursively processing the
2096 * replacement text of the entity
2097 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2098 * appending #x20 to the normalized value, except that only a single
2099 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2100 * parsed entity or the literal entity value of an internal parsed entity
2101 * - other characters are processed by appending them to the normalized value
2102 * If the declared value is not CDATA, then the XML processor must further
2103 * process the normalized attribute value by discarding any leading and
2104 * trailing space (#x20) characters, and by replacing sequences of space
2105 * (#x20) characters by a single space (#x20) character.
2106 * All attributes for which no declaration has been read should be treated
2107 * by a non-validating parser as if declared CDATA.
2108 *
2109 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2110 */
2111
2112xmlChar *
2113xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2114 xmlChar limit = 0;
2115 xmlChar *buf = NULL;
2116 int len = 0;
2117 int buf_size = 0;
2118 int c, l;
2119 xmlChar *current = NULL;
2120 xmlEntityPtr ent;
2121
2122
2123 SHRINK;
2124 if (NXT(0) == '"') {
2125 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2126 limit = '"';
2127 NEXT;
2128 } else if (NXT(0) == '\'') {
2129 limit = '\'';
2130 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2131 NEXT;
2132 } else {
2133 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2135 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2136 ctxt->wellFormed = 0;
2137 ctxt->disableSAX = 1;
2138 return(NULL);
2139 }
2140
2141 /*
2142 * allocate a translation buffer.
2143 */
2144 buf_size = XML_PARSER_BUFFER_SIZE;
2145 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2146 if (buf == NULL) {
2147 perror("xmlParseAttValue: malloc failed");
2148 return(NULL);
2149 }
2150
2151 /*
2152 * Ok loop until we reach one of the ending char or a size limit.
2153 */
2154 c = CUR_CHAR(l);
2155 while (((NXT(0) != limit) && /* checked */
2156 (c != '<')) || (ctxt->token != 0)) {
2157 if (c == 0) break;
2158 if (ctxt->token == '&') {
2159 /*
2160 * The reparsing will be done in xmlStringGetNodeList()
2161 * called by the attribute() function in SAX.c
2162 */
2163 static xmlChar buffer[6] = "&#38;";
2164
2165 if (len > buf_size - 10) {
2166 growBuffer(buf);
2167 }
2168 current = &buffer[0];
2169 while (*current != 0) { /* non input consuming */
2170 buf[len++] = *current++;
2171 }
2172 ctxt->token = 0;
2173 } else if (c == '&') {
2174 if (NXT(1) == '#') {
2175 int val = xmlParseCharRef(ctxt);
2176 if (val == '&') {
2177 /*
2178 * The reparsing will be done in xmlStringGetNodeList()
2179 * called by the attribute() function in SAX.c
2180 */
2181 static xmlChar buffer[6] = "&#38;";
2182
2183 if (len > buf_size - 10) {
2184 growBuffer(buf);
2185 }
2186 current = &buffer[0];
2187 while (*current != 0) { /* non input consuming */
2188 buf[len++] = *current++;
2189 }
2190 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002191 if (len > buf_size - 10) {
2192 growBuffer(buf);
2193 }
Owen Taylor3473f882001-02-23 17:55:21 +00002194 len += xmlCopyChar(0, &buf[len], val);
2195 }
2196 } else {
2197 ent = xmlParseEntityRef(ctxt);
2198 if ((ent != NULL) &&
2199 (ctxt->replaceEntities != 0)) {
2200 xmlChar *rep;
2201
2202 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2203 rep = xmlStringDecodeEntities(ctxt, ent->content,
2204 XML_SUBSTITUTE_REF, 0, 0, 0);
2205 if (rep != NULL) {
2206 current = rep;
2207 while (*current != 0) { /* non input consuming */
2208 buf[len++] = *current++;
2209 if (len > buf_size - 10) {
2210 growBuffer(buf);
2211 }
2212 }
2213 xmlFree(rep);
2214 }
2215 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002216 if (len > buf_size - 10) {
2217 growBuffer(buf);
2218 }
Owen Taylor3473f882001-02-23 17:55:21 +00002219 if (ent->content != NULL)
2220 buf[len++] = ent->content[0];
2221 }
2222 } else if (ent != NULL) {
2223 int i = xmlStrlen(ent->name);
2224 const xmlChar *cur = ent->name;
2225
2226 /*
2227 * This may look absurd but is needed to detect
2228 * entities problems
2229 */
2230 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2231 (ent->content != NULL)) {
2232 xmlChar *rep;
2233 rep = xmlStringDecodeEntities(ctxt, ent->content,
2234 XML_SUBSTITUTE_REF, 0, 0, 0);
2235 if (rep != NULL)
2236 xmlFree(rep);
2237 }
2238
2239 /*
2240 * Just output the reference
2241 */
2242 buf[len++] = '&';
2243 if (len > buf_size - i - 10) {
2244 growBuffer(buf);
2245 }
2246 for (;i > 0;i--)
2247 buf[len++] = *cur++;
2248 buf[len++] = ';';
2249 }
2250 }
2251 } else {
2252 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2253 COPY_BUF(l,buf,len,0x20);
2254 if (len > buf_size - 10) {
2255 growBuffer(buf);
2256 }
2257 } else {
2258 COPY_BUF(l,buf,len,c);
2259 if (len > buf_size - 10) {
2260 growBuffer(buf);
2261 }
2262 }
2263 NEXTL(l);
2264 }
2265 GROW;
2266 c = CUR_CHAR(l);
2267 }
2268 buf[len++] = 0;
2269 if (RAW == '<') {
2270 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2272 ctxt->sax->error(ctxt->userData,
2273 "Unescaped '<' not allowed in attributes values\n");
2274 ctxt->wellFormed = 0;
2275 ctxt->disableSAX = 1;
2276 } else if (RAW != limit) {
2277 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2279 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2280 ctxt->wellFormed = 0;
2281 ctxt->disableSAX = 1;
2282 } else
2283 NEXT;
2284 return(buf);
2285}
2286
2287/**
2288 * xmlParseSystemLiteral:
2289 * @ctxt: an XML parser context
2290 *
2291 * parse an XML Literal
2292 *
2293 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2294 *
2295 * Returns the SystemLiteral parsed or NULL
2296 */
2297
2298xmlChar *
2299xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2300 xmlChar *buf = NULL;
2301 int len = 0;
2302 int size = XML_PARSER_BUFFER_SIZE;
2303 int cur, l;
2304 xmlChar stop;
2305 int state = ctxt->instate;
2306 int count = 0;
2307
2308 SHRINK;
2309 if (RAW == '"') {
2310 NEXT;
2311 stop = '"';
2312 } else if (RAW == '\'') {
2313 NEXT;
2314 stop = '\'';
2315 } else {
2316 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2318 ctxt->sax->error(ctxt->userData,
2319 "SystemLiteral \" or ' expected\n");
2320 ctxt->wellFormed = 0;
2321 ctxt->disableSAX = 1;
2322 return(NULL);
2323 }
2324
2325 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2326 if (buf == NULL) {
2327 xmlGenericError(xmlGenericErrorContext,
2328 "malloc of %d byte failed\n", size);
2329 return(NULL);
2330 }
2331 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2332 cur = CUR_CHAR(l);
2333 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2334 if (len + 5 >= size) {
2335 size *= 2;
2336 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2337 if (buf == NULL) {
2338 xmlGenericError(xmlGenericErrorContext,
2339 "realloc of %d byte failed\n", size);
2340 ctxt->instate = (xmlParserInputState) state;
2341 return(NULL);
2342 }
2343 }
2344 count++;
2345 if (count > 50) {
2346 GROW;
2347 count = 0;
2348 }
2349 COPY_BUF(l,buf,len,cur);
2350 NEXTL(l);
2351 cur = CUR_CHAR(l);
2352 if (cur == 0) {
2353 GROW;
2354 SHRINK;
2355 cur = CUR_CHAR(l);
2356 }
2357 }
2358 buf[len] = 0;
2359 ctxt->instate = (xmlParserInputState) state;
2360 if (!IS_CHAR(cur)) {
2361 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2363 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2364 ctxt->wellFormed = 0;
2365 ctxt->disableSAX = 1;
2366 } else {
2367 NEXT;
2368 }
2369 return(buf);
2370}
2371
2372/**
2373 * xmlParsePubidLiteral:
2374 * @ctxt: an XML parser context
2375 *
2376 * parse an XML public literal
2377 *
2378 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2379 *
2380 * Returns the PubidLiteral parsed or NULL.
2381 */
2382
2383xmlChar *
2384xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2385 xmlChar *buf = NULL;
2386 int len = 0;
2387 int size = XML_PARSER_BUFFER_SIZE;
2388 xmlChar cur;
2389 xmlChar stop;
2390 int count = 0;
2391
2392 SHRINK;
2393 if (RAW == '"') {
2394 NEXT;
2395 stop = '"';
2396 } else if (RAW == '\'') {
2397 NEXT;
2398 stop = '\'';
2399 } else {
2400 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2402 ctxt->sax->error(ctxt->userData,
2403 "SystemLiteral \" or ' expected\n");
2404 ctxt->wellFormed = 0;
2405 ctxt->disableSAX = 1;
2406 return(NULL);
2407 }
2408 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2409 if (buf == NULL) {
2410 xmlGenericError(xmlGenericErrorContext,
2411 "malloc of %d byte failed\n", size);
2412 return(NULL);
2413 }
2414 cur = CUR;
2415 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2416 if (len + 1 >= size) {
2417 size *= 2;
2418 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2419 if (buf == NULL) {
2420 xmlGenericError(xmlGenericErrorContext,
2421 "realloc of %d byte failed\n", size);
2422 return(NULL);
2423 }
2424 }
2425 buf[len++] = cur;
2426 count++;
2427 if (count > 50) {
2428 GROW;
2429 count = 0;
2430 }
2431 NEXT;
2432 cur = CUR;
2433 if (cur == 0) {
2434 GROW;
2435 SHRINK;
2436 cur = CUR;
2437 }
2438 }
2439 buf[len] = 0;
2440 if (cur != stop) {
2441 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2443 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2444 ctxt->wellFormed = 0;
2445 ctxt->disableSAX = 1;
2446 } else {
2447 NEXT;
2448 }
2449 return(buf);
2450}
2451
Daniel Veillard48b2f892001-02-25 16:11:03 +00002452void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002453/**
2454 * xmlParseCharData:
2455 * @ctxt: an XML parser context
2456 * @cdata: int indicating whether we are within a CDATA section
2457 *
2458 * parse a CharData section.
2459 * if we are within a CDATA section ']]>' marks an end of section.
2460 *
2461 * The right angle bracket (>) may be represented using the string "&gt;",
2462 * and must, for compatibility, be escaped using "&gt;" or a character
2463 * reference when it appears in the string "]]>" in content, when that
2464 * string is not marking the end of a CDATA section.
2465 *
2466 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2467 */
2468
2469void
2470xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002471 const xmlChar *in;
2472 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002473 int line = ctxt->input->line;
2474 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002475
2476 SHRINK;
2477 GROW;
2478 /*
2479 * Accelerated common case where input don't need to be
2480 * modified before passing it to the handler.
2481 */
2482 if ((ctxt->token == 0) && (!cdata)) {
2483 in = ctxt->input->cur;
2484 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002485get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002486 while (((*in >= 0x20) && (*in != '<') &&
2487 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2488 in++;
2489 if (*in == 0xA) {
2490 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002491 in++;
2492 while (*in == 0xA) {
2493 ctxt->input->line++;
2494 in++;
2495 }
2496 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002497 }
2498 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002499 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002500 if (IS_BLANK(*ctxt->input->cur)) {
2501 const xmlChar *tmp = ctxt->input->cur;
2502 ctxt->input->cur = in;
2503 if (areBlanks(ctxt, tmp, nbchar)) {
2504 if (ctxt->sax->ignorableWhitespace != NULL)
2505 ctxt->sax->ignorableWhitespace(ctxt->userData,
2506 tmp, nbchar);
2507 } else {
2508 if (ctxt->sax->characters != NULL)
2509 ctxt->sax->characters(ctxt->userData,
2510 tmp, nbchar);
2511 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002512 line = ctxt->input->line;
2513 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002514 } else {
2515 if (ctxt->sax->characters != NULL)
2516 ctxt->sax->characters(ctxt->userData,
2517 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002518 line = ctxt->input->line;
2519 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002520 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002521 }
2522 ctxt->input->cur = in;
2523 if (*in == 0xD) {
2524 in++;
2525 if (*in == 0xA) {
2526 ctxt->input->cur = in;
2527 in++;
2528 ctxt->input->line++;
2529 continue; /* while */
2530 }
2531 in--;
2532 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002533 if (*in == '<') {
2534 return;
2535 }
2536 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002537 return;
2538 }
2539 SHRINK;
2540 GROW;
2541 in = ctxt->input->cur;
2542 } while ((*in >= 0x20) && (*in <= 0x7F));
2543 nbchar = 0;
2544 }
Daniel Veillard50582112001-03-26 22:52:16 +00002545 ctxt->input->line = line;
2546 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002547 xmlParseCharDataComplex(ctxt, cdata);
2548}
2549
2550void
2551xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002552 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2553 int nbchar = 0;
2554 int cur, l;
2555 int count = 0;
2556
2557 SHRINK;
2558 GROW;
2559 cur = CUR_CHAR(l);
2560 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2561 ((cur != '&') || (ctxt->token == '&')) &&
2562 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2563 if ((cur == ']') && (NXT(1) == ']') &&
2564 (NXT(2) == '>')) {
2565 if (cdata) break;
2566 else {
2567 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2569 ctxt->sax->error(ctxt->userData,
2570 "Sequence ']]>' not allowed in content\n");
2571 /* Should this be relaxed ??? I see a "must here */
2572 ctxt->wellFormed = 0;
2573 ctxt->disableSAX = 1;
2574 }
2575 }
2576 COPY_BUF(l,buf,nbchar,cur);
2577 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2578 /*
2579 * Ok the segment is to be consumed as chars.
2580 */
2581 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2582 if (areBlanks(ctxt, buf, nbchar)) {
2583 if (ctxt->sax->ignorableWhitespace != NULL)
2584 ctxt->sax->ignorableWhitespace(ctxt->userData,
2585 buf, nbchar);
2586 } else {
2587 if (ctxt->sax->characters != NULL)
2588 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2589 }
2590 }
2591 nbchar = 0;
2592 }
2593 count++;
2594 if (count > 50) {
2595 GROW;
2596 count = 0;
2597 }
2598 NEXTL(l);
2599 cur = CUR_CHAR(l);
2600 }
2601 if (nbchar != 0) {
2602 /*
2603 * Ok the segment is to be consumed as chars.
2604 */
2605 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2606 if (areBlanks(ctxt, buf, nbchar)) {
2607 if (ctxt->sax->ignorableWhitespace != NULL)
2608 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2609 } else {
2610 if (ctxt->sax->characters != NULL)
2611 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2612 }
2613 }
2614 }
2615}
2616
2617/**
2618 * xmlParseExternalID:
2619 * @ctxt: an XML parser context
2620 * @publicID: a xmlChar** receiving PubidLiteral
2621 * @strict: indicate whether we should restrict parsing to only
2622 * production [75], see NOTE below
2623 *
2624 * Parse an External ID or a Public ID
2625 *
2626 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2627 * 'PUBLIC' S PubidLiteral S SystemLiteral
2628 *
2629 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2630 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2631 *
2632 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2633 *
2634 * Returns the function returns SystemLiteral and in the second
2635 * case publicID receives PubidLiteral, is strict is off
2636 * it is possible to return NULL and have publicID set.
2637 */
2638
2639xmlChar *
2640xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2641 xmlChar *URI = NULL;
2642
2643 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002644
2645 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002646 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2647 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2648 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2649 SKIP(6);
2650 if (!IS_BLANK(CUR)) {
2651 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2653 ctxt->sax->error(ctxt->userData,
2654 "Space required after 'SYSTEM'\n");
2655 ctxt->wellFormed = 0;
2656 ctxt->disableSAX = 1;
2657 }
2658 SKIP_BLANKS;
2659 URI = xmlParseSystemLiteral(ctxt);
2660 if (URI == NULL) {
2661 ctxt->errNo = XML_ERR_URI_REQUIRED;
2662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2663 ctxt->sax->error(ctxt->userData,
2664 "xmlParseExternalID: SYSTEM, no URI\n");
2665 ctxt->wellFormed = 0;
2666 ctxt->disableSAX = 1;
2667 }
2668 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2669 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2670 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2671 SKIP(6);
2672 if (!IS_BLANK(CUR)) {
2673 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2675 ctxt->sax->error(ctxt->userData,
2676 "Space required after 'PUBLIC'\n");
2677 ctxt->wellFormed = 0;
2678 ctxt->disableSAX = 1;
2679 }
2680 SKIP_BLANKS;
2681 *publicID = xmlParsePubidLiteral(ctxt);
2682 if (*publicID == NULL) {
2683 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2685 ctxt->sax->error(ctxt->userData,
2686 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2687 ctxt->wellFormed = 0;
2688 ctxt->disableSAX = 1;
2689 }
2690 if (strict) {
2691 /*
2692 * We don't handle [83] so "S SystemLiteral" is required.
2693 */
2694 if (!IS_BLANK(CUR)) {
2695 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2697 ctxt->sax->error(ctxt->userData,
2698 "Space required after the Public Identifier\n");
2699 ctxt->wellFormed = 0;
2700 ctxt->disableSAX = 1;
2701 }
2702 } else {
2703 /*
2704 * We handle [83] so we return immediately, if
2705 * "S SystemLiteral" is not detected. From a purely parsing
2706 * point of view that's a nice mess.
2707 */
2708 const xmlChar *ptr;
2709 GROW;
2710
2711 ptr = CUR_PTR;
2712 if (!IS_BLANK(*ptr)) return(NULL);
2713
2714 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2715 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2716 }
2717 SKIP_BLANKS;
2718 URI = xmlParseSystemLiteral(ctxt);
2719 if (URI == NULL) {
2720 ctxt->errNo = XML_ERR_URI_REQUIRED;
2721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2722 ctxt->sax->error(ctxt->userData,
2723 "xmlParseExternalID: PUBLIC, no URI\n");
2724 ctxt->wellFormed = 0;
2725 ctxt->disableSAX = 1;
2726 }
2727 }
2728 return(URI);
2729}
2730
2731/**
2732 * xmlParseComment:
2733 * @ctxt: an XML parser context
2734 *
2735 * Skip an XML (SGML) comment <!-- .... -->
2736 * The spec says that "For compatibility, the string "--" (double-hyphen)
2737 * must not occur within comments. "
2738 *
2739 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2740 */
2741void
2742xmlParseComment(xmlParserCtxtPtr ctxt) {
2743 xmlChar *buf = NULL;
2744 int len;
2745 int size = XML_PARSER_BUFFER_SIZE;
2746 int q, ql;
2747 int r, rl;
2748 int cur, l;
2749 xmlParserInputState state;
2750 xmlParserInputPtr input = ctxt->input;
2751 int count = 0;
2752
2753 /*
2754 * Check that there is a comment right here.
2755 */
2756 if ((RAW != '<') || (NXT(1) != '!') ||
2757 (NXT(2) != '-') || (NXT(3) != '-')) return;
2758
2759 state = ctxt->instate;
2760 ctxt->instate = XML_PARSER_COMMENT;
2761 SHRINK;
2762 SKIP(4);
2763 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2764 if (buf == NULL) {
2765 xmlGenericError(xmlGenericErrorContext,
2766 "malloc of %d byte failed\n", size);
2767 ctxt->instate = state;
2768 return;
2769 }
2770 q = CUR_CHAR(ql);
2771 NEXTL(ql);
2772 r = CUR_CHAR(rl);
2773 NEXTL(rl);
2774 cur = CUR_CHAR(l);
2775 len = 0;
2776 while (IS_CHAR(cur) && /* checked */
2777 ((cur != '>') ||
2778 (r != '-') || (q != '-'))) {
2779 if ((r == '-') && (q == '-') && (len > 1)) {
2780 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2782 ctxt->sax->error(ctxt->userData,
2783 "Comment must not contain '--' (double-hyphen)`\n");
2784 ctxt->wellFormed = 0;
2785 ctxt->disableSAX = 1;
2786 }
2787 if (len + 5 >= size) {
2788 size *= 2;
2789 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2790 if (buf == NULL) {
2791 xmlGenericError(xmlGenericErrorContext,
2792 "realloc of %d byte failed\n", size);
2793 ctxt->instate = state;
2794 return;
2795 }
2796 }
2797 COPY_BUF(ql,buf,len,q);
2798 q = r;
2799 ql = rl;
2800 r = cur;
2801 rl = l;
2802
2803 count++;
2804 if (count > 50) {
2805 GROW;
2806 count = 0;
2807 }
2808 NEXTL(l);
2809 cur = CUR_CHAR(l);
2810 if (cur == 0) {
2811 SHRINK;
2812 GROW;
2813 cur = CUR_CHAR(l);
2814 }
2815 }
2816 buf[len] = 0;
2817 if (!IS_CHAR(cur)) {
2818 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2820 ctxt->sax->error(ctxt->userData,
2821 "Comment not terminated \n<!--%.50s\n", buf);
2822 ctxt->wellFormed = 0;
2823 ctxt->disableSAX = 1;
2824 xmlFree(buf);
2825 } else {
2826 if (input != ctxt->input) {
2827 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2829 ctxt->sax->error(ctxt->userData,
2830"Comment doesn't start and stop in the same entity\n");
2831 ctxt->wellFormed = 0;
2832 ctxt->disableSAX = 1;
2833 }
2834 NEXT;
2835 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2836 (!ctxt->disableSAX))
2837 ctxt->sax->comment(ctxt->userData, buf);
2838 xmlFree(buf);
2839 }
2840 ctxt->instate = state;
2841}
2842
2843/**
2844 * xmlParsePITarget:
2845 * @ctxt: an XML parser context
2846 *
2847 * parse the name of a PI
2848 *
2849 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2850 *
2851 * Returns the PITarget name or NULL
2852 */
2853
2854xmlChar *
2855xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2856 xmlChar *name;
2857
2858 name = xmlParseName(ctxt);
2859 if ((name != NULL) &&
2860 ((name[0] == 'x') || (name[0] == 'X')) &&
2861 ((name[1] == 'm') || (name[1] == 'M')) &&
2862 ((name[2] == 'l') || (name[2] == 'L'))) {
2863 int i;
2864 if ((name[0] == 'x') && (name[1] == 'm') &&
2865 (name[2] == 'l') && (name[3] == 0)) {
2866 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2868 ctxt->sax->error(ctxt->userData,
2869 "XML declaration allowed only at the start of the document\n");
2870 ctxt->wellFormed = 0;
2871 ctxt->disableSAX = 1;
2872 return(name);
2873 } else if (name[3] == 0) {
2874 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2876 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2877 ctxt->wellFormed = 0;
2878 ctxt->disableSAX = 1;
2879 return(name);
2880 }
2881 for (i = 0;;i++) {
2882 if (xmlW3CPIs[i] == NULL) break;
2883 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2884 return(name);
2885 }
2886 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2887 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2888 ctxt->sax->warning(ctxt->userData,
2889 "xmlParsePItarget: invalid name prefix 'xml'\n");
2890 }
2891 }
2892 return(name);
2893}
2894
2895/**
2896 * xmlParsePI:
2897 * @ctxt: an XML parser context
2898 *
2899 * parse an XML Processing Instruction.
2900 *
2901 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2902 *
2903 * The processing is transfered to SAX once parsed.
2904 */
2905
2906void
2907xmlParsePI(xmlParserCtxtPtr ctxt) {
2908 xmlChar *buf = NULL;
2909 int len = 0;
2910 int size = XML_PARSER_BUFFER_SIZE;
2911 int cur, l;
2912 xmlChar *target;
2913 xmlParserInputState state;
2914 int count = 0;
2915
2916 if ((RAW == '<') && (NXT(1) == '?')) {
2917 xmlParserInputPtr input = ctxt->input;
2918 state = ctxt->instate;
2919 ctxt->instate = XML_PARSER_PI;
2920 /*
2921 * this is a Processing Instruction.
2922 */
2923 SKIP(2);
2924 SHRINK;
2925
2926 /*
2927 * Parse the target name and check for special support like
2928 * namespace.
2929 */
2930 target = xmlParsePITarget(ctxt);
2931 if (target != NULL) {
2932 if ((RAW == '?') && (NXT(1) == '>')) {
2933 if (input != ctxt->input) {
2934 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2936 ctxt->sax->error(ctxt->userData,
2937 "PI declaration doesn't start and stop in the same entity\n");
2938 ctxt->wellFormed = 0;
2939 ctxt->disableSAX = 1;
2940 }
2941 SKIP(2);
2942
2943 /*
2944 * SAX: PI detected.
2945 */
2946 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2947 (ctxt->sax->processingInstruction != NULL))
2948 ctxt->sax->processingInstruction(ctxt->userData,
2949 target, NULL);
2950 ctxt->instate = state;
2951 xmlFree(target);
2952 return;
2953 }
2954 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2955 if (buf == NULL) {
2956 xmlGenericError(xmlGenericErrorContext,
2957 "malloc of %d byte failed\n", size);
2958 ctxt->instate = state;
2959 return;
2960 }
2961 cur = CUR;
2962 if (!IS_BLANK(cur)) {
2963 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2965 ctxt->sax->error(ctxt->userData,
2966 "xmlParsePI: PI %s space expected\n", target);
2967 ctxt->wellFormed = 0;
2968 ctxt->disableSAX = 1;
2969 }
2970 SKIP_BLANKS;
2971 cur = CUR_CHAR(l);
2972 while (IS_CHAR(cur) && /* checked */
2973 ((cur != '?') || (NXT(1) != '>'))) {
2974 if (len + 5 >= size) {
2975 size *= 2;
2976 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2977 if (buf == NULL) {
2978 xmlGenericError(xmlGenericErrorContext,
2979 "realloc of %d byte failed\n", size);
2980 ctxt->instate = state;
2981 return;
2982 }
2983 }
2984 count++;
2985 if (count > 50) {
2986 GROW;
2987 count = 0;
2988 }
2989 COPY_BUF(l,buf,len,cur);
2990 NEXTL(l);
2991 cur = CUR_CHAR(l);
2992 if (cur == 0) {
2993 SHRINK;
2994 GROW;
2995 cur = CUR_CHAR(l);
2996 }
2997 }
2998 buf[len] = 0;
2999 if (cur != '?') {
3000 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3002 ctxt->sax->error(ctxt->userData,
3003 "xmlParsePI: PI %s never end ...\n", target);
3004 ctxt->wellFormed = 0;
3005 ctxt->disableSAX = 1;
3006 } else {
3007 if (input != ctxt->input) {
3008 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3010 ctxt->sax->error(ctxt->userData,
3011 "PI declaration doesn't start and stop in the same entity\n");
3012 ctxt->wellFormed = 0;
3013 ctxt->disableSAX = 1;
3014 }
3015 SKIP(2);
3016
3017 /*
3018 * SAX: PI detected.
3019 */
3020 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3021 (ctxt->sax->processingInstruction != NULL))
3022 ctxt->sax->processingInstruction(ctxt->userData,
3023 target, buf);
3024 }
3025 xmlFree(buf);
3026 xmlFree(target);
3027 } else {
3028 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3030 ctxt->sax->error(ctxt->userData,
3031 "xmlParsePI : no target name\n");
3032 ctxt->wellFormed = 0;
3033 ctxt->disableSAX = 1;
3034 }
3035 ctxt->instate = state;
3036 }
3037}
3038
3039/**
3040 * xmlParseNotationDecl:
3041 * @ctxt: an XML parser context
3042 *
3043 * parse a notation declaration
3044 *
3045 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3046 *
3047 * Hence there is actually 3 choices:
3048 * 'PUBLIC' S PubidLiteral
3049 * 'PUBLIC' S PubidLiteral S SystemLiteral
3050 * and 'SYSTEM' S SystemLiteral
3051 *
3052 * See the NOTE on xmlParseExternalID().
3053 */
3054
3055void
3056xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3057 xmlChar *name;
3058 xmlChar *Pubid;
3059 xmlChar *Systemid;
3060
3061 if ((RAW == '<') && (NXT(1) == '!') &&
3062 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3063 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3064 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3065 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3066 xmlParserInputPtr input = ctxt->input;
3067 SHRINK;
3068 SKIP(10);
3069 if (!IS_BLANK(CUR)) {
3070 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3072 ctxt->sax->error(ctxt->userData,
3073 "Space required after '<!NOTATION'\n");
3074 ctxt->wellFormed = 0;
3075 ctxt->disableSAX = 1;
3076 return;
3077 }
3078 SKIP_BLANKS;
3079
Daniel Veillard76d66f42001-05-16 21:05:17 +00003080 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (name == NULL) {
3082 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3084 ctxt->sax->error(ctxt->userData,
3085 "NOTATION: Name expected here\n");
3086 ctxt->wellFormed = 0;
3087 ctxt->disableSAX = 1;
3088 return;
3089 }
3090 if (!IS_BLANK(CUR)) {
3091 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3093 ctxt->sax->error(ctxt->userData,
3094 "Space required after the NOTATION name'\n");
3095 ctxt->wellFormed = 0;
3096 ctxt->disableSAX = 1;
3097 return;
3098 }
3099 SKIP_BLANKS;
3100
3101 /*
3102 * Parse the IDs.
3103 */
3104 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3105 SKIP_BLANKS;
3106
3107 if (RAW == '>') {
3108 if (input != ctxt->input) {
3109 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3111 ctxt->sax->error(ctxt->userData,
3112"Notation declaration doesn't start and stop in the same entity\n");
3113 ctxt->wellFormed = 0;
3114 ctxt->disableSAX = 1;
3115 }
3116 NEXT;
3117 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3118 (ctxt->sax->notationDecl != NULL))
3119 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3120 } else {
3121 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3123 ctxt->sax->error(ctxt->userData,
3124 "'>' required to close NOTATION declaration\n");
3125 ctxt->wellFormed = 0;
3126 ctxt->disableSAX = 1;
3127 }
3128 xmlFree(name);
3129 if (Systemid != NULL) xmlFree(Systemid);
3130 if (Pubid != NULL) xmlFree(Pubid);
3131 }
3132}
3133
3134/**
3135 * xmlParseEntityDecl:
3136 * @ctxt: an XML parser context
3137 *
3138 * parse <!ENTITY declarations
3139 *
3140 * [70] EntityDecl ::= GEDecl | PEDecl
3141 *
3142 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3143 *
3144 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3145 *
3146 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3147 *
3148 * [74] PEDef ::= EntityValue | ExternalID
3149 *
3150 * [76] NDataDecl ::= S 'NDATA' S Name
3151 *
3152 * [ VC: Notation Declared ]
3153 * The Name must match the declared name of a notation.
3154 */
3155
3156void
3157xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3158 xmlChar *name = NULL;
3159 xmlChar *value = NULL;
3160 xmlChar *URI = NULL, *literal = NULL;
3161 xmlChar *ndata = NULL;
3162 int isParameter = 0;
3163 xmlChar *orig = NULL;
3164
3165 GROW;
3166 if ((RAW == '<') && (NXT(1) == '!') &&
3167 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3168 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3169 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3170 xmlParserInputPtr input = ctxt->input;
3171 ctxt->instate = XML_PARSER_ENTITY_DECL;
3172 SHRINK;
3173 SKIP(8);
3174 if (!IS_BLANK(CUR)) {
3175 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3177 ctxt->sax->error(ctxt->userData,
3178 "Space required after '<!ENTITY'\n");
3179 ctxt->wellFormed = 0;
3180 ctxt->disableSAX = 1;
3181 }
3182 SKIP_BLANKS;
3183
3184 if (RAW == '%') {
3185 NEXT;
3186 if (!IS_BLANK(CUR)) {
3187 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3189 ctxt->sax->error(ctxt->userData,
3190 "Space required after '%'\n");
3191 ctxt->wellFormed = 0;
3192 ctxt->disableSAX = 1;
3193 }
3194 SKIP_BLANKS;
3195 isParameter = 1;
3196 }
3197
Daniel Veillard76d66f42001-05-16 21:05:17 +00003198 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003199 if (name == NULL) {
3200 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3202 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3203 ctxt->wellFormed = 0;
3204 ctxt->disableSAX = 1;
3205 return;
3206 }
3207 if (!IS_BLANK(CUR)) {
3208 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3210 ctxt->sax->error(ctxt->userData,
3211 "Space required after the entity name\n");
3212 ctxt->wellFormed = 0;
3213 ctxt->disableSAX = 1;
3214 }
3215 SKIP_BLANKS;
3216
3217 /*
3218 * handle the various case of definitions...
3219 */
3220 if (isParameter) {
3221 if ((RAW == '"') || (RAW == '\'')) {
3222 value = xmlParseEntityValue(ctxt, &orig);
3223 if (value) {
3224 if ((ctxt->sax != NULL) &&
3225 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3226 ctxt->sax->entityDecl(ctxt->userData, name,
3227 XML_INTERNAL_PARAMETER_ENTITY,
3228 NULL, NULL, value);
3229 }
3230 } else {
3231 URI = xmlParseExternalID(ctxt, &literal, 1);
3232 if ((URI == NULL) && (literal == NULL)) {
3233 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3235 ctxt->sax->error(ctxt->userData,
3236 "Entity value required\n");
3237 ctxt->wellFormed = 0;
3238 ctxt->disableSAX = 1;
3239 }
3240 if (URI) {
3241 xmlURIPtr uri;
3242
3243 uri = xmlParseURI((const char *) URI);
3244 if (uri == NULL) {
3245 ctxt->errNo = XML_ERR_INVALID_URI;
3246 if ((ctxt->sax != NULL) &&
3247 (!ctxt->disableSAX) &&
3248 (ctxt->sax->error != NULL))
3249 ctxt->sax->error(ctxt->userData,
3250 "Invalid URI: %s\n", URI);
3251 ctxt->wellFormed = 0;
3252 } else {
3253 if (uri->fragment != NULL) {
3254 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3255 if ((ctxt->sax != NULL) &&
3256 (!ctxt->disableSAX) &&
3257 (ctxt->sax->error != NULL))
3258 ctxt->sax->error(ctxt->userData,
3259 "Fragment not allowed: %s\n", URI);
3260 ctxt->wellFormed = 0;
3261 } else {
3262 if ((ctxt->sax != NULL) &&
3263 (!ctxt->disableSAX) &&
3264 (ctxt->sax->entityDecl != NULL))
3265 ctxt->sax->entityDecl(ctxt->userData, name,
3266 XML_EXTERNAL_PARAMETER_ENTITY,
3267 literal, URI, NULL);
3268 }
3269 xmlFreeURI(uri);
3270 }
3271 }
3272 }
3273 } else {
3274 if ((RAW == '"') || (RAW == '\'')) {
3275 value = xmlParseEntityValue(ctxt, &orig);
3276 if ((ctxt->sax != NULL) &&
3277 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3278 ctxt->sax->entityDecl(ctxt->userData, name,
3279 XML_INTERNAL_GENERAL_ENTITY,
3280 NULL, NULL, value);
3281 } else {
3282 URI = xmlParseExternalID(ctxt, &literal, 1);
3283 if ((URI == NULL) && (literal == NULL)) {
3284 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3286 ctxt->sax->error(ctxt->userData,
3287 "Entity value required\n");
3288 ctxt->wellFormed = 0;
3289 ctxt->disableSAX = 1;
3290 }
3291 if (URI) {
3292 xmlURIPtr uri;
3293
3294 uri = xmlParseURI((const char *)URI);
3295 if (uri == NULL) {
3296 ctxt->errNo = XML_ERR_INVALID_URI;
3297 if ((ctxt->sax != NULL) &&
3298 (!ctxt->disableSAX) &&
3299 (ctxt->sax->error != NULL))
3300 ctxt->sax->error(ctxt->userData,
3301 "Invalid URI: %s\n", URI);
3302 ctxt->wellFormed = 0;
3303 } else {
3304 if (uri->fragment != NULL) {
3305 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3306 if ((ctxt->sax != NULL) &&
3307 (!ctxt->disableSAX) &&
3308 (ctxt->sax->error != NULL))
3309 ctxt->sax->error(ctxt->userData,
3310 "Fragment not allowed: %s\n", URI);
3311 ctxt->wellFormed = 0;
3312 }
3313 xmlFreeURI(uri);
3314 }
3315 }
3316 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3317 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3319 ctxt->sax->error(ctxt->userData,
3320 "Space required before 'NDATA'\n");
3321 ctxt->wellFormed = 0;
3322 ctxt->disableSAX = 1;
3323 }
3324 SKIP_BLANKS;
3325 if ((RAW == 'N') && (NXT(1) == 'D') &&
3326 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3327 (NXT(4) == 'A')) {
3328 SKIP(5);
3329 if (!IS_BLANK(CUR)) {
3330 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3332 ctxt->sax->error(ctxt->userData,
3333 "Space required after 'NDATA'\n");
3334 ctxt->wellFormed = 0;
3335 ctxt->disableSAX = 1;
3336 }
3337 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003338 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003339 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3340 (ctxt->sax->unparsedEntityDecl != NULL))
3341 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3342 literal, URI, ndata);
3343 } else {
3344 if ((ctxt->sax != NULL) &&
3345 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3346 ctxt->sax->entityDecl(ctxt->userData, name,
3347 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3348 literal, URI, NULL);
3349 }
3350 }
3351 }
3352 SKIP_BLANKS;
3353 if (RAW != '>') {
3354 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3356 ctxt->sax->error(ctxt->userData,
3357 "xmlParseEntityDecl: entity %s not terminated\n", name);
3358 ctxt->wellFormed = 0;
3359 ctxt->disableSAX = 1;
3360 } else {
3361 if (input != ctxt->input) {
3362 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3364 ctxt->sax->error(ctxt->userData,
3365"Entity declaration doesn't start and stop in the same entity\n");
3366 ctxt->wellFormed = 0;
3367 ctxt->disableSAX = 1;
3368 }
3369 NEXT;
3370 }
3371 if (orig != NULL) {
3372 /*
3373 * Ugly mechanism to save the raw entity value.
3374 */
3375 xmlEntityPtr cur = NULL;
3376
3377 if (isParameter) {
3378 if ((ctxt->sax != NULL) &&
3379 (ctxt->sax->getParameterEntity != NULL))
3380 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3381 } else {
3382 if ((ctxt->sax != NULL) &&
3383 (ctxt->sax->getEntity != NULL))
3384 cur = ctxt->sax->getEntity(ctxt->userData, name);
3385 }
3386 if (cur != NULL) {
3387 if (cur->orig != NULL)
3388 xmlFree(orig);
3389 else
3390 cur->orig = orig;
3391 } else
3392 xmlFree(orig);
3393 }
3394 if (name != NULL) xmlFree(name);
3395 if (value != NULL) xmlFree(value);
3396 if (URI != NULL) xmlFree(URI);
3397 if (literal != NULL) xmlFree(literal);
3398 if (ndata != NULL) xmlFree(ndata);
3399 }
3400}
3401
3402/**
3403 * xmlParseDefaultDecl:
3404 * @ctxt: an XML parser context
3405 * @value: Receive a possible fixed default value for the attribute
3406 *
3407 * Parse an attribute default declaration
3408 *
3409 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3410 *
3411 * [ VC: Required Attribute ]
3412 * if the default declaration is the keyword #REQUIRED, then the
3413 * attribute must be specified for all elements of the type in the
3414 * attribute-list declaration.
3415 *
3416 * [ VC: Attribute Default Legal ]
3417 * The declared default value must meet the lexical constraints of
3418 * the declared attribute type c.f. xmlValidateAttributeDecl()
3419 *
3420 * [ VC: Fixed Attribute Default ]
3421 * if an attribute has a default value declared with the #FIXED
3422 * keyword, instances of that attribute must match the default value.
3423 *
3424 * [ WFC: No < in Attribute Values ]
3425 * handled in xmlParseAttValue()
3426 *
3427 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3428 * or XML_ATTRIBUTE_FIXED.
3429 */
3430
3431int
3432xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3433 int val;
3434 xmlChar *ret;
3435
3436 *value = NULL;
3437 if ((RAW == '#') && (NXT(1) == 'R') &&
3438 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3439 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3440 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3441 (NXT(8) == 'D')) {
3442 SKIP(9);
3443 return(XML_ATTRIBUTE_REQUIRED);
3444 }
3445 if ((RAW == '#') && (NXT(1) == 'I') &&
3446 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3447 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3448 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3449 SKIP(8);
3450 return(XML_ATTRIBUTE_IMPLIED);
3451 }
3452 val = XML_ATTRIBUTE_NONE;
3453 if ((RAW == '#') && (NXT(1) == 'F') &&
3454 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3455 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3456 SKIP(6);
3457 val = XML_ATTRIBUTE_FIXED;
3458 if (!IS_BLANK(CUR)) {
3459 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3461 ctxt->sax->error(ctxt->userData,
3462 "Space required after '#FIXED'\n");
3463 ctxt->wellFormed = 0;
3464 ctxt->disableSAX = 1;
3465 }
3466 SKIP_BLANKS;
3467 }
3468 ret = xmlParseAttValue(ctxt);
3469 ctxt->instate = XML_PARSER_DTD;
3470 if (ret == NULL) {
3471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3472 ctxt->sax->error(ctxt->userData,
3473 "Attribute default value declaration error\n");
3474 ctxt->wellFormed = 0;
3475 ctxt->disableSAX = 1;
3476 } else
3477 *value = ret;
3478 return(val);
3479}
3480
3481/**
3482 * xmlParseNotationType:
3483 * @ctxt: an XML parser context
3484 *
3485 * parse an Notation attribute type.
3486 *
3487 * Note: the leading 'NOTATION' S part has already being parsed...
3488 *
3489 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3490 *
3491 * [ VC: Notation Attributes ]
3492 * Values of this type must match one of the notation names included
3493 * in the declaration; all notation names in the declaration must be declared.
3494 *
3495 * Returns: the notation attribute tree built while parsing
3496 */
3497
3498xmlEnumerationPtr
3499xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3500 xmlChar *name;
3501 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3502
3503 if (RAW != '(') {
3504 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3506 ctxt->sax->error(ctxt->userData,
3507 "'(' required to start 'NOTATION'\n");
3508 ctxt->wellFormed = 0;
3509 ctxt->disableSAX = 1;
3510 return(NULL);
3511 }
3512 SHRINK;
3513 do {
3514 NEXT;
3515 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003516 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003517 if (name == NULL) {
3518 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3520 ctxt->sax->error(ctxt->userData,
3521 "Name expected in NOTATION declaration\n");
3522 ctxt->wellFormed = 0;
3523 ctxt->disableSAX = 1;
3524 return(ret);
3525 }
3526 cur = xmlCreateEnumeration(name);
3527 xmlFree(name);
3528 if (cur == NULL) return(ret);
3529 if (last == NULL) ret = last = cur;
3530 else {
3531 last->next = cur;
3532 last = cur;
3533 }
3534 SKIP_BLANKS;
3535 } while (RAW == '|');
3536 if (RAW != ')') {
3537 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3539 ctxt->sax->error(ctxt->userData,
3540 "')' required to finish NOTATION declaration\n");
3541 ctxt->wellFormed = 0;
3542 ctxt->disableSAX = 1;
3543 if ((last != NULL) && (last != ret))
3544 xmlFreeEnumeration(last);
3545 return(ret);
3546 }
3547 NEXT;
3548 return(ret);
3549}
3550
3551/**
3552 * xmlParseEnumerationType:
3553 * @ctxt: an XML parser context
3554 *
3555 * parse an Enumeration attribute type.
3556 *
3557 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3558 *
3559 * [ VC: Enumeration ]
3560 * Values of this type must match one of the Nmtoken tokens in
3561 * the declaration
3562 *
3563 * Returns: the enumeration attribute tree built while parsing
3564 */
3565
3566xmlEnumerationPtr
3567xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3568 xmlChar *name;
3569 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3570
3571 if (RAW != '(') {
3572 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3574 ctxt->sax->error(ctxt->userData,
3575 "'(' required to start ATTLIST enumeration\n");
3576 ctxt->wellFormed = 0;
3577 ctxt->disableSAX = 1;
3578 return(NULL);
3579 }
3580 SHRINK;
3581 do {
3582 NEXT;
3583 SKIP_BLANKS;
3584 name = xmlParseNmtoken(ctxt);
3585 if (name == NULL) {
3586 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3588 ctxt->sax->error(ctxt->userData,
3589 "NmToken expected in ATTLIST enumeration\n");
3590 ctxt->wellFormed = 0;
3591 ctxt->disableSAX = 1;
3592 return(ret);
3593 }
3594 cur = xmlCreateEnumeration(name);
3595 xmlFree(name);
3596 if (cur == NULL) return(ret);
3597 if (last == NULL) ret = last = cur;
3598 else {
3599 last->next = cur;
3600 last = cur;
3601 }
3602 SKIP_BLANKS;
3603 } while (RAW == '|');
3604 if (RAW != ')') {
3605 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3607 ctxt->sax->error(ctxt->userData,
3608 "')' required to finish ATTLIST enumeration\n");
3609 ctxt->wellFormed = 0;
3610 ctxt->disableSAX = 1;
3611 return(ret);
3612 }
3613 NEXT;
3614 return(ret);
3615}
3616
3617/**
3618 * xmlParseEnumeratedType:
3619 * @ctxt: an XML parser context
3620 * @tree: the enumeration tree built while parsing
3621 *
3622 * parse an Enumerated attribute type.
3623 *
3624 * [57] EnumeratedType ::= NotationType | Enumeration
3625 *
3626 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3627 *
3628 *
3629 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3630 */
3631
3632int
3633xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3634 if ((RAW == 'N') && (NXT(1) == 'O') &&
3635 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3636 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3637 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3638 SKIP(8);
3639 if (!IS_BLANK(CUR)) {
3640 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3642 ctxt->sax->error(ctxt->userData,
3643 "Space required after 'NOTATION'\n");
3644 ctxt->wellFormed = 0;
3645 ctxt->disableSAX = 1;
3646 return(0);
3647 }
3648 SKIP_BLANKS;
3649 *tree = xmlParseNotationType(ctxt);
3650 if (*tree == NULL) return(0);
3651 return(XML_ATTRIBUTE_NOTATION);
3652 }
3653 *tree = xmlParseEnumerationType(ctxt);
3654 if (*tree == NULL) return(0);
3655 return(XML_ATTRIBUTE_ENUMERATION);
3656}
3657
3658/**
3659 * xmlParseAttributeType:
3660 * @ctxt: an XML parser context
3661 * @tree: the enumeration tree built while parsing
3662 *
3663 * parse the Attribute list def for an element
3664 *
3665 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3666 *
3667 * [55] StringType ::= 'CDATA'
3668 *
3669 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3670 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3671 *
3672 * Validity constraints for attribute values syntax are checked in
3673 * xmlValidateAttributeValue()
3674 *
3675 * [ VC: ID ]
3676 * Values of type ID must match the Name production. A name must not
3677 * appear more than once in an XML document as a value of this type;
3678 * i.e., ID values must uniquely identify the elements which bear them.
3679 *
3680 * [ VC: One ID per Element Type ]
3681 * No element type may have more than one ID attribute specified.
3682 *
3683 * [ VC: ID Attribute Default ]
3684 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3685 *
3686 * [ VC: IDREF ]
3687 * Values of type IDREF must match the Name production, and values
3688 * of type IDREFS must match Names; each IDREF Name must match the value
3689 * of an ID attribute on some element in the XML document; i.e. IDREF
3690 * values must match the value of some ID attribute.
3691 *
3692 * [ VC: Entity Name ]
3693 * Values of type ENTITY must match the Name production, values
3694 * of type ENTITIES must match Names; each Entity Name must match the
3695 * name of an unparsed entity declared in the DTD.
3696 *
3697 * [ VC: Name Token ]
3698 * Values of type NMTOKEN must match the Nmtoken production; values
3699 * of type NMTOKENS must match Nmtokens.
3700 *
3701 * Returns the attribute type
3702 */
3703int
3704xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3705 SHRINK;
3706 if ((RAW == 'C') && (NXT(1) == 'D') &&
3707 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3708 (NXT(4) == 'A')) {
3709 SKIP(5);
3710 return(XML_ATTRIBUTE_CDATA);
3711 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3712 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3713 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3714 SKIP(6);
3715 return(XML_ATTRIBUTE_IDREFS);
3716 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3717 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3718 (NXT(4) == 'F')) {
3719 SKIP(5);
3720 return(XML_ATTRIBUTE_IDREF);
3721 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3722 SKIP(2);
3723 return(XML_ATTRIBUTE_ID);
3724 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3725 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3726 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3727 SKIP(6);
3728 return(XML_ATTRIBUTE_ENTITY);
3729 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3730 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3731 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3732 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3733 SKIP(8);
3734 return(XML_ATTRIBUTE_ENTITIES);
3735 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3736 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3737 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3738 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3739 SKIP(8);
3740 return(XML_ATTRIBUTE_NMTOKENS);
3741 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3742 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3743 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3744 (NXT(6) == 'N')) {
3745 SKIP(7);
3746 return(XML_ATTRIBUTE_NMTOKEN);
3747 }
3748 return(xmlParseEnumeratedType(ctxt, tree));
3749}
3750
3751/**
3752 * xmlParseAttributeListDecl:
3753 * @ctxt: an XML parser context
3754 *
3755 * : parse the Attribute list def for an element
3756 *
3757 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3758 *
3759 * [53] AttDef ::= S Name S AttType S DefaultDecl
3760 *
3761 */
3762void
3763xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3764 xmlChar *elemName;
3765 xmlChar *attrName;
3766 xmlEnumerationPtr tree;
3767
3768 if ((RAW == '<') && (NXT(1) == '!') &&
3769 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3770 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3771 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3772 (NXT(8) == 'T')) {
3773 xmlParserInputPtr input = ctxt->input;
3774
3775 SKIP(9);
3776 if (!IS_BLANK(CUR)) {
3777 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3779 ctxt->sax->error(ctxt->userData,
3780 "Space required after '<!ATTLIST'\n");
3781 ctxt->wellFormed = 0;
3782 ctxt->disableSAX = 1;
3783 }
3784 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003785 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003786 if (elemName == NULL) {
3787 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3789 ctxt->sax->error(ctxt->userData,
3790 "ATTLIST: no name for Element\n");
3791 ctxt->wellFormed = 0;
3792 ctxt->disableSAX = 1;
3793 return;
3794 }
3795 SKIP_BLANKS;
3796 GROW;
3797 while (RAW != '>') {
3798 const xmlChar *check = CUR_PTR;
3799 int type;
3800 int def;
3801 xmlChar *defaultValue = NULL;
3802
3803 GROW;
3804 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003805 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003806 if (attrName == NULL) {
3807 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3809 ctxt->sax->error(ctxt->userData,
3810 "ATTLIST: no name for Attribute\n");
3811 ctxt->wellFormed = 0;
3812 ctxt->disableSAX = 1;
3813 break;
3814 }
3815 GROW;
3816 if (!IS_BLANK(CUR)) {
3817 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3819 ctxt->sax->error(ctxt->userData,
3820 "Space required after the attribute name\n");
3821 ctxt->wellFormed = 0;
3822 ctxt->disableSAX = 1;
3823 if (attrName != NULL)
3824 xmlFree(attrName);
3825 if (defaultValue != NULL)
3826 xmlFree(defaultValue);
3827 break;
3828 }
3829 SKIP_BLANKS;
3830
3831 type = xmlParseAttributeType(ctxt, &tree);
3832 if (type <= 0) {
3833 if (attrName != NULL)
3834 xmlFree(attrName);
3835 if (defaultValue != NULL)
3836 xmlFree(defaultValue);
3837 break;
3838 }
3839
3840 GROW;
3841 if (!IS_BLANK(CUR)) {
3842 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3844 ctxt->sax->error(ctxt->userData,
3845 "Space required after the attribute type\n");
3846 ctxt->wellFormed = 0;
3847 ctxt->disableSAX = 1;
3848 if (attrName != NULL)
3849 xmlFree(attrName);
3850 if (defaultValue != NULL)
3851 xmlFree(defaultValue);
3852 if (tree != NULL)
3853 xmlFreeEnumeration(tree);
3854 break;
3855 }
3856 SKIP_BLANKS;
3857
3858 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3859 if (def <= 0) {
3860 if (attrName != NULL)
3861 xmlFree(attrName);
3862 if (defaultValue != NULL)
3863 xmlFree(defaultValue);
3864 if (tree != NULL)
3865 xmlFreeEnumeration(tree);
3866 break;
3867 }
3868
3869 GROW;
3870 if (RAW != '>') {
3871 if (!IS_BLANK(CUR)) {
3872 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3874 ctxt->sax->error(ctxt->userData,
3875 "Space required after the attribute default value\n");
3876 ctxt->wellFormed = 0;
3877 ctxt->disableSAX = 1;
3878 if (attrName != NULL)
3879 xmlFree(attrName);
3880 if (defaultValue != NULL)
3881 xmlFree(defaultValue);
3882 if (tree != NULL)
3883 xmlFreeEnumeration(tree);
3884 break;
3885 }
3886 SKIP_BLANKS;
3887 }
3888 if (check == CUR_PTR) {
3889 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3891 ctxt->sax->error(ctxt->userData,
3892 "xmlParseAttributeListDecl: detected internal error\n");
3893 if (attrName != NULL)
3894 xmlFree(attrName);
3895 if (defaultValue != NULL)
3896 xmlFree(defaultValue);
3897 if (tree != NULL)
3898 xmlFreeEnumeration(tree);
3899 break;
3900 }
3901 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3902 (ctxt->sax->attributeDecl != NULL))
3903 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3904 type, def, defaultValue, tree);
3905 if (attrName != NULL)
3906 xmlFree(attrName);
3907 if (defaultValue != NULL)
3908 xmlFree(defaultValue);
3909 GROW;
3910 }
3911 if (RAW == '>') {
3912 if (input != ctxt->input) {
3913 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3915 ctxt->sax->error(ctxt->userData,
3916"Attribute list declaration doesn't start and stop in the same entity\n");
3917 ctxt->wellFormed = 0;
3918 ctxt->disableSAX = 1;
3919 }
3920 NEXT;
3921 }
3922
3923 xmlFree(elemName);
3924 }
3925}
3926
3927/**
3928 * xmlParseElementMixedContentDecl:
3929 * @ctxt: an XML parser context
3930 *
3931 * parse the declaration for a Mixed Element content
3932 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3933 *
3934 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3935 * '(' S? '#PCDATA' S? ')'
3936 *
3937 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3938 *
3939 * [ VC: No Duplicate Types ]
3940 * The same name must not appear more than once in a single
3941 * mixed-content declaration.
3942 *
3943 * returns: the list of the xmlElementContentPtr describing the element choices
3944 */
3945xmlElementContentPtr
3946xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3947 xmlElementContentPtr ret = NULL, cur = NULL, n;
3948 xmlChar *elem = NULL;
3949
3950 GROW;
3951 if ((RAW == '#') && (NXT(1) == 'P') &&
3952 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3953 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3954 (NXT(6) == 'A')) {
3955 SKIP(7);
3956 SKIP_BLANKS;
3957 SHRINK;
3958 if (RAW == ')') {
3959 ctxt->entity = ctxt->input;
3960 NEXT;
3961 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3962 if (RAW == '*') {
3963 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3964 NEXT;
3965 }
3966 return(ret);
3967 }
3968 if ((RAW == '(') || (RAW == '|')) {
3969 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3970 if (ret == NULL) return(NULL);
3971 }
3972 while (RAW == '|') {
3973 NEXT;
3974 if (elem == NULL) {
3975 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3976 if (ret == NULL) return(NULL);
3977 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003978 if (cur != NULL)
3979 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003980 cur = ret;
3981 } else {
3982 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3983 if (n == NULL) return(NULL);
3984 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003985 if (n->c1 != NULL)
3986 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00003987 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003988 if (n != NULL)
3989 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003990 cur = n;
3991 xmlFree(elem);
3992 }
3993 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003994 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 if (elem == NULL) {
3996 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "xmlParseElementMixedContentDecl : Name expected\n");
4000 ctxt->wellFormed = 0;
4001 ctxt->disableSAX = 1;
4002 xmlFreeElementContent(cur);
4003 return(NULL);
4004 }
4005 SKIP_BLANKS;
4006 GROW;
4007 }
4008 if ((RAW == ')') && (NXT(1) == '*')) {
4009 if (elem != NULL) {
4010 cur->c2 = xmlNewElementContent(elem,
4011 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004012 if (cur->c2 != NULL)
4013 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004014 xmlFree(elem);
4015 }
4016 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4017 ctxt->entity = ctxt->input;
4018 SKIP(2);
4019 } else {
4020 if (elem != NULL) xmlFree(elem);
4021 xmlFreeElementContent(ret);
4022 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4024 ctxt->sax->error(ctxt->userData,
4025 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4026 ctxt->wellFormed = 0;
4027 ctxt->disableSAX = 1;
4028 return(NULL);
4029 }
4030
4031 } else {
4032 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4034 ctxt->sax->error(ctxt->userData,
4035 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4036 ctxt->wellFormed = 0;
4037 ctxt->disableSAX = 1;
4038 }
4039 return(ret);
4040}
4041
4042/**
4043 * xmlParseElementChildrenContentDecl:
4044 * @ctxt: an XML parser context
4045 *
4046 * parse the declaration for a Mixed Element content
4047 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4048 *
4049 *
4050 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4051 *
4052 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4053 *
4054 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4055 *
4056 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4057 *
4058 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4059 * TODO Parameter-entity replacement text must be properly nested
4060 * with parenthetized groups. That is to say, if either of the
4061 * opening or closing parentheses in a choice, seq, or Mixed
4062 * construct is contained in the replacement text for a parameter
4063 * entity, both must be contained in the same replacement text. For
4064 * interoperability, if a parameter-entity reference appears in a
4065 * choice, seq, or Mixed construct, its replacement text should not
4066 * be empty, and neither the first nor last non-blank character of
4067 * the replacement text should be a connector (| or ,).
4068 *
4069 * returns: the tree of xmlElementContentPtr describing the element
4070 * hierarchy.
4071 */
4072xmlElementContentPtr
4073#ifdef VMS
4074xmlParseElementChildrenContentD
4075#else
4076xmlParseElementChildrenContentDecl
4077#endif
4078(xmlParserCtxtPtr ctxt) {
4079 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4080 xmlChar *elem;
4081 xmlChar type = 0;
4082
4083 SKIP_BLANKS;
4084 GROW;
4085 if (RAW == '(') {
4086 /* Recurse on first child */
4087 NEXT;
4088 SKIP_BLANKS;
4089 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4090 SKIP_BLANKS;
4091 GROW;
4092 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004093 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004094 if (elem == NULL) {
4095 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4097 ctxt->sax->error(ctxt->userData,
4098 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4099 ctxt->wellFormed = 0;
4100 ctxt->disableSAX = 1;
4101 return(NULL);
4102 }
4103 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4104 GROW;
4105 if (RAW == '?') {
4106 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4107 NEXT;
4108 } else if (RAW == '*') {
4109 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4110 NEXT;
4111 } else if (RAW == '+') {
4112 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4113 NEXT;
4114 } else {
4115 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4116 }
4117 xmlFree(elem);
4118 GROW;
4119 }
4120 SKIP_BLANKS;
4121 SHRINK;
4122 while (RAW != ')') {
4123 /*
4124 * Each loop we parse one separator and one element.
4125 */
4126 if (RAW == ',') {
4127 if (type == 0) type = CUR;
4128
4129 /*
4130 * Detect "Name | Name , Name" error
4131 */
4132 else if (type != CUR) {
4133 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4135 ctxt->sax->error(ctxt->userData,
4136 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4137 type);
4138 ctxt->wellFormed = 0;
4139 ctxt->disableSAX = 1;
4140 if ((op != NULL) && (op != ret))
4141 xmlFreeElementContent(op);
4142 if ((last != NULL) && (last != ret) &&
4143 (last != ret->c1) && (last != ret->c2))
4144 xmlFreeElementContent(last);
4145 if (ret != NULL)
4146 xmlFreeElementContent(ret);
4147 return(NULL);
4148 }
4149 NEXT;
4150
4151 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4152 if (op == NULL) {
4153 xmlFreeElementContent(ret);
4154 return(NULL);
4155 }
4156 if (last == NULL) {
4157 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004158 if (ret != NULL)
4159 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004160 ret = cur = op;
4161 } else {
4162 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004163 if (op != NULL)
4164 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004165 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004166 if (last != NULL)
4167 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004168 cur =op;
4169 last = NULL;
4170 }
4171 } else if (RAW == '|') {
4172 if (type == 0) type = CUR;
4173
4174 /*
4175 * Detect "Name , Name | Name" error
4176 */
4177 else if (type != CUR) {
4178 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4180 ctxt->sax->error(ctxt->userData,
4181 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4182 type);
4183 ctxt->wellFormed = 0;
4184 ctxt->disableSAX = 1;
4185 if ((op != NULL) && (op != ret) && (op != last))
4186 xmlFreeElementContent(op);
4187 if ((last != NULL) && (last != ret) &&
4188 (last != ret->c1) && (last != ret->c2))
4189 xmlFreeElementContent(last);
4190 if (ret != NULL)
4191 xmlFreeElementContent(ret);
4192 return(NULL);
4193 }
4194 NEXT;
4195
4196 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4197 if (op == NULL) {
4198 if ((op != NULL) && (op != ret))
4199 xmlFreeElementContent(op);
4200 if ((last != NULL) && (last != ret) &&
4201 (last != ret->c1) && (last != ret->c2))
4202 xmlFreeElementContent(last);
4203 if (ret != NULL)
4204 xmlFreeElementContent(ret);
4205 return(NULL);
4206 }
4207 if (last == NULL) {
4208 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004209 if (ret != NULL)
4210 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004211 ret = cur = op;
4212 } else {
4213 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004214 if (op != NULL)
4215 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004216 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004217 if (last != NULL)
4218 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004219 cur =op;
4220 last = NULL;
4221 }
4222 } else {
4223 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4225 ctxt->sax->error(ctxt->userData,
4226 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4227 ctxt->wellFormed = 0;
4228 ctxt->disableSAX = 1;
4229 if ((op != NULL) && (op != ret))
4230 xmlFreeElementContent(op);
4231 if ((last != NULL) && (last != ret) &&
4232 (last != ret->c1) && (last != ret->c2))
4233 xmlFreeElementContent(last);
4234 if (ret != NULL)
4235 xmlFreeElementContent(ret);
4236 return(NULL);
4237 }
4238 GROW;
4239 SKIP_BLANKS;
4240 GROW;
4241 if (RAW == '(') {
4242 /* Recurse on second child */
4243 NEXT;
4244 SKIP_BLANKS;
4245 last = xmlParseElementChildrenContentDecl(ctxt);
4246 SKIP_BLANKS;
4247 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004248 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004249 if (elem == NULL) {
4250 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4252 ctxt->sax->error(ctxt->userData,
4253 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4254 ctxt->wellFormed = 0;
4255 ctxt->disableSAX = 1;
4256 if ((op != NULL) && (op != ret))
4257 xmlFreeElementContent(op);
4258 if ((last != NULL) && (last != ret) &&
4259 (last != ret->c1) && (last != ret->c2))
4260 xmlFreeElementContent(last);
4261 if (ret != NULL)
4262 xmlFreeElementContent(ret);
4263 return(NULL);
4264 }
4265 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4266 xmlFree(elem);
4267 if (RAW == '?') {
4268 last->ocur = XML_ELEMENT_CONTENT_OPT;
4269 NEXT;
4270 } else if (RAW == '*') {
4271 last->ocur = XML_ELEMENT_CONTENT_MULT;
4272 NEXT;
4273 } else if (RAW == '+') {
4274 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4275 NEXT;
4276 } else {
4277 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4278 }
4279 }
4280 SKIP_BLANKS;
4281 GROW;
4282 }
4283 if ((cur != NULL) && (last != NULL)) {
4284 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004285 if (last != NULL)
4286 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004287 }
4288 ctxt->entity = ctxt->input;
4289 NEXT;
4290 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004291 if (ret != NULL)
4292 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004293 NEXT;
4294 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004295 if (ret != NULL)
4296 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004297 NEXT;
4298 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004299 if (ret != NULL)
4300 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004301 NEXT;
4302 }
4303 return(ret);
4304}
4305
4306/**
4307 * xmlParseElementContentDecl:
4308 * @ctxt: an XML parser context
4309 * @name: the name of the element being defined.
4310 * @result: the Element Content pointer will be stored here if any
4311 *
4312 * parse the declaration for an Element content either Mixed or Children,
4313 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4314 *
4315 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4316 *
4317 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4318 */
4319
4320int
4321xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4322 xmlElementContentPtr *result) {
4323
4324 xmlElementContentPtr tree = NULL;
4325 xmlParserInputPtr input = ctxt->input;
4326 int res;
4327
4328 *result = NULL;
4329
4330 if (RAW != '(') {
4331 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4333 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004334 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004335 ctxt->wellFormed = 0;
4336 ctxt->disableSAX = 1;
4337 return(-1);
4338 }
4339 NEXT;
4340 GROW;
4341 SKIP_BLANKS;
4342 if ((RAW == '#') && (NXT(1) == 'P') &&
4343 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4344 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4345 (NXT(6) == 'A')) {
4346 tree = xmlParseElementMixedContentDecl(ctxt);
4347 res = XML_ELEMENT_TYPE_MIXED;
4348 } else {
4349 tree = xmlParseElementChildrenContentDecl(ctxt);
4350 res = XML_ELEMENT_TYPE_ELEMENT;
4351 }
4352 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4353 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4355 ctxt->sax->error(ctxt->userData,
4356"Element content declaration doesn't start and stop in the same entity\n");
4357 ctxt->wellFormed = 0;
4358 ctxt->disableSAX = 1;
4359 }
4360 SKIP_BLANKS;
4361 *result = tree;
4362 return(res);
4363}
4364
4365/**
4366 * xmlParseElementDecl:
4367 * @ctxt: an XML parser context
4368 *
4369 * parse an Element declaration.
4370 *
4371 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4372 *
4373 * [ VC: Unique Element Type Declaration ]
4374 * No element type may be declared more than once
4375 *
4376 * Returns the type of the element, or -1 in case of error
4377 */
4378int
4379xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4380 xmlChar *name;
4381 int ret = -1;
4382 xmlElementContentPtr content = NULL;
4383
4384 GROW;
4385 if ((RAW == '<') && (NXT(1) == '!') &&
4386 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4387 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4388 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4389 (NXT(8) == 'T')) {
4390 xmlParserInputPtr input = ctxt->input;
4391
4392 SKIP(9);
4393 if (!IS_BLANK(CUR)) {
4394 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4396 ctxt->sax->error(ctxt->userData,
4397 "Space required after 'ELEMENT'\n");
4398 ctxt->wellFormed = 0;
4399 ctxt->disableSAX = 1;
4400 }
4401 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004402 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004403 if (name == NULL) {
4404 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4406 ctxt->sax->error(ctxt->userData,
4407 "xmlParseElementDecl: no name for Element\n");
4408 ctxt->wellFormed = 0;
4409 ctxt->disableSAX = 1;
4410 return(-1);
4411 }
4412 while ((RAW == 0) && (ctxt->inputNr > 1))
4413 xmlPopInput(ctxt);
4414 if (!IS_BLANK(CUR)) {
4415 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4417 ctxt->sax->error(ctxt->userData,
4418 "Space required after the element name\n");
4419 ctxt->wellFormed = 0;
4420 ctxt->disableSAX = 1;
4421 }
4422 SKIP_BLANKS;
4423 if ((RAW == 'E') && (NXT(1) == 'M') &&
4424 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4425 (NXT(4) == 'Y')) {
4426 SKIP(5);
4427 /*
4428 * Element must always be empty.
4429 */
4430 ret = XML_ELEMENT_TYPE_EMPTY;
4431 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4432 (NXT(2) == 'Y')) {
4433 SKIP(3);
4434 /*
4435 * Element is a generic container.
4436 */
4437 ret = XML_ELEMENT_TYPE_ANY;
4438 } else if (RAW == '(') {
4439 ret = xmlParseElementContentDecl(ctxt, name, &content);
4440 } else {
4441 /*
4442 * [ WFC: PEs in Internal Subset ] error handling.
4443 */
4444 if ((RAW == '%') && (ctxt->external == 0) &&
4445 (ctxt->inputNr == 1)) {
4446 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4448 ctxt->sax->error(ctxt->userData,
4449 "PEReference: forbidden within markup decl in internal subset\n");
4450 } else {
4451 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4453 ctxt->sax->error(ctxt->userData,
4454 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4455 }
4456 ctxt->wellFormed = 0;
4457 ctxt->disableSAX = 1;
4458 if (name != NULL) xmlFree(name);
4459 return(-1);
4460 }
4461
4462 SKIP_BLANKS;
4463 /*
4464 * Pop-up of finished entities.
4465 */
4466 while ((RAW == 0) && (ctxt->inputNr > 1))
4467 xmlPopInput(ctxt);
4468 SKIP_BLANKS;
4469
4470 if (RAW != '>') {
4471 ctxt->errNo = XML_ERR_GT_REQUIRED;
4472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4473 ctxt->sax->error(ctxt->userData,
4474 "xmlParseElementDecl: expected '>' at the end\n");
4475 ctxt->wellFormed = 0;
4476 ctxt->disableSAX = 1;
4477 } else {
4478 if (input != ctxt->input) {
4479 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4481 ctxt->sax->error(ctxt->userData,
4482"Element declaration doesn't start and stop in the same entity\n");
4483 ctxt->wellFormed = 0;
4484 ctxt->disableSAX = 1;
4485 }
4486
4487 NEXT;
4488 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4489 (ctxt->sax->elementDecl != NULL))
4490 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4491 content);
4492 }
4493 if (content != NULL) {
4494 xmlFreeElementContent(content);
4495 }
4496 if (name != NULL) {
4497 xmlFree(name);
4498 }
4499 }
4500 return(ret);
4501}
4502
4503/**
4504 * xmlParseMarkupDecl:
4505 * @ctxt: an XML parser context
4506 *
4507 * parse Markup declarations
4508 *
4509 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4510 * NotationDecl | PI | Comment
4511 *
4512 * [ VC: Proper Declaration/PE Nesting ]
4513 * Parameter-entity replacement text must be properly nested with
4514 * markup declarations. That is to say, if either the first character
4515 * or the last character of a markup declaration (markupdecl above) is
4516 * contained in the replacement text for a parameter-entity reference,
4517 * both must be contained in the same replacement text.
4518 *
4519 * [ WFC: PEs in Internal Subset ]
4520 * In the internal DTD subset, parameter-entity references can occur
4521 * only where markup declarations can occur, not within markup declarations.
4522 * (This does not apply to references that occur in external parameter
4523 * entities or to the external subset.)
4524 */
4525void
4526xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4527 GROW;
4528 xmlParseElementDecl(ctxt);
4529 xmlParseAttributeListDecl(ctxt);
4530 xmlParseEntityDecl(ctxt);
4531 xmlParseNotationDecl(ctxt);
4532 xmlParsePI(ctxt);
4533 xmlParseComment(ctxt);
4534 /*
4535 * This is only for internal subset. On external entities,
4536 * the replacement is done before parsing stage
4537 */
4538 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4539 xmlParsePEReference(ctxt);
4540 ctxt->instate = XML_PARSER_DTD;
4541}
4542
4543/**
4544 * xmlParseTextDecl:
4545 * @ctxt: an XML parser context
4546 *
4547 * parse an XML declaration header for external entities
4548 *
4549 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4550 *
4551 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4552 */
4553
4554void
4555xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4556 xmlChar *version;
4557
4558 /*
4559 * We know that '<?xml' is here.
4560 */
4561 if ((RAW == '<') && (NXT(1) == '?') &&
4562 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4563 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4564 SKIP(5);
4565 } else {
4566 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4568 ctxt->sax->error(ctxt->userData,
4569 "Text declaration '<?xml' required\n");
4570 ctxt->wellFormed = 0;
4571 ctxt->disableSAX = 1;
4572
4573 return;
4574 }
4575
4576 if (!IS_BLANK(CUR)) {
4577 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4579 ctxt->sax->error(ctxt->userData,
4580 "Space needed after '<?xml'\n");
4581 ctxt->wellFormed = 0;
4582 ctxt->disableSAX = 1;
4583 }
4584 SKIP_BLANKS;
4585
4586 /*
4587 * We may have the VersionInfo here.
4588 */
4589 version = xmlParseVersionInfo(ctxt);
4590 if (version == NULL)
4591 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4592 ctxt->input->version = version;
4593
4594 /*
4595 * We must have the encoding declaration
4596 */
4597 if (!IS_BLANK(CUR)) {
4598 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4600 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4601 ctxt->wellFormed = 0;
4602 ctxt->disableSAX = 1;
4603 }
4604 xmlParseEncodingDecl(ctxt);
4605 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4606 /*
4607 * The XML REC instructs us to stop parsing right here
4608 */
4609 return;
4610 }
4611
4612 SKIP_BLANKS;
4613 if ((RAW == '?') && (NXT(1) == '>')) {
4614 SKIP(2);
4615 } else if (RAW == '>') {
4616 /* Deprecated old WD ... */
4617 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4619 ctxt->sax->error(ctxt->userData,
4620 "XML declaration must end-up with '?>'\n");
4621 ctxt->wellFormed = 0;
4622 ctxt->disableSAX = 1;
4623 NEXT;
4624 } else {
4625 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4627 ctxt->sax->error(ctxt->userData,
4628 "parsing XML declaration: '?>' expected\n");
4629 ctxt->wellFormed = 0;
4630 ctxt->disableSAX = 1;
4631 MOVETO_ENDTAG(CUR_PTR);
4632 NEXT;
4633 }
4634}
4635
4636/*
4637 * xmlParseConditionalSections
4638 * @ctxt: an XML parser context
4639 *
4640 * [61] conditionalSect ::= includeSect | ignoreSect
4641 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4642 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4643 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4644 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4645 */
4646
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004647static void
Owen Taylor3473f882001-02-23 17:55:21 +00004648xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4649 SKIP(3);
4650 SKIP_BLANKS;
4651 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4652 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4653 (NXT(6) == 'E')) {
4654 SKIP(7);
4655 SKIP_BLANKS;
4656 if (RAW != '[') {
4657 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4659 ctxt->sax->error(ctxt->userData,
4660 "XML conditional section '[' expected\n");
4661 ctxt->wellFormed = 0;
4662 ctxt->disableSAX = 1;
4663 } else {
4664 NEXT;
4665 }
4666 if (xmlParserDebugEntities) {
4667 if ((ctxt->input != NULL) && (ctxt->input->filename))
4668 xmlGenericError(xmlGenericErrorContext,
4669 "%s(%d): ", ctxt->input->filename,
4670 ctxt->input->line);
4671 xmlGenericError(xmlGenericErrorContext,
4672 "Entering INCLUDE Conditional Section\n");
4673 }
4674
4675 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4676 (NXT(2) != '>'))) {
4677 const xmlChar *check = CUR_PTR;
4678 int cons = ctxt->input->consumed;
4679 int tok = ctxt->token;
4680
4681 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4682 xmlParseConditionalSections(ctxt);
4683 } else if (IS_BLANK(CUR)) {
4684 NEXT;
4685 } else if (RAW == '%') {
4686 xmlParsePEReference(ctxt);
4687 } else
4688 xmlParseMarkupDecl(ctxt);
4689
4690 /*
4691 * Pop-up of finished entities.
4692 */
4693 while ((RAW == 0) && (ctxt->inputNr > 1))
4694 xmlPopInput(ctxt);
4695
4696 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4697 (tok == ctxt->token)) {
4698 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4700 ctxt->sax->error(ctxt->userData,
4701 "Content error in the external subset\n");
4702 ctxt->wellFormed = 0;
4703 ctxt->disableSAX = 1;
4704 break;
4705 }
4706 }
4707 if (xmlParserDebugEntities) {
4708 if ((ctxt->input != NULL) && (ctxt->input->filename))
4709 xmlGenericError(xmlGenericErrorContext,
4710 "%s(%d): ", ctxt->input->filename,
4711 ctxt->input->line);
4712 xmlGenericError(xmlGenericErrorContext,
4713 "Leaving INCLUDE Conditional Section\n");
4714 }
4715
4716 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4717 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4718 int state;
4719 int instate;
4720 int depth = 0;
4721
4722 SKIP(6);
4723 SKIP_BLANKS;
4724 if (RAW != '[') {
4725 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4727 ctxt->sax->error(ctxt->userData,
4728 "XML conditional section '[' expected\n");
4729 ctxt->wellFormed = 0;
4730 ctxt->disableSAX = 1;
4731 } else {
4732 NEXT;
4733 }
4734 if (xmlParserDebugEntities) {
4735 if ((ctxt->input != NULL) && (ctxt->input->filename))
4736 xmlGenericError(xmlGenericErrorContext,
4737 "%s(%d): ", ctxt->input->filename,
4738 ctxt->input->line);
4739 xmlGenericError(xmlGenericErrorContext,
4740 "Entering IGNORE Conditional Section\n");
4741 }
4742
4743 /*
4744 * Parse up to the end of the conditionnal section
4745 * But disable SAX event generating DTD building in the meantime
4746 */
4747 state = ctxt->disableSAX;
4748 instate = ctxt->instate;
4749 ctxt->disableSAX = 1;
4750 ctxt->instate = XML_PARSER_IGNORE;
4751
4752 while (depth >= 0) {
4753 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4754 depth++;
4755 SKIP(3);
4756 continue;
4757 }
4758 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4759 if (--depth >= 0) SKIP(3);
4760 continue;
4761 }
4762 NEXT;
4763 continue;
4764 }
4765
4766 ctxt->disableSAX = state;
4767 ctxt->instate = instate;
4768
4769 if (xmlParserDebugEntities) {
4770 if ((ctxt->input != NULL) && (ctxt->input->filename))
4771 xmlGenericError(xmlGenericErrorContext,
4772 "%s(%d): ", ctxt->input->filename,
4773 ctxt->input->line);
4774 xmlGenericError(xmlGenericErrorContext,
4775 "Leaving IGNORE Conditional Section\n");
4776 }
4777
4778 } else {
4779 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4781 ctxt->sax->error(ctxt->userData,
4782 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4783 ctxt->wellFormed = 0;
4784 ctxt->disableSAX = 1;
4785 }
4786
4787 if (RAW == 0)
4788 SHRINK;
4789
4790 if (RAW == 0) {
4791 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4793 ctxt->sax->error(ctxt->userData,
4794 "XML conditional section not closed\n");
4795 ctxt->wellFormed = 0;
4796 ctxt->disableSAX = 1;
4797 } else {
4798 SKIP(3);
4799 }
4800}
4801
4802/**
4803 * xmlParseExternalSubset:
4804 * @ctxt: an XML parser context
4805 * @ExternalID: the external identifier
4806 * @SystemID: the system identifier (or URL)
4807 *
4808 * parse Markup declarations from an external subset
4809 *
4810 * [30] extSubset ::= textDecl? extSubsetDecl
4811 *
4812 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4813 */
4814void
4815xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4816 const xmlChar *SystemID) {
4817 GROW;
4818 if ((RAW == '<') && (NXT(1) == '?') &&
4819 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4820 (NXT(4) == 'l')) {
4821 xmlParseTextDecl(ctxt);
4822 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4823 /*
4824 * The XML REC instructs us to stop parsing right here
4825 */
4826 ctxt->instate = XML_PARSER_EOF;
4827 return;
4828 }
4829 }
4830 if (ctxt->myDoc == NULL) {
4831 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4832 }
4833 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4834 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4835
4836 ctxt->instate = XML_PARSER_DTD;
4837 ctxt->external = 1;
4838 while (((RAW == '<') && (NXT(1) == '?')) ||
4839 ((RAW == '<') && (NXT(1) == '!')) ||
4840 IS_BLANK(CUR)) {
4841 const xmlChar *check = CUR_PTR;
4842 int cons = ctxt->input->consumed;
4843 int tok = ctxt->token;
4844
4845 GROW;
4846 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4847 xmlParseConditionalSections(ctxt);
4848 } else if (IS_BLANK(CUR)) {
4849 NEXT;
4850 } else if (RAW == '%') {
4851 xmlParsePEReference(ctxt);
4852 } else
4853 xmlParseMarkupDecl(ctxt);
4854
4855 /*
4856 * Pop-up of finished entities.
4857 */
4858 while ((RAW == 0) && (ctxt->inputNr > 1))
4859 xmlPopInput(ctxt);
4860
4861 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4862 (tok == ctxt->token)) {
4863 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4865 ctxt->sax->error(ctxt->userData,
4866 "Content error in the external subset\n");
4867 ctxt->wellFormed = 0;
4868 ctxt->disableSAX = 1;
4869 break;
4870 }
4871 }
4872
4873 if (RAW != 0) {
4874 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4876 ctxt->sax->error(ctxt->userData,
4877 "Extra content at the end of the document\n");
4878 ctxt->wellFormed = 0;
4879 ctxt->disableSAX = 1;
4880 }
4881
4882}
4883
4884/**
4885 * xmlParseReference:
4886 * @ctxt: an XML parser context
4887 *
4888 * parse and handle entity references in content, depending on the SAX
4889 * interface, this may end-up in a call to character() if this is a
4890 * CharRef, a predefined entity, if there is no reference() callback.
4891 * or if the parser was asked to switch to that mode.
4892 *
4893 * [67] Reference ::= EntityRef | CharRef
4894 */
4895void
4896xmlParseReference(xmlParserCtxtPtr ctxt) {
4897 xmlEntityPtr ent;
4898 xmlChar *val;
4899 if (RAW != '&') return;
4900
4901 if (NXT(1) == '#') {
4902 int i = 0;
4903 xmlChar out[10];
4904 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004905 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004906
4907 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4908 /*
4909 * So we are using non-UTF-8 buffers
4910 * Check that the char fit on 8bits, if not
4911 * generate a CharRef.
4912 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004913 if (value <= 0xFF) {
4914 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004915 out[1] = 0;
4916 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4917 (!ctxt->disableSAX))
4918 ctxt->sax->characters(ctxt->userData, out, 1);
4919 } else {
4920 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004921 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004922 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004923 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004924 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4925 (!ctxt->disableSAX))
4926 ctxt->sax->reference(ctxt->userData, out);
4927 }
4928 } else {
4929 /*
4930 * Just encode the value in UTF-8
4931 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004932 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004933 out[i] = 0;
4934 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4935 (!ctxt->disableSAX))
4936 ctxt->sax->characters(ctxt->userData, out, i);
4937 }
4938 } else {
4939 ent = xmlParseEntityRef(ctxt);
4940 if (ent == NULL) return;
4941 if ((ent->name != NULL) &&
4942 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4943 xmlNodePtr list = NULL;
4944 int ret;
4945
4946
4947 /*
4948 * The first reference to the entity trigger a parsing phase
4949 * where the ent->children is filled with the result from
4950 * the parsing.
4951 */
4952 if (ent->children == NULL) {
4953 xmlChar *value;
4954 value = ent->content;
4955
4956 /*
4957 * Check that this entity is well formed
4958 */
4959 if ((value != NULL) &&
4960 (value[1] == 0) && (value[0] == '<') &&
4961 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4962 /*
4963 * DONE: get definite answer on this !!!
4964 * Lots of entity decls are used to declare a single
4965 * char
4966 * <!ENTITY lt "<">
4967 * Which seems to be valid since
4968 * 2.4: The ampersand character (&) and the left angle
4969 * bracket (<) may appear in their literal form only
4970 * when used ... They are also legal within the literal
4971 * entity value of an internal entity declaration;i
4972 * see "4.3.2 Well-Formed Parsed Entities".
4973 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4974 * Looking at the OASIS test suite and James Clark
4975 * tests, this is broken. However the XML REC uses
4976 * it. Is the XML REC not well-formed ????
4977 * This is a hack to avoid this problem
4978 *
4979 * ANSWER: since lt gt amp .. are already defined,
4980 * this is a redefinition and hence the fact that the
4981 * contentis not well balanced is not a Wf error, this
4982 * is lousy but acceptable.
4983 */
4984 list = xmlNewDocText(ctxt->myDoc, value);
4985 if (list != NULL) {
4986 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4987 (ent->children == NULL)) {
4988 ent->children = list;
4989 ent->last = list;
4990 list->parent = (xmlNodePtr) ent;
4991 } else {
4992 xmlFreeNodeList(list);
4993 }
4994 } else if (list != NULL) {
4995 xmlFreeNodeList(list);
4996 }
4997 } else {
4998 /*
4999 * 4.3.2: An internal general parsed entity is well-formed
5000 * if its replacement text matches the production labeled
5001 * content.
5002 */
5003 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5004 ctxt->depth++;
5005 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5006 ctxt->sax, NULL, ctxt->depth,
5007 value, &list);
5008 ctxt->depth--;
5009 } else if (ent->etype ==
5010 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5011 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005012 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005013 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005014 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005015 ctxt->depth--;
5016 } else {
5017 ret = -1;
5018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5019 ctxt->sax->error(ctxt->userData,
5020 "Internal: invalid entity type\n");
5021 }
5022 if (ret == XML_ERR_ENTITY_LOOP) {
5023 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5025 ctxt->sax->error(ctxt->userData,
5026 "Detected entity reference loop\n");
5027 ctxt->wellFormed = 0;
5028 ctxt->disableSAX = 1;
5029 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005030 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5031 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005032 (ent->children == NULL)) {
5033 ent->children = list;
5034 while (list != NULL) {
5035 list->parent = (xmlNodePtr) ent;
5036 if (list->next == NULL)
5037 ent->last = list;
5038 list = list->next;
5039 }
5040 } else {
5041 xmlFreeNodeList(list);
5042 }
5043 } else if (ret > 0) {
5044 ctxt->errNo = ret;
5045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5046 ctxt->sax->error(ctxt->userData,
5047 "Entity value required\n");
5048 ctxt->wellFormed = 0;
5049 ctxt->disableSAX = 1;
5050 } else if (list != NULL) {
5051 xmlFreeNodeList(list);
5052 }
5053 }
5054 }
5055 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5056 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5057 /*
5058 * Create a node.
5059 */
5060 ctxt->sax->reference(ctxt->userData, ent->name);
5061 return;
5062 } else if (ctxt->replaceEntities) {
5063 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5064 /*
5065 * Seems we are generating the DOM content, do
5066 * a simple tree copy
5067 */
5068 xmlNodePtr new;
5069 new = xmlCopyNodeList(ent->children);
5070
5071 xmlAddChildList(ctxt->node, new);
5072 /*
5073 * This is to avoid a nasty side effect, see
5074 * characters() in SAX.c
5075 */
5076 ctxt->nodemem = 0;
5077 ctxt->nodelen = 0;
5078 return;
5079 } else {
5080 /*
5081 * Probably running in SAX mode
5082 */
5083 xmlParserInputPtr input;
5084
5085 input = xmlNewEntityInputStream(ctxt, ent);
5086 xmlPushInput(ctxt, input);
5087 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5088 (RAW == '<') && (NXT(1) == '?') &&
5089 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5090 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5091 xmlParseTextDecl(ctxt);
5092 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5093 /*
5094 * The XML REC instructs us to stop parsing right here
5095 */
5096 ctxt->instate = XML_PARSER_EOF;
5097 return;
5098 }
5099 if (input->standalone == 1) {
5100 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5102 ctxt->sax->error(ctxt->userData,
5103 "external parsed entities cannot be standalone\n");
5104 ctxt->wellFormed = 0;
5105 ctxt->disableSAX = 1;
5106 }
5107 }
5108 return;
5109 }
5110 }
5111 } else {
5112 val = ent->content;
5113 if (val == NULL) return;
5114 /*
5115 * inline the entity.
5116 */
5117 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5118 (!ctxt->disableSAX))
5119 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5120 }
5121 }
5122}
5123
5124/**
5125 * xmlParseEntityRef:
5126 * @ctxt: an XML parser context
5127 *
5128 * parse ENTITY references declarations
5129 *
5130 * [68] EntityRef ::= '&' Name ';'
5131 *
5132 * [ WFC: Entity Declared ]
5133 * In a document without any DTD, a document with only an internal DTD
5134 * subset which contains no parameter entity references, or a document
5135 * with "standalone='yes'", the Name given in the entity reference
5136 * must match that in an entity declaration, except that well-formed
5137 * documents need not declare any of the following entities: amp, lt,
5138 * gt, apos, quot. The declaration of a parameter entity must precede
5139 * any reference to it. Similarly, the declaration of a general entity
5140 * must precede any reference to it which appears in a default value in an
5141 * attribute-list declaration. Note that if entities are declared in the
5142 * external subset or in external parameter entities, a non-validating
5143 * processor is not obligated to read and process their declarations;
5144 * for such documents, the rule that an entity must be declared is a
5145 * well-formedness constraint only if standalone='yes'.
5146 *
5147 * [ WFC: Parsed Entity ]
5148 * An entity reference must not contain the name of an unparsed entity
5149 *
5150 * Returns the xmlEntityPtr if found, or NULL otherwise.
5151 */
5152xmlEntityPtr
5153xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5154 xmlChar *name;
5155 xmlEntityPtr ent = NULL;
5156
5157 GROW;
5158
5159 if (RAW == '&') {
5160 NEXT;
5161 name = xmlParseName(ctxt);
5162 if (name == NULL) {
5163 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5165 ctxt->sax->error(ctxt->userData,
5166 "xmlParseEntityRef: no name\n");
5167 ctxt->wellFormed = 0;
5168 ctxt->disableSAX = 1;
5169 } else {
5170 if (RAW == ';') {
5171 NEXT;
5172 /*
5173 * Ask first SAX for entity resolution, otherwise try the
5174 * predefined set.
5175 */
5176 if (ctxt->sax != NULL) {
5177 if (ctxt->sax->getEntity != NULL)
5178 ent = ctxt->sax->getEntity(ctxt->userData, name);
5179 if (ent == NULL)
5180 ent = xmlGetPredefinedEntity(name);
5181 }
5182 /*
5183 * [ WFC: Entity Declared ]
5184 * In a document without any DTD, a document with only an
5185 * internal DTD subset which contains no parameter entity
5186 * references, or a document with "standalone='yes'", the
5187 * Name given in the entity reference must match that in an
5188 * entity declaration, except that well-formed documents
5189 * need not declare any of the following entities: amp, lt,
5190 * gt, apos, quot.
5191 * The declaration of a parameter entity must precede any
5192 * reference to it.
5193 * Similarly, the declaration of a general entity must
5194 * precede any reference to it which appears in a default
5195 * value in an attribute-list declaration. Note that if
5196 * entities are declared in the external subset or in
5197 * external parameter entities, a non-validating processor
5198 * is not obligated to read and process their declarations;
5199 * for such documents, the rule that an entity must be
5200 * declared is a well-formedness constraint only if
5201 * standalone='yes'.
5202 */
5203 if (ent == NULL) {
5204 if ((ctxt->standalone == 1) ||
5205 ((ctxt->hasExternalSubset == 0) &&
5206 (ctxt->hasPErefs == 0))) {
5207 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5209 ctxt->sax->error(ctxt->userData,
5210 "Entity '%s' not defined\n", name);
5211 ctxt->wellFormed = 0;
5212 ctxt->disableSAX = 1;
5213 } else {
5214 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5215 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5216 ctxt->sax->warning(ctxt->userData,
5217 "Entity '%s' not defined\n", name);
5218 }
5219 }
5220
5221 /*
5222 * [ WFC: Parsed Entity ]
5223 * An entity reference must not contain the name of an
5224 * unparsed entity
5225 */
5226 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5227 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5229 ctxt->sax->error(ctxt->userData,
5230 "Entity reference to unparsed entity %s\n", name);
5231 ctxt->wellFormed = 0;
5232 ctxt->disableSAX = 1;
5233 }
5234
5235 /*
5236 * [ WFC: No External Entity References ]
5237 * Attribute values cannot contain direct or indirect
5238 * entity references to external entities.
5239 */
5240 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5241 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5242 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5244 ctxt->sax->error(ctxt->userData,
5245 "Attribute references external entity '%s'\n", name);
5246 ctxt->wellFormed = 0;
5247 ctxt->disableSAX = 1;
5248 }
5249 /*
5250 * [ WFC: No < in Attribute Values ]
5251 * The replacement text of any entity referred to directly or
5252 * indirectly in an attribute value (other than "&lt;") must
5253 * not contain a <.
5254 */
5255 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5256 (ent != NULL) &&
5257 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5258 (ent->content != NULL) &&
5259 (xmlStrchr(ent->content, '<'))) {
5260 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5262 ctxt->sax->error(ctxt->userData,
5263 "'<' in entity '%s' is not allowed in attributes values\n", name);
5264 ctxt->wellFormed = 0;
5265 ctxt->disableSAX = 1;
5266 }
5267
5268 /*
5269 * Internal check, no parameter entities here ...
5270 */
5271 else {
5272 switch (ent->etype) {
5273 case XML_INTERNAL_PARAMETER_ENTITY:
5274 case XML_EXTERNAL_PARAMETER_ENTITY:
5275 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5277 ctxt->sax->error(ctxt->userData,
5278 "Attempt to reference the parameter entity '%s'\n", name);
5279 ctxt->wellFormed = 0;
5280 ctxt->disableSAX = 1;
5281 break;
5282 default:
5283 break;
5284 }
5285 }
5286
5287 /*
5288 * [ WFC: No Recursion ]
5289 * A parsed entity must not contain a recursive reference
5290 * to itself, either directly or indirectly.
5291 * Done somewhere else
5292 */
5293
5294 } else {
5295 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5297 ctxt->sax->error(ctxt->userData,
5298 "xmlParseEntityRef: expecting ';'\n");
5299 ctxt->wellFormed = 0;
5300 ctxt->disableSAX = 1;
5301 }
5302 xmlFree(name);
5303 }
5304 }
5305 return(ent);
5306}
5307
5308/**
5309 * xmlParseStringEntityRef:
5310 * @ctxt: an XML parser context
5311 * @str: a pointer to an index in the string
5312 *
5313 * parse ENTITY references declarations, but this version parses it from
5314 * a string value.
5315 *
5316 * [68] EntityRef ::= '&' Name ';'
5317 *
5318 * [ WFC: Entity Declared ]
5319 * In a document without any DTD, a document with only an internal DTD
5320 * subset which contains no parameter entity references, or a document
5321 * with "standalone='yes'", the Name given in the entity reference
5322 * must match that in an entity declaration, except that well-formed
5323 * documents need not declare any of the following entities: amp, lt,
5324 * gt, apos, quot. The declaration of a parameter entity must precede
5325 * any reference to it. Similarly, the declaration of a general entity
5326 * must precede any reference to it which appears in a default value in an
5327 * attribute-list declaration. Note that if entities are declared in the
5328 * external subset or in external parameter entities, a non-validating
5329 * processor is not obligated to read and process their declarations;
5330 * for such documents, the rule that an entity must be declared is a
5331 * well-formedness constraint only if standalone='yes'.
5332 *
5333 * [ WFC: Parsed Entity ]
5334 * An entity reference must not contain the name of an unparsed entity
5335 *
5336 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5337 * is updated to the current location in the string.
5338 */
5339xmlEntityPtr
5340xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5341 xmlChar *name;
5342 const xmlChar *ptr;
5343 xmlChar cur;
5344 xmlEntityPtr ent = NULL;
5345
5346 if ((str == NULL) || (*str == NULL))
5347 return(NULL);
5348 ptr = *str;
5349 cur = *ptr;
5350 if (cur == '&') {
5351 ptr++;
5352 cur = *ptr;
5353 name = xmlParseStringName(ctxt, &ptr);
5354 if (name == NULL) {
5355 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5357 ctxt->sax->error(ctxt->userData,
5358 "xmlParseEntityRef: no name\n");
5359 ctxt->wellFormed = 0;
5360 ctxt->disableSAX = 1;
5361 } else {
5362 if (*ptr == ';') {
5363 ptr++;
5364 /*
5365 * Ask first SAX for entity resolution, otherwise try the
5366 * predefined set.
5367 */
5368 if (ctxt->sax != NULL) {
5369 if (ctxt->sax->getEntity != NULL)
5370 ent = ctxt->sax->getEntity(ctxt->userData, name);
5371 if (ent == NULL)
5372 ent = xmlGetPredefinedEntity(name);
5373 }
5374 /*
5375 * [ WFC: Entity Declared ]
5376 * In a document without any DTD, a document with only an
5377 * internal DTD subset which contains no parameter entity
5378 * references, or a document with "standalone='yes'", the
5379 * Name given in the entity reference must match that in an
5380 * entity declaration, except that well-formed documents
5381 * need not declare any of the following entities: amp, lt,
5382 * gt, apos, quot.
5383 * The declaration of a parameter entity must precede any
5384 * reference to it.
5385 * Similarly, the declaration of a general entity must
5386 * precede any reference to it which appears in a default
5387 * value in an attribute-list declaration. Note that if
5388 * entities are declared in the external subset or in
5389 * external parameter entities, a non-validating processor
5390 * is not obligated to read and process their declarations;
5391 * for such documents, the rule that an entity must be
5392 * declared is a well-formedness constraint only if
5393 * standalone='yes'.
5394 */
5395 if (ent == NULL) {
5396 if ((ctxt->standalone == 1) ||
5397 ((ctxt->hasExternalSubset == 0) &&
5398 (ctxt->hasPErefs == 0))) {
5399 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5401 ctxt->sax->error(ctxt->userData,
5402 "Entity '%s' not defined\n", name);
5403 ctxt->wellFormed = 0;
5404 ctxt->disableSAX = 1;
5405 } else {
5406 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5407 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5408 ctxt->sax->warning(ctxt->userData,
5409 "Entity '%s' not defined\n", name);
5410 }
5411 }
5412
5413 /*
5414 * [ WFC: Parsed Entity ]
5415 * An entity reference must not contain the name of an
5416 * unparsed entity
5417 */
5418 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5419 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5421 ctxt->sax->error(ctxt->userData,
5422 "Entity reference to unparsed entity %s\n", name);
5423 ctxt->wellFormed = 0;
5424 ctxt->disableSAX = 1;
5425 }
5426
5427 /*
5428 * [ WFC: No External Entity References ]
5429 * Attribute values cannot contain direct or indirect
5430 * entity references to external entities.
5431 */
5432 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5433 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5434 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5436 ctxt->sax->error(ctxt->userData,
5437 "Attribute references external entity '%s'\n", name);
5438 ctxt->wellFormed = 0;
5439 ctxt->disableSAX = 1;
5440 }
5441 /*
5442 * [ WFC: No < in Attribute Values ]
5443 * The replacement text of any entity referred to directly or
5444 * indirectly in an attribute value (other than "&lt;") must
5445 * not contain a <.
5446 */
5447 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5448 (ent != NULL) &&
5449 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5450 (ent->content != NULL) &&
5451 (xmlStrchr(ent->content, '<'))) {
5452 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5454 ctxt->sax->error(ctxt->userData,
5455 "'<' in entity '%s' is not allowed in attributes values\n", name);
5456 ctxt->wellFormed = 0;
5457 ctxt->disableSAX = 1;
5458 }
5459
5460 /*
5461 * Internal check, no parameter entities here ...
5462 */
5463 else {
5464 switch (ent->etype) {
5465 case XML_INTERNAL_PARAMETER_ENTITY:
5466 case XML_EXTERNAL_PARAMETER_ENTITY:
5467 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5469 ctxt->sax->error(ctxt->userData,
5470 "Attempt to reference the parameter entity '%s'\n", name);
5471 ctxt->wellFormed = 0;
5472 ctxt->disableSAX = 1;
5473 break;
5474 default:
5475 break;
5476 }
5477 }
5478
5479 /*
5480 * [ WFC: No Recursion ]
5481 * A parsed entity must not contain a recursive reference
5482 * to itself, either directly or indirectly.
5483 * Done somewhwere else
5484 */
5485
5486 } else {
5487 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5489 ctxt->sax->error(ctxt->userData,
5490 "xmlParseEntityRef: expecting ';'\n");
5491 ctxt->wellFormed = 0;
5492 ctxt->disableSAX = 1;
5493 }
5494 xmlFree(name);
5495 }
5496 }
5497 *str = ptr;
5498 return(ent);
5499}
5500
5501/**
5502 * xmlParsePEReference:
5503 * @ctxt: an XML parser context
5504 *
5505 * parse PEReference declarations
5506 * The entity content is handled directly by pushing it's content as
5507 * a new input stream.
5508 *
5509 * [69] PEReference ::= '%' Name ';'
5510 *
5511 * [ WFC: No Recursion ]
5512 * A parsed entity must not contain a recursive
5513 * reference to itself, either directly or indirectly.
5514 *
5515 * [ WFC: Entity Declared ]
5516 * In a document without any DTD, a document with only an internal DTD
5517 * subset which contains no parameter entity references, or a document
5518 * with "standalone='yes'", ... ... The declaration of a parameter
5519 * entity must precede any reference to it...
5520 *
5521 * [ VC: Entity Declared ]
5522 * In a document with an external subset or external parameter entities
5523 * with "standalone='no'", ... ... The declaration of a parameter entity
5524 * must precede any reference to it...
5525 *
5526 * [ WFC: In DTD ]
5527 * Parameter-entity references may only appear in the DTD.
5528 * NOTE: misleading but this is handled.
5529 */
5530void
5531xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5532 xmlChar *name;
5533 xmlEntityPtr entity = NULL;
5534 xmlParserInputPtr input;
5535
5536 if (RAW == '%') {
5537 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005538 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005539 if (name == NULL) {
5540 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5542 ctxt->sax->error(ctxt->userData,
5543 "xmlParsePEReference: no name\n");
5544 ctxt->wellFormed = 0;
5545 ctxt->disableSAX = 1;
5546 } else {
5547 if (RAW == ';') {
5548 NEXT;
5549 if ((ctxt->sax != NULL) &&
5550 (ctxt->sax->getParameterEntity != NULL))
5551 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5552 name);
5553 if (entity == NULL) {
5554 /*
5555 * [ WFC: Entity Declared ]
5556 * In a document without any DTD, a document with only an
5557 * internal DTD subset which contains no parameter entity
5558 * references, or a document with "standalone='yes'", ...
5559 * ... The declaration of a parameter entity must precede
5560 * any reference to it...
5561 */
5562 if ((ctxt->standalone == 1) ||
5563 ((ctxt->hasExternalSubset == 0) &&
5564 (ctxt->hasPErefs == 0))) {
5565 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5566 if ((!ctxt->disableSAX) &&
5567 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5568 ctxt->sax->error(ctxt->userData,
5569 "PEReference: %%%s; not found\n", name);
5570 ctxt->wellFormed = 0;
5571 ctxt->disableSAX = 1;
5572 } else {
5573 /*
5574 * [ VC: Entity Declared ]
5575 * In a document with an external subset or external
5576 * parameter entities with "standalone='no'", ...
5577 * ... The declaration of a parameter entity must precede
5578 * any reference to it...
5579 */
5580 if ((!ctxt->disableSAX) &&
5581 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5582 ctxt->sax->warning(ctxt->userData,
5583 "PEReference: %%%s; not found\n", name);
5584 ctxt->valid = 0;
5585 }
5586 } else {
5587 /*
5588 * Internal checking in case the entity quest barfed
5589 */
5590 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5591 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5592 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5593 ctxt->sax->warning(ctxt->userData,
5594 "Internal: %%%s; is not a parameter entity\n", name);
5595 } else {
5596 /*
5597 * TODO !!!
5598 * handle the extra spaces added before and after
5599 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5600 */
5601 input = xmlNewEntityInputStream(ctxt, entity);
5602 xmlPushInput(ctxt, input);
5603 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5604 (RAW == '<') && (NXT(1) == '?') &&
5605 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5606 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5607 xmlParseTextDecl(ctxt);
5608 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5609 /*
5610 * The XML REC instructs us to stop parsing
5611 * right here
5612 */
5613 ctxt->instate = XML_PARSER_EOF;
5614 xmlFree(name);
5615 return;
5616 }
5617 }
5618 if (ctxt->token == 0)
5619 ctxt->token = ' ';
5620 }
5621 }
5622 ctxt->hasPErefs = 1;
5623 } else {
5624 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5626 ctxt->sax->error(ctxt->userData,
5627 "xmlParsePEReference: expecting ';'\n");
5628 ctxt->wellFormed = 0;
5629 ctxt->disableSAX = 1;
5630 }
5631 xmlFree(name);
5632 }
5633 }
5634}
5635
5636/**
5637 * xmlParseStringPEReference:
5638 * @ctxt: an XML parser context
5639 * @str: a pointer to an index in the string
5640 *
5641 * parse PEReference declarations
5642 *
5643 * [69] PEReference ::= '%' Name ';'
5644 *
5645 * [ WFC: No Recursion ]
5646 * A parsed entity must not contain a recursive
5647 * reference to itself, either directly or indirectly.
5648 *
5649 * [ WFC: Entity Declared ]
5650 * In a document without any DTD, a document with only an internal DTD
5651 * subset which contains no parameter entity references, or a document
5652 * with "standalone='yes'", ... ... The declaration of a parameter
5653 * entity must precede any reference to it...
5654 *
5655 * [ VC: Entity Declared ]
5656 * In a document with an external subset or external parameter entities
5657 * with "standalone='no'", ... ... The declaration of a parameter entity
5658 * must precede any reference to it...
5659 *
5660 * [ WFC: In DTD ]
5661 * Parameter-entity references may only appear in the DTD.
5662 * NOTE: misleading but this is handled.
5663 *
5664 * Returns the string of the entity content.
5665 * str is updated to the current value of the index
5666 */
5667xmlEntityPtr
5668xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5669 const xmlChar *ptr;
5670 xmlChar cur;
5671 xmlChar *name;
5672 xmlEntityPtr entity = NULL;
5673
5674 if ((str == NULL) || (*str == NULL)) return(NULL);
5675 ptr = *str;
5676 cur = *ptr;
5677 if (cur == '%') {
5678 ptr++;
5679 cur = *ptr;
5680 name = xmlParseStringName(ctxt, &ptr);
5681 if (name == NULL) {
5682 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5684 ctxt->sax->error(ctxt->userData,
5685 "xmlParseStringPEReference: no name\n");
5686 ctxt->wellFormed = 0;
5687 ctxt->disableSAX = 1;
5688 } else {
5689 cur = *ptr;
5690 if (cur == ';') {
5691 ptr++;
5692 cur = *ptr;
5693 if ((ctxt->sax != NULL) &&
5694 (ctxt->sax->getParameterEntity != NULL))
5695 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5696 name);
5697 if (entity == NULL) {
5698 /*
5699 * [ WFC: Entity Declared ]
5700 * In a document without any DTD, a document with only an
5701 * internal DTD subset which contains no parameter entity
5702 * references, or a document with "standalone='yes'", ...
5703 * ... The declaration of a parameter entity must precede
5704 * any reference to it...
5705 */
5706 if ((ctxt->standalone == 1) ||
5707 ((ctxt->hasExternalSubset == 0) &&
5708 (ctxt->hasPErefs == 0))) {
5709 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5711 ctxt->sax->error(ctxt->userData,
5712 "PEReference: %%%s; not found\n", name);
5713 ctxt->wellFormed = 0;
5714 ctxt->disableSAX = 1;
5715 } else {
5716 /*
5717 * [ VC: Entity Declared ]
5718 * In a document with an external subset or external
5719 * parameter entities with "standalone='no'", ...
5720 * ... The declaration of a parameter entity must
5721 * precede any reference to it...
5722 */
5723 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5724 ctxt->sax->warning(ctxt->userData,
5725 "PEReference: %%%s; not found\n", name);
5726 ctxt->valid = 0;
5727 }
5728 } else {
5729 /*
5730 * Internal checking in case the entity quest barfed
5731 */
5732 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5733 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5734 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5735 ctxt->sax->warning(ctxt->userData,
5736 "Internal: %%%s; is not a parameter entity\n", name);
5737 }
5738 }
5739 ctxt->hasPErefs = 1;
5740 } else {
5741 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5743 ctxt->sax->error(ctxt->userData,
5744 "xmlParseStringPEReference: expecting ';'\n");
5745 ctxt->wellFormed = 0;
5746 ctxt->disableSAX = 1;
5747 }
5748 xmlFree(name);
5749 }
5750 }
5751 *str = ptr;
5752 return(entity);
5753}
5754
5755/**
5756 * xmlParseDocTypeDecl:
5757 * @ctxt: an XML parser context
5758 *
5759 * parse a DOCTYPE declaration
5760 *
5761 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5762 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5763 *
5764 * [ VC: Root Element Type ]
5765 * The Name in the document type declaration must match the element
5766 * type of the root element.
5767 */
5768
5769void
5770xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5771 xmlChar *name = NULL;
5772 xmlChar *ExternalID = NULL;
5773 xmlChar *URI = NULL;
5774
5775 /*
5776 * We know that '<!DOCTYPE' has been detected.
5777 */
5778 SKIP(9);
5779
5780 SKIP_BLANKS;
5781
5782 /*
5783 * Parse the DOCTYPE name.
5784 */
5785 name = xmlParseName(ctxt);
5786 if (name == NULL) {
5787 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5789 ctxt->sax->error(ctxt->userData,
5790 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5791 ctxt->wellFormed = 0;
5792 ctxt->disableSAX = 1;
5793 }
5794 ctxt->intSubName = name;
5795
5796 SKIP_BLANKS;
5797
5798 /*
5799 * Check for SystemID and ExternalID
5800 */
5801 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5802
5803 if ((URI != NULL) || (ExternalID != NULL)) {
5804 ctxt->hasExternalSubset = 1;
5805 }
5806 ctxt->extSubURI = URI;
5807 ctxt->extSubSystem = ExternalID;
5808
5809 SKIP_BLANKS;
5810
5811 /*
5812 * Create and update the internal subset.
5813 */
5814 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5815 (!ctxt->disableSAX))
5816 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5817
5818 /*
5819 * Is there any internal subset declarations ?
5820 * they are handled separately in xmlParseInternalSubset()
5821 */
5822 if (RAW == '[')
5823 return;
5824
5825 /*
5826 * We should be at the end of the DOCTYPE declaration.
5827 */
5828 if (RAW != '>') {
5829 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5831 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5832 ctxt->wellFormed = 0;
5833 ctxt->disableSAX = 1;
5834 }
5835 NEXT;
5836}
5837
5838/**
5839 * xmlParseInternalsubset:
5840 * @ctxt: an XML parser context
5841 *
5842 * parse the internal subset declaration
5843 *
5844 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5845 */
5846
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005847static void
Owen Taylor3473f882001-02-23 17:55:21 +00005848xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5849 /*
5850 * Is there any DTD definition ?
5851 */
5852 if (RAW == '[') {
5853 ctxt->instate = XML_PARSER_DTD;
5854 NEXT;
5855 /*
5856 * Parse the succession of Markup declarations and
5857 * PEReferences.
5858 * Subsequence (markupdecl | PEReference | S)*
5859 */
5860 while (RAW != ']') {
5861 const xmlChar *check = CUR_PTR;
5862 int cons = ctxt->input->consumed;
5863
5864 SKIP_BLANKS;
5865 xmlParseMarkupDecl(ctxt);
5866 xmlParsePEReference(ctxt);
5867
5868 /*
5869 * Pop-up of finished entities.
5870 */
5871 while ((RAW == 0) && (ctxt->inputNr > 1))
5872 xmlPopInput(ctxt);
5873
5874 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5875 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5877 ctxt->sax->error(ctxt->userData,
5878 "xmlParseInternalSubset: error detected in Markup declaration\n");
5879 ctxt->wellFormed = 0;
5880 ctxt->disableSAX = 1;
5881 break;
5882 }
5883 }
5884 if (RAW == ']') {
5885 NEXT;
5886 SKIP_BLANKS;
5887 }
5888 }
5889
5890 /*
5891 * We should be at the end of the DOCTYPE declaration.
5892 */
5893 if (RAW != '>') {
5894 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5896 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5897 ctxt->wellFormed = 0;
5898 ctxt->disableSAX = 1;
5899 }
5900 NEXT;
5901}
5902
5903/**
5904 * xmlParseAttribute:
5905 * @ctxt: an XML parser context
5906 * @value: a xmlChar ** used to store the value of the attribute
5907 *
5908 * parse an attribute
5909 *
5910 * [41] Attribute ::= Name Eq AttValue
5911 *
5912 * [ WFC: No External Entity References ]
5913 * Attribute values cannot contain direct or indirect entity references
5914 * to external entities.
5915 *
5916 * [ WFC: No < in Attribute Values ]
5917 * The replacement text of any entity referred to directly or indirectly in
5918 * an attribute value (other than "&lt;") must not contain a <.
5919 *
5920 * [ VC: Attribute Value Type ]
5921 * The attribute must have been declared; the value must be of the type
5922 * declared for it.
5923 *
5924 * [25] Eq ::= S? '=' S?
5925 *
5926 * With namespace:
5927 *
5928 * [NS 11] Attribute ::= QName Eq AttValue
5929 *
5930 * Also the case QName == xmlns:??? is handled independently as a namespace
5931 * definition.
5932 *
5933 * Returns the attribute name, and the value in *value.
5934 */
5935
5936xmlChar *
5937xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5938 xmlChar *name, *val;
5939
5940 *value = NULL;
5941 name = xmlParseName(ctxt);
5942 if (name == NULL) {
5943 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5945 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5946 ctxt->wellFormed = 0;
5947 ctxt->disableSAX = 1;
5948 return(NULL);
5949 }
5950
5951 /*
5952 * read the value
5953 */
5954 SKIP_BLANKS;
5955 if (RAW == '=') {
5956 NEXT;
5957 SKIP_BLANKS;
5958 val = xmlParseAttValue(ctxt);
5959 ctxt->instate = XML_PARSER_CONTENT;
5960 } else {
5961 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5963 ctxt->sax->error(ctxt->userData,
5964 "Specification mandate value for attribute %s\n", name);
5965 ctxt->wellFormed = 0;
5966 ctxt->disableSAX = 1;
5967 xmlFree(name);
5968 return(NULL);
5969 }
5970
5971 /*
5972 * Check that xml:lang conforms to the specification
5973 * No more registered as an error, just generate a warning now
5974 * since this was deprecated in XML second edition
5975 */
5976 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5977 if (!xmlCheckLanguageID(val)) {
5978 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5979 ctxt->sax->warning(ctxt->userData,
5980 "Malformed value for xml:lang : %s\n", val);
5981 }
5982 }
5983
5984 /*
5985 * Check that xml:space conforms to the specification
5986 */
5987 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5988 if (xmlStrEqual(val, BAD_CAST "default"))
5989 *(ctxt->space) = 0;
5990 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5991 *(ctxt->space) = 1;
5992 else {
5993 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5995 ctxt->sax->error(ctxt->userData,
5996"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5997 val);
5998 ctxt->wellFormed = 0;
5999 ctxt->disableSAX = 1;
6000 }
6001 }
6002
6003 *value = val;
6004 return(name);
6005}
6006
6007/**
6008 * xmlParseStartTag:
6009 * @ctxt: an XML parser context
6010 *
6011 * parse a start of tag either for rule element or
6012 * EmptyElement. In both case we don't parse the tag closing chars.
6013 *
6014 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6015 *
6016 * [ WFC: Unique Att Spec ]
6017 * No attribute name may appear more than once in the same start-tag or
6018 * empty-element tag.
6019 *
6020 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6021 *
6022 * [ WFC: Unique Att Spec ]
6023 * No attribute name may appear more than once in the same start-tag or
6024 * empty-element tag.
6025 *
6026 * With namespace:
6027 *
6028 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6029 *
6030 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6031 *
6032 * Returns the element name parsed
6033 */
6034
6035xmlChar *
6036xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6037 xmlChar *name;
6038 xmlChar *attname;
6039 xmlChar *attvalue;
6040 const xmlChar **atts = NULL;
6041 int nbatts = 0;
6042 int maxatts = 0;
6043 int i;
6044
6045 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006046 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006047
6048 name = xmlParseName(ctxt);
6049 if (name == NULL) {
6050 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6052 ctxt->sax->error(ctxt->userData,
6053 "xmlParseStartTag: invalid element name\n");
6054 ctxt->wellFormed = 0;
6055 ctxt->disableSAX = 1;
6056 return(NULL);
6057 }
6058
6059 /*
6060 * Now parse the attributes, it ends up with the ending
6061 *
6062 * (S Attribute)* S?
6063 */
6064 SKIP_BLANKS;
6065 GROW;
6066
Daniel Veillard21a0f912001-02-25 19:54:14 +00006067 while ((RAW != '>') &&
6068 ((RAW != '/') || (NXT(1) != '>')) &&
6069 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006070 const xmlChar *q = CUR_PTR;
6071 int cons = ctxt->input->consumed;
6072
6073 attname = xmlParseAttribute(ctxt, &attvalue);
6074 if ((attname != NULL) && (attvalue != NULL)) {
6075 /*
6076 * [ WFC: Unique Att Spec ]
6077 * No attribute name may appear more than once in the same
6078 * start-tag or empty-element tag.
6079 */
6080 for (i = 0; i < nbatts;i += 2) {
6081 if (xmlStrEqual(atts[i], attname)) {
6082 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6084 ctxt->sax->error(ctxt->userData,
6085 "Attribute %s redefined\n",
6086 attname);
6087 ctxt->wellFormed = 0;
6088 ctxt->disableSAX = 1;
6089 xmlFree(attname);
6090 xmlFree(attvalue);
6091 goto failed;
6092 }
6093 }
6094
6095 /*
6096 * Add the pair to atts
6097 */
6098 if (atts == NULL) {
6099 maxatts = 10;
6100 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6101 if (atts == NULL) {
6102 xmlGenericError(xmlGenericErrorContext,
6103 "malloc of %ld byte failed\n",
6104 maxatts * (long)sizeof(xmlChar *));
6105 return(NULL);
6106 }
6107 } else if (nbatts + 4 > maxatts) {
6108 maxatts *= 2;
6109 atts = (const xmlChar **) xmlRealloc((void *) atts,
6110 maxatts * sizeof(xmlChar *));
6111 if (atts == NULL) {
6112 xmlGenericError(xmlGenericErrorContext,
6113 "realloc of %ld byte failed\n",
6114 maxatts * (long)sizeof(xmlChar *));
6115 return(NULL);
6116 }
6117 }
6118 atts[nbatts++] = attname;
6119 atts[nbatts++] = attvalue;
6120 atts[nbatts] = NULL;
6121 atts[nbatts + 1] = NULL;
6122 } else {
6123 if (attname != NULL)
6124 xmlFree(attname);
6125 if (attvalue != NULL)
6126 xmlFree(attvalue);
6127 }
6128
6129failed:
6130
6131 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6132 break;
6133 if (!IS_BLANK(RAW)) {
6134 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6136 ctxt->sax->error(ctxt->userData,
6137 "attributes construct error\n");
6138 ctxt->wellFormed = 0;
6139 ctxt->disableSAX = 1;
6140 }
6141 SKIP_BLANKS;
6142 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6143 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6145 ctxt->sax->error(ctxt->userData,
6146 "xmlParseStartTag: problem parsing attributes\n");
6147 ctxt->wellFormed = 0;
6148 ctxt->disableSAX = 1;
6149 break;
6150 }
6151 GROW;
6152 }
6153
6154 /*
6155 * SAX: Start of Element !
6156 */
6157 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6158 (!ctxt->disableSAX))
6159 ctxt->sax->startElement(ctxt->userData, name, atts);
6160
6161 if (atts != NULL) {
6162 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6163 xmlFree((void *) atts);
6164 }
6165 return(name);
6166}
6167
6168/**
6169 * xmlParseEndTag:
6170 * @ctxt: an XML parser context
6171 *
6172 * parse an end of tag
6173 *
6174 * [42] ETag ::= '</' Name S? '>'
6175 *
6176 * With namespace
6177 *
6178 * [NS 9] ETag ::= '</' QName S? '>'
6179 */
6180
6181void
6182xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6183 xmlChar *name;
6184 xmlChar *oldname;
6185
6186 GROW;
6187 if ((RAW != '<') || (NXT(1) != '/')) {
6188 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6190 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6191 ctxt->wellFormed = 0;
6192 ctxt->disableSAX = 1;
6193 return;
6194 }
6195 SKIP(2);
6196
6197 name = xmlParseName(ctxt);
6198
6199 /*
6200 * We should definitely be at the ending "S? '>'" part
6201 */
6202 GROW;
6203 SKIP_BLANKS;
6204 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6205 ctxt->errNo = XML_ERR_GT_REQUIRED;
6206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6207 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6208 ctxt->wellFormed = 0;
6209 ctxt->disableSAX = 1;
6210 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006211 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006212
6213 /*
6214 * [ WFC: Element Type Match ]
6215 * The Name in an element's end-tag must match the element type in the
6216 * start-tag.
6217 *
6218 */
6219 if ((name == NULL) || (ctxt->name == NULL) ||
6220 (!xmlStrEqual(name, ctxt->name))) {
6221 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6223 if ((name != NULL) && (ctxt->name != NULL)) {
6224 ctxt->sax->error(ctxt->userData,
6225 "Opening and ending tag mismatch: %s and %s\n",
6226 ctxt->name, name);
6227 } else if (ctxt->name != NULL) {
6228 ctxt->sax->error(ctxt->userData,
6229 "Ending tag eror for: %s\n", ctxt->name);
6230 } else {
6231 ctxt->sax->error(ctxt->userData,
6232 "Ending tag error: internal error ???\n");
6233 }
6234
6235 }
6236 ctxt->wellFormed = 0;
6237 ctxt->disableSAX = 1;
6238 }
6239
6240 /*
6241 * SAX: End of Tag
6242 */
6243 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6244 (!ctxt->disableSAX))
6245 ctxt->sax->endElement(ctxt->userData, name);
6246
6247 if (name != NULL)
6248 xmlFree(name);
6249 oldname = namePop(ctxt);
6250 spacePop(ctxt);
6251 if (oldname != NULL) {
6252#ifdef DEBUG_STACK
6253 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6254#endif
6255 xmlFree(oldname);
6256 }
6257 return;
6258}
6259
6260/**
6261 * xmlParseCDSect:
6262 * @ctxt: an XML parser context
6263 *
6264 * Parse escaped pure raw content.
6265 *
6266 * [18] CDSect ::= CDStart CData CDEnd
6267 *
6268 * [19] CDStart ::= '<![CDATA['
6269 *
6270 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6271 *
6272 * [21] CDEnd ::= ']]>'
6273 */
6274void
6275xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6276 xmlChar *buf = NULL;
6277 int len = 0;
6278 int size = XML_PARSER_BUFFER_SIZE;
6279 int r, rl;
6280 int s, sl;
6281 int cur, l;
6282 int count = 0;
6283
6284 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6285 (NXT(2) == '[') && (NXT(3) == 'C') &&
6286 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6287 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6288 (NXT(8) == '[')) {
6289 SKIP(9);
6290 } else
6291 return;
6292
6293 ctxt->instate = XML_PARSER_CDATA_SECTION;
6294 r = CUR_CHAR(rl);
6295 if (!IS_CHAR(r)) {
6296 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6298 ctxt->sax->error(ctxt->userData,
6299 "CData section not finished\n");
6300 ctxt->wellFormed = 0;
6301 ctxt->disableSAX = 1;
6302 ctxt->instate = XML_PARSER_CONTENT;
6303 return;
6304 }
6305 NEXTL(rl);
6306 s = CUR_CHAR(sl);
6307 if (!IS_CHAR(s)) {
6308 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6310 ctxt->sax->error(ctxt->userData,
6311 "CData section not finished\n");
6312 ctxt->wellFormed = 0;
6313 ctxt->disableSAX = 1;
6314 ctxt->instate = XML_PARSER_CONTENT;
6315 return;
6316 }
6317 NEXTL(sl);
6318 cur = CUR_CHAR(l);
6319 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6320 if (buf == NULL) {
6321 xmlGenericError(xmlGenericErrorContext,
6322 "malloc of %d byte failed\n", size);
6323 return;
6324 }
6325 while (IS_CHAR(cur) &&
6326 ((r != ']') || (s != ']') || (cur != '>'))) {
6327 if (len + 5 >= size) {
6328 size *= 2;
6329 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6330 if (buf == NULL) {
6331 xmlGenericError(xmlGenericErrorContext,
6332 "realloc of %d byte failed\n", size);
6333 return;
6334 }
6335 }
6336 COPY_BUF(rl,buf,len,r);
6337 r = s;
6338 rl = sl;
6339 s = cur;
6340 sl = l;
6341 count++;
6342 if (count > 50) {
6343 GROW;
6344 count = 0;
6345 }
6346 NEXTL(l);
6347 cur = CUR_CHAR(l);
6348 }
6349 buf[len] = 0;
6350 ctxt->instate = XML_PARSER_CONTENT;
6351 if (cur != '>') {
6352 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6353 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6354 ctxt->sax->error(ctxt->userData,
6355 "CData section not finished\n%.50s\n", buf);
6356 ctxt->wellFormed = 0;
6357 ctxt->disableSAX = 1;
6358 xmlFree(buf);
6359 return;
6360 }
6361 NEXTL(l);
6362
6363 /*
6364 * Ok the buffer is to be consumed as cdata.
6365 */
6366 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6367 if (ctxt->sax->cdataBlock != NULL)
6368 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6369 }
6370 xmlFree(buf);
6371}
6372
6373/**
6374 * xmlParseContent:
6375 * @ctxt: an XML parser context
6376 *
6377 * Parse a content:
6378 *
6379 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6380 */
6381
6382void
6383xmlParseContent(xmlParserCtxtPtr ctxt) {
6384 GROW;
6385 while (((RAW != 0) || (ctxt->token != 0)) &&
6386 ((RAW != '<') || (NXT(1) != '/'))) {
6387 const xmlChar *test = CUR_PTR;
6388 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006389 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006390 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006391
6392 /*
6393 * Handle possible processed charrefs.
6394 */
6395 if (ctxt->token != 0) {
6396 xmlParseCharData(ctxt, 0);
6397 }
6398 /*
6399 * First case : a Processing Instruction.
6400 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006401 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006402 xmlParsePI(ctxt);
6403 }
6404
6405 /*
6406 * Second case : a CDSection
6407 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006408 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006409 (NXT(2) == '[') && (NXT(3) == 'C') &&
6410 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6411 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6412 (NXT(8) == '[')) {
6413 xmlParseCDSect(ctxt);
6414 }
6415
6416 /*
6417 * Third case : a comment
6418 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006419 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006420 (NXT(2) == '-') && (NXT(3) == '-')) {
6421 xmlParseComment(ctxt);
6422 ctxt->instate = XML_PARSER_CONTENT;
6423 }
6424
6425 /*
6426 * Fourth case : a sub-element.
6427 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006428 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006429 xmlParseElement(ctxt);
6430 }
6431
6432 /*
6433 * Fifth case : a reference. If if has not been resolved,
6434 * parsing returns it's Name, create the node
6435 */
6436
Daniel Veillard21a0f912001-02-25 19:54:14 +00006437 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006438 xmlParseReference(ctxt);
6439 }
6440
6441 /*
6442 * Last case, text. Note that References are handled directly.
6443 */
6444 else {
6445 xmlParseCharData(ctxt, 0);
6446 }
6447
6448 GROW;
6449 /*
6450 * Pop-up of finished entities.
6451 */
6452 while ((RAW == 0) && (ctxt->inputNr > 1))
6453 xmlPopInput(ctxt);
6454 SHRINK;
6455
6456 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6457 (tok == ctxt->token)) {
6458 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6460 ctxt->sax->error(ctxt->userData,
6461 "detected an error in element content\n");
6462 ctxt->wellFormed = 0;
6463 ctxt->disableSAX = 1;
6464 ctxt->instate = XML_PARSER_EOF;
6465 break;
6466 }
6467 }
6468}
6469
6470/**
6471 * xmlParseElement:
6472 * @ctxt: an XML parser context
6473 *
6474 * parse an XML element, this is highly recursive
6475 *
6476 * [39] element ::= EmptyElemTag | STag content ETag
6477 *
6478 * [ WFC: Element Type Match ]
6479 * The Name in an element's end-tag must match the element type in the
6480 * start-tag.
6481 *
6482 * [ VC: Element Valid ]
6483 * An element is valid if there is a declaration matching elementdecl
6484 * where the Name matches the element type and one of the following holds:
6485 * - The declaration matches EMPTY and the element has no content.
6486 * - The declaration matches children and the sequence of child elements
6487 * belongs to the language generated by the regular expression in the
6488 * content model, with optional white space (characters matching the
6489 * nonterminal S) between each pair of child elements.
6490 * - The declaration matches Mixed and the content consists of character
6491 * data and child elements whose types match names in the content model.
6492 * - The declaration matches ANY, and the types of any child elements have
6493 * been declared.
6494 */
6495
6496void
6497xmlParseElement(xmlParserCtxtPtr ctxt) {
6498 const xmlChar *openTag = CUR_PTR;
6499 xmlChar *name;
6500 xmlChar *oldname;
6501 xmlParserNodeInfo node_info;
6502 xmlNodePtr ret;
6503
6504 /* Capture start position */
6505 if (ctxt->record_info) {
6506 node_info.begin_pos = ctxt->input->consumed +
6507 (CUR_PTR - ctxt->input->base);
6508 node_info.begin_line = ctxt->input->line;
6509 }
6510
6511 if (ctxt->spaceNr == 0)
6512 spacePush(ctxt, -1);
6513 else
6514 spacePush(ctxt, *ctxt->space);
6515
6516 name = xmlParseStartTag(ctxt);
6517 if (name == NULL) {
6518 spacePop(ctxt);
6519 return;
6520 }
6521 namePush(ctxt, name);
6522 ret = ctxt->node;
6523
6524 /*
6525 * [ VC: Root Element Type ]
6526 * The Name in the document type declaration must match the element
6527 * type of the root element.
6528 */
6529 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6530 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6531 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6532
6533 /*
6534 * Check for an Empty Element.
6535 */
6536 if ((RAW == '/') && (NXT(1) == '>')) {
6537 SKIP(2);
6538 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6539 (!ctxt->disableSAX))
6540 ctxt->sax->endElement(ctxt->userData, name);
6541 oldname = namePop(ctxt);
6542 spacePop(ctxt);
6543 if (oldname != NULL) {
6544#ifdef DEBUG_STACK
6545 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6546#endif
6547 xmlFree(oldname);
6548 }
6549 if ( ret != NULL && ctxt->record_info ) {
6550 node_info.end_pos = ctxt->input->consumed +
6551 (CUR_PTR - ctxt->input->base);
6552 node_info.end_line = ctxt->input->line;
6553 node_info.node = ret;
6554 xmlParserAddNodeInfo(ctxt, &node_info);
6555 }
6556 return;
6557 }
6558 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006559 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006560 } else {
6561 ctxt->errNo = XML_ERR_GT_REQUIRED;
6562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6563 ctxt->sax->error(ctxt->userData,
6564 "Couldn't find end of Start Tag\n%.30s\n",
6565 openTag);
6566 ctxt->wellFormed = 0;
6567 ctxt->disableSAX = 1;
6568
6569 /*
6570 * end of parsing of this node.
6571 */
6572 nodePop(ctxt);
6573 oldname = namePop(ctxt);
6574 spacePop(ctxt);
6575 if (oldname != NULL) {
6576#ifdef DEBUG_STACK
6577 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6578#endif
6579 xmlFree(oldname);
6580 }
6581
6582 /*
6583 * Capture end position and add node
6584 */
6585 if ( ret != NULL && ctxt->record_info ) {
6586 node_info.end_pos = ctxt->input->consumed +
6587 (CUR_PTR - ctxt->input->base);
6588 node_info.end_line = ctxt->input->line;
6589 node_info.node = ret;
6590 xmlParserAddNodeInfo(ctxt, &node_info);
6591 }
6592 return;
6593 }
6594
6595 /*
6596 * Parse the content of the element:
6597 */
6598 xmlParseContent(ctxt);
6599 if (!IS_CHAR(RAW)) {
6600 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6602 ctxt->sax->error(ctxt->userData,
6603 "Premature end of data in tag %.30s\n", openTag);
6604 ctxt->wellFormed = 0;
6605 ctxt->disableSAX = 1;
6606
6607 /*
6608 * end of parsing of this node.
6609 */
6610 nodePop(ctxt);
6611 oldname = namePop(ctxt);
6612 spacePop(ctxt);
6613 if (oldname != NULL) {
6614#ifdef DEBUG_STACK
6615 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6616#endif
6617 xmlFree(oldname);
6618 }
6619 return;
6620 }
6621
6622 /*
6623 * parse the end of tag: '</' should be here.
6624 */
6625 xmlParseEndTag(ctxt);
6626
6627 /*
6628 * Capture end position and add node
6629 */
6630 if ( ret != NULL && ctxt->record_info ) {
6631 node_info.end_pos = ctxt->input->consumed +
6632 (CUR_PTR - ctxt->input->base);
6633 node_info.end_line = ctxt->input->line;
6634 node_info.node = ret;
6635 xmlParserAddNodeInfo(ctxt, &node_info);
6636 }
6637}
6638
6639/**
6640 * xmlParseVersionNum:
6641 * @ctxt: an XML parser context
6642 *
6643 * parse the XML version value.
6644 *
6645 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6646 *
6647 * Returns the string giving the XML version number, or NULL
6648 */
6649xmlChar *
6650xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6651 xmlChar *buf = NULL;
6652 int len = 0;
6653 int size = 10;
6654 xmlChar cur;
6655
6656 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6657 if (buf == NULL) {
6658 xmlGenericError(xmlGenericErrorContext,
6659 "malloc of %d byte failed\n", size);
6660 return(NULL);
6661 }
6662 cur = CUR;
6663 while (((cur >= 'a') && (cur <= 'z')) ||
6664 ((cur >= 'A') && (cur <= 'Z')) ||
6665 ((cur >= '0') && (cur <= '9')) ||
6666 (cur == '_') || (cur == '.') ||
6667 (cur == ':') || (cur == '-')) {
6668 if (len + 1 >= size) {
6669 size *= 2;
6670 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6671 if (buf == NULL) {
6672 xmlGenericError(xmlGenericErrorContext,
6673 "realloc of %d byte failed\n", size);
6674 return(NULL);
6675 }
6676 }
6677 buf[len++] = cur;
6678 NEXT;
6679 cur=CUR;
6680 }
6681 buf[len] = 0;
6682 return(buf);
6683}
6684
6685/**
6686 * xmlParseVersionInfo:
6687 * @ctxt: an XML parser context
6688 *
6689 * parse the XML version.
6690 *
6691 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6692 *
6693 * [25] Eq ::= S? '=' S?
6694 *
6695 * Returns the version string, e.g. "1.0"
6696 */
6697
6698xmlChar *
6699xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6700 xmlChar *version = NULL;
6701 const xmlChar *q;
6702
6703 if ((RAW == 'v') && (NXT(1) == 'e') &&
6704 (NXT(2) == 'r') && (NXT(3) == 's') &&
6705 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6706 (NXT(6) == 'n')) {
6707 SKIP(7);
6708 SKIP_BLANKS;
6709 if (RAW != '=') {
6710 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6712 ctxt->sax->error(ctxt->userData,
6713 "xmlParseVersionInfo : expected '='\n");
6714 ctxt->wellFormed = 0;
6715 ctxt->disableSAX = 1;
6716 return(NULL);
6717 }
6718 NEXT;
6719 SKIP_BLANKS;
6720 if (RAW == '"') {
6721 NEXT;
6722 q = CUR_PTR;
6723 version = xmlParseVersionNum(ctxt);
6724 if (RAW != '"') {
6725 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6727 ctxt->sax->error(ctxt->userData,
6728 "String not closed\n%.50s\n", q);
6729 ctxt->wellFormed = 0;
6730 ctxt->disableSAX = 1;
6731 } else
6732 NEXT;
6733 } else if (RAW == '\''){
6734 NEXT;
6735 q = CUR_PTR;
6736 version = xmlParseVersionNum(ctxt);
6737 if (RAW != '\'') {
6738 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6740 ctxt->sax->error(ctxt->userData,
6741 "String not closed\n%.50s\n", q);
6742 ctxt->wellFormed = 0;
6743 ctxt->disableSAX = 1;
6744 } else
6745 NEXT;
6746 } else {
6747 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6749 ctxt->sax->error(ctxt->userData,
6750 "xmlParseVersionInfo : expected ' or \"\n");
6751 ctxt->wellFormed = 0;
6752 ctxt->disableSAX = 1;
6753 }
6754 }
6755 return(version);
6756}
6757
6758/**
6759 * xmlParseEncName:
6760 * @ctxt: an XML parser context
6761 *
6762 * parse the XML encoding name
6763 *
6764 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6765 *
6766 * Returns the encoding name value or NULL
6767 */
6768xmlChar *
6769xmlParseEncName(xmlParserCtxtPtr ctxt) {
6770 xmlChar *buf = NULL;
6771 int len = 0;
6772 int size = 10;
6773 xmlChar cur;
6774
6775 cur = CUR;
6776 if (((cur >= 'a') && (cur <= 'z')) ||
6777 ((cur >= 'A') && (cur <= 'Z'))) {
6778 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6779 if (buf == NULL) {
6780 xmlGenericError(xmlGenericErrorContext,
6781 "malloc of %d byte failed\n", size);
6782 return(NULL);
6783 }
6784
6785 buf[len++] = cur;
6786 NEXT;
6787 cur = CUR;
6788 while (((cur >= 'a') && (cur <= 'z')) ||
6789 ((cur >= 'A') && (cur <= 'Z')) ||
6790 ((cur >= '0') && (cur <= '9')) ||
6791 (cur == '.') || (cur == '_') ||
6792 (cur == '-')) {
6793 if (len + 1 >= size) {
6794 size *= 2;
6795 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6796 if (buf == NULL) {
6797 xmlGenericError(xmlGenericErrorContext,
6798 "realloc of %d byte failed\n", size);
6799 return(NULL);
6800 }
6801 }
6802 buf[len++] = cur;
6803 NEXT;
6804 cur = CUR;
6805 if (cur == 0) {
6806 SHRINK;
6807 GROW;
6808 cur = CUR;
6809 }
6810 }
6811 buf[len] = 0;
6812 } else {
6813 ctxt->errNo = XML_ERR_ENCODING_NAME;
6814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6815 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6816 ctxt->wellFormed = 0;
6817 ctxt->disableSAX = 1;
6818 }
6819 return(buf);
6820}
6821
6822/**
6823 * xmlParseEncodingDecl:
6824 * @ctxt: an XML parser context
6825 *
6826 * parse the XML encoding declaration
6827 *
6828 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6829 *
6830 * this setups the conversion filters.
6831 *
6832 * Returns the encoding value or NULL
6833 */
6834
6835xmlChar *
6836xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6837 xmlChar *encoding = NULL;
6838 const xmlChar *q;
6839
6840 SKIP_BLANKS;
6841 if ((RAW == 'e') && (NXT(1) == 'n') &&
6842 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6843 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6844 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6845 SKIP(8);
6846 SKIP_BLANKS;
6847 if (RAW != '=') {
6848 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6850 ctxt->sax->error(ctxt->userData,
6851 "xmlParseEncodingDecl : expected '='\n");
6852 ctxt->wellFormed = 0;
6853 ctxt->disableSAX = 1;
6854 return(NULL);
6855 }
6856 NEXT;
6857 SKIP_BLANKS;
6858 if (RAW == '"') {
6859 NEXT;
6860 q = CUR_PTR;
6861 encoding = xmlParseEncName(ctxt);
6862 if (RAW != '"') {
6863 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6865 ctxt->sax->error(ctxt->userData,
6866 "String not closed\n%.50s\n", q);
6867 ctxt->wellFormed = 0;
6868 ctxt->disableSAX = 1;
6869 } else
6870 NEXT;
6871 } else if (RAW == '\''){
6872 NEXT;
6873 q = CUR_PTR;
6874 encoding = xmlParseEncName(ctxt);
6875 if (RAW != '\'') {
6876 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6878 ctxt->sax->error(ctxt->userData,
6879 "String not closed\n%.50s\n", q);
6880 ctxt->wellFormed = 0;
6881 ctxt->disableSAX = 1;
6882 } else
6883 NEXT;
6884 } else if (RAW == '"'){
6885 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6887 ctxt->sax->error(ctxt->userData,
6888 "xmlParseEncodingDecl : expected ' or \"\n");
6889 ctxt->wellFormed = 0;
6890 ctxt->disableSAX = 1;
6891 }
6892 if (encoding != NULL) {
6893 xmlCharEncoding enc;
6894 xmlCharEncodingHandlerPtr handler;
6895
6896 if (ctxt->input->encoding != NULL)
6897 xmlFree((xmlChar *) ctxt->input->encoding);
6898 ctxt->input->encoding = encoding;
6899
6900 enc = xmlParseCharEncoding((const char *) encoding);
6901 /*
6902 * registered set of known encodings
6903 */
6904 if (enc != XML_CHAR_ENCODING_ERROR) {
6905 xmlSwitchEncoding(ctxt, enc);
6906 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6907 xmlFree(encoding);
6908 return(NULL);
6909 }
6910 } else {
6911 /*
6912 * fallback for unknown encodings
6913 */
6914 handler = xmlFindCharEncodingHandler((const char *) encoding);
6915 if (handler != NULL) {
6916 xmlSwitchToEncoding(ctxt, handler);
6917 } else {
6918 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6920 ctxt->sax->error(ctxt->userData,
6921 "Unsupported encoding %s\n", encoding);
6922 return(NULL);
6923 }
6924 }
6925 }
6926 }
6927 return(encoding);
6928}
6929
6930/**
6931 * xmlParseSDDecl:
6932 * @ctxt: an XML parser context
6933 *
6934 * parse the XML standalone declaration
6935 *
6936 * [32] SDDecl ::= S 'standalone' Eq
6937 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6938 *
6939 * [ VC: Standalone Document Declaration ]
6940 * TODO The standalone document declaration must have the value "no"
6941 * if any external markup declarations contain declarations of:
6942 * - attributes with default values, if elements to which these
6943 * attributes apply appear in the document without specifications
6944 * of values for these attributes, or
6945 * - entities (other than amp, lt, gt, apos, quot), if references
6946 * to those entities appear in the document, or
6947 * - attributes with values subject to normalization, where the
6948 * attribute appears in the document with a value which will change
6949 * as a result of normalization, or
6950 * - element types with element content, if white space occurs directly
6951 * within any instance of those types.
6952 *
6953 * Returns 1 if standalone, 0 otherwise
6954 */
6955
6956int
6957xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6958 int standalone = -1;
6959
6960 SKIP_BLANKS;
6961 if ((RAW == 's') && (NXT(1) == 't') &&
6962 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6963 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6964 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6965 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6966 SKIP(10);
6967 SKIP_BLANKS;
6968 if (RAW != '=') {
6969 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6971 ctxt->sax->error(ctxt->userData,
6972 "XML standalone declaration : expected '='\n");
6973 ctxt->wellFormed = 0;
6974 ctxt->disableSAX = 1;
6975 return(standalone);
6976 }
6977 NEXT;
6978 SKIP_BLANKS;
6979 if (RAW == '\''){
6980 NEXT;
6981 if ((RAW == 'n') && (NXT(1) == 'o')) {
6982 standalone = 0;
6983 SKIP(2);
6984 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6985 (NXT(2) == 's')) {
6986 standalone = 1;
6987 SKIP(3);
6988 } else {
6989 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6991 ctxt->sax->error(ctxt->userData,
6992 "standalone accepts only 'yes' or 'no'\n");
6993 ctxt->wellFormed = 0;
6994 ctxt->disableSAX = 1;
6995 }
6996 if (RAW != '\'') {
6997 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6999 ctxt->sax->error(ctxt->userData, "String not closed\n");
7000 ctxt->wellFormed = 0;
7001 ctxt->disableSAX = 1;
7002 } else
7003 NEXT;
7004 } else if (RAW == '"'){
7005 NEXT;
7006 if ((RAW == 'n') && (NXT(1) == 'o')) {
7007 standalone = 0;
7008 SKIP(2);
7009 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7010 (NXT(2) == 's')) {
7011 standalone = 1;
7012 SKIP(3);
7013 } else {
7014 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7016 ctxt->sax->error(ctxt->userData,
7017 "standalone accepts only 'yes' or 'no'\n");
7018 ctxt->wellFormed = 0;
7019 ctxt->disableSAX = 1;
7020 }
7021 if (RAW != '"') {
7022 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7024 ctxt->sax->error(ctxt->userData, "String not closed\n");
7025 ctxt->wellFormed = 0;
7026 ctxt->disableSAX = 1;
7027 } else
7028 NEXT;
7029 } else {
7030 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7032 ctxt->sax->error(ctxt->userData,
7033 "Standalone value not found\n");
7034 ctxt->wellFormed = 0;
7035 ctxt->disableSAX = 1;
7036 }
7037 }
7038 return(standalone);
7039}
7040
7041/**
7042 * xmlParseXMLDecl:
7043 * @ctxt: an XML parser context
7044 *
7045 * parse an XML declaration header
7046 *
7047 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7048 */
7049
7050void
7051xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7052 xmlChar *version;
7053
7054 /*
7055 * We know that '<?xml' is here.
7056 */
7057 SKIP(5);
7058
7059 if (!IS_BLANK(RAW)) {
7060 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7062 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7063 ctxt->wellFormed = 0;
7064 ctxt->disableSAX = 1;
7065 }
7066 SKIP_BLANKS;
7067
7068 /*
7069 * We should have the VersionInfo here.
7070 */
7071 version = xmlParseVersionInfo(ctxt);
7072 if (version == NULL)
7073 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7074 ctxt->version = xmlStrdup(version);
7075 xmlFree(version);
7076
7077 /*
7078 * We may have the encoding declaration
7079 */
7080 if (!IS_BLANK(RAW)) {
7081 if ((RAW == '?') && (NXT(1) == '>')) {
7082 SKIP(2);
7083 return;
7084 }
7085 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7086 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7087 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7088 ctxt->wellFormed = 0;
7089 ctxt->disableSAX = 1;
7090 }
7091 xmlParseEncodingDecl(ctxt);
7092 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7093 /*
7094 * The XML REC instructs us to stop parsing right here
7095 */
7096 return;
7097 }
7098
7099 /*
7100 * We may have the standalone status.
7101 */
7102 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7103 if ((RAW == '?') && (NXT(1) == '>')) {
7104 SKIP(2);
7105 return;
7106 }
7107 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7108 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7109 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7110 ctxt->wellFormed = 0;
7111 ctxt->disableSAX = 1;
7112 }
7113 SKIP_BLANKS;
7114 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7115
7116 SKIP_BLANKS;
7117 if ((RAW == '?') && (NXT(1) == '>')) {
7118 SKIP(2);
7119 } else if (RAW == '>') {
7120 /* Deprecated old WD ... */
7121 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7123 ctxt->sax->error(ctxt->userData,
7124 "XML declaration must end-up with '?>'\n");
7125 ctxt->wellFormed = 0;
7126 ctxt->disableSAX = 1;
7127 NEXT;
7128 } else {
7129 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7131 ctxt->sax->error(ctxt->userData,
7132 "parsing XML declaration: '?>' expected\n");
7133 ctxt->wellFormed = 0;
7134 ctxt->disableSAX = 1;
7135 MOVETO_ENDTAG(CUR_PTR);
7136 NEXT;
7137 }
7138}
7139
7140/**
7141 * xmlParseMisc:
7142 * @ctxt: an XML parser context
7143 *
7144 * parse an XML Misc* optionnal field.
7145 *
7146 * [27] Misc ::= Comment | PI | S
7147 */
7148
7149void
7150xmlParseMisc(xmlParserCtxtPtr ctxt) {
7151 while (((RAW == '<') && (NXT(1) == '?')) ||
7152 ((RAW == '<') && (NXT(1) == '!') &&
7153 (NXT(2) == '-') && (NXT(3) == '-')) ||
7154 IS_BLANK(CUR)) {
7155 if ((RAW == '<') && (NXT(1) == '?')) {
7156 xmlParsePI(ctxt);
7157 } else if (IS_BLANK(CUR)) {
7158 NEXT;
7159 } else
7160 xmlParseComment(ctxt);
7161 }
7162}
7163
7164/**
7165 * xmlParseDocument:
7166 * @ctxt: an XML parser context
7167 *
7168 * parse an XML document (and build a tree if using the standard SAX
7169 * interface).
7170 *
7171 * [1] document ::= prolog element Misc*
7172 *
7173 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7174 *
7175 * Returns 0, -1 in case of error. the parser context is augmented
7176 * as a result of the parsing.
7177 */
7178
7179int
7180xmlParseDocument(xmlParserCtxtPtr ctxt) {
7181 xmlChar start[4];
7182 xmlCharEncoding enc;
7183
7184 xmlInitParser();
7185
7186 GROW;
7187
7188 /*
7189 * SAX: beginning of the document processing.
7190 */
7191 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7192 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7193
7194 /*
7195 * Get the 4 first bytes and decode the charset
7196 * if enc != XML_CHAR_ENCODING_NONE
7197 * plug some encoding conversion routines.
7198 */
7199 start[0] = RAW;
7200 start[1] = NXT(1);
7201 start[2] = NXT(2);
7202 start[3] = NXT(3);
7203 enc = xmlDetectCharEncoding(start, 4);
7204 if (enc != XML_CHAR_ENCODING_NONE) {
7205 xmlSwitchEncoding(ctxt, enc);
7206 }
7207
7208
7209 if (CUR == 0) {
7210 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7212 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7213 ctxt->wellFormed = 0;
7214 ctxt->disableSAX = 1;
7215 }
7216
7217 /*
7218 * Check for the XMLDecl in the Prolog.
7219 */
7220 GROW;
7221 if ((RAW == '<') && (NXT(1) == '?') &&
7222 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7223 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7224
7225 /*
7226 * Note that we will switch encoding on the fly.
7227 */
7228 xmlParseXMLDecl(ctxt);
7229 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7230 /*
7231 * The XML REC instructs us to stop parsing right here
7232 */
7233 return(-1);
7234 }
7235 ctxt->standalone = ctxt->input->standalone;
7236 SKIP_BLANKS;
7237 } else {
7238 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7239 }
7240 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7241 ctxt->sax->startDocument(ctxt->userData);
7242
7243 /*
7244 * The Misc part of the Prolog
7245 */
7246 GROW;
7247 xmlParseMisc(ctxt);
7248
7249 /*
7250 * Then possibly doc type declaration(s) and more Misc
7251 * (doctypedecl Misc*)?
7252 */
7253 GROW;
7254 if ((RAW == '<') && (NXT(1) == '!') &&
7255 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7256 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7257 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7258 (NXT(8) == 'E')) {
7259
7260 ctxt->inSubset = 1;
7261 xmlParseDocTypeDecl(ctxt);
7262 if (RAW == '[') {
7263 ctxt->instate = XML_PARSER_DTD;
7264 xmlParseInternalSubset(ctxt);
7265 }
7266
7267 /*
7268 * Create and update the external subset.
7269 */
7270 ctxt->inSubset = 2;
7271 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7272 (!ctxt->disableSAX))
7273 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7274 ctxt->extSubSystem, ctxt->extSubURI);
7275 ctxt->inSubset = 0;
7276
7277
7278 ctxt->instate = XML_PARSER_PROLOG;
7279 xmlParseMisc(ctxt);
7280 }
7281
7282 /*
7283 * Time to start parsing the tree itself
7284 */
7285 GROW;
7286 if (RAW != '<') {
7287 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7289 ctxt->sax->error(ctxt->userData,
7290 "Start tag expected, '<' not found\n");
7291 ctxt->wellFormed = 0;
7292 ctxt->disableSAX = 1;
7293 ctxt->instate = XML_PARSER_EOF;
7294 } else {
7295 ctxt->instate = XML_PARSER_CONTENT;
7296 xmlParseElement(ctxt);
7297 ctxt->instate = XML_PARSER_EPILOG;
7298
7299
7300 /*
7301 * The Misc part at the end
7302 */
7303 xmlParseMisc(ctxt);
7304
7305 if (RAW != 0) {
7306 ctxt->errNo = XML_ERR_DOCUMENT_END;
7307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7308 ctxt->sax->error(ctxt->userData,
7309 "Extra content at the end of the document\n");
7310 ctxt->wellFormed = 0;
7311 ctxt->disableSAX = 1;
7312 }
7313 ctxt->instate = XML_PARSER_EOF;
7314 }
7315
7316 /*
7317 * SAX: end of the document processing.
7318 */
7319 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7320 (!ctxt->disableSAX))
7321 ctxt->sax->endDocument(ctxt->userData);
7322
7323 if (! ctxt->wellFormed) return(-1);
7324 return(0);
7325}
7326
7327/**
7328 * xmlParseExtParsedEnt:
7329 * @ctxt: an XML parser context
7330 *
7331 * parse a genreral parsed entity
7332 * An external general parsed entity is well-formed if it matches the
7333 * production labeled extParsedEnt.
7334 *
7335 * [78] extParsedEnt ::= TextDecl? content
7336 *
7337 * Returns 0, -1 in case of error. the parser context is augmented
7338 * as a result of the parsing.
7339 */
7340
7341int
7342xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7343 xmlChar start[4];
7344 xmlCharEncoding enc;
7345
7346 xmlDefaultSAXHandlerInit();
7347
7348 GROW;
7349
7350 /*
7351 * SAX: beginning of the document processing.
7352 */
7353 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7354 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7355
7356 /*
7357 * Get the 4 first bytes and decode the charset
7358 * if enc != XML_CHAR_ENCODING_NONE
7359 * plug some encoding conversion routines.
7360 */
7361 start[0] = RAW;
7362 start[1] = NXT(1);
7363 start[2] = NXT(2);
7364 start[3] = NXT(3);
7365 enc = xmlDetectCharEncoding(start, 4);
7366 if (enc != XML_CHAR_ENCODING_NONE) {
7367 xmlSwitchEncoding(ctxt, enc);
7368 }
7369
7370
7371 if (CUR == 0) {
7372 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7374 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7375 ctxt->wellFormed = 0;
7376 ctxt->disableSAX = 1;
7377 }
7378
7379 /*
7380 * Check for the XMLDecl in the Prolog.
7381 */
7382 GROW;
7383 if ((RAW == '<') && (NXT(1) == '?') &&
7384 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7385 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7386
7387 /*
7388 * Note that we will switch encoding on the fly.
7389 */
7390 xmlParseXMLDecl(ctxt);
7391 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7392 /*
7393 * The XML REC instructs us to stop parsing right here
7394 */
7395 return(-1);
7396 }
7397 SKIP_BLANKS;
7398 } else {
7399 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7400 }
7401 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7402 ctxt->sax->startDocument(ctxt->userData);
7403
7404 /*
7405 * Doing validity checking on chunk doesn't make sense
7406 */
7407 ctxt->instate = XML_PARSER_CONTENT;
7408 ctxt->validate = 0;
7409 ctxt->loadsubset = 0;
7410 ctxt->depth = 0;
7411
7412 xmlParseContent(ctxt);
7413
7414 if ((RAW == '<') && (NXT(1) == '/')) {
7415 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7417 ctxt->sax->error(ctxt->userData,
7418 "chunk is not well balanced\n");
7419 ctxt->wellFormed = 0;
7420 ctxt->disableSAX = 1;
7421 } else if (RAW != 0) {
7422 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7424 ctxt->sax->error(ctxt->userData,
7425 "extra content at the end of well balanced chunk\n");
7426 ctxt->wellFormed = 0;
7427 ctxt->disableSAX = 1;
7428 }
7429
7430 /*
7431 * SAX: end of the document processing.
7432 */
7433 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7434 (!ctxt->disableSAX))
7435 ctxt->sax->endDocument(ctxt->userData);
7436
7437 if (! ctxt->wellFormed) return(-1);
7438 return(0);
7439}
7440
7441/************************************************************************
7442 * *
7443 * Progressive parsing interfaces *
7444 * *
7445 ************************************************************************/
7446
7447/**
7448 * xmlParseLookupSequence:
7449 * @ctxt: an XML parser context
7450 * @first: the first char to lookup
7451 * @next: the next char to lookup or zero
7452 * @third: the next char to lookup or zero
7453 *
7454 * Try to find if a sequence (first, next, third) or just (first next) or
7455 * (first) is available in the input stream.
7456 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7457 * to avoid rescanning sequences of bytes, it DOES change the state of the
7458 * parser, do not use liberally.
7459 *
7460 * Returns the index to the current parsing point if the full sequence
7461 * is available, -1 otherwise.
7462 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007463static int
Owen Taylor3473f882001-02-23 17:55:21 +00007464xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7465 xmlChar next, xmlChar third) {
7466 int base, len;
7467 xmlParserInputPtr in;
7468 const xmlChar *buf;
7469
7470 in = ctxt->input;
7471 if (in == NULL) return(-1);
7472 base = in->cur - in->base;
7473 if (base < 0) return(-1);
7474 if (ctxt->checkIndex > base)
7475 base = ctxt->checkIndex;
7476 if (in->buf == NULL) {
7477 buf = in->base;
7478 len = in->length;
7479 } else {
7480 buf = in->buf->buffer->content;
7481 len = in->buf->buffer->use;
7482 }
7483 /* take into account the sequence length */
7484 if (third) len -= 2;
7485 else if (next) len --;
7486 for (;base < len;base++) {
7487 if (buf[base] == first) {
7488 if (third != 0) {
7489 if ((buf[base + 1] != next) ||
7490 (buf[base + 2] != third)) continue;
7491 } else if (next != 0) {
7492 if (buf[base + 1] != next) continue;
7493 }
7494 ctxt->checkIndex = 0;
7495#ifdef DEBUG_PUSH
7496 if (next == 0)
7497 xmlGenericError(xmlGenericErrorContext,
7498 "PP: lookup '%c' found at %d\n",
7499 first, base);
7500 else if (third == 0)
7501 xmlGenericError(xmlGenericErrorContext,
7502 "PP: lookup '%c%c' found at %d\n",
7503 first, next, base);
7504 else
7505 xmlGenericError(xmlGenericErrorContext,
7506 "PP: lookup '%c%c%c' found at %d\n",
7507 first, next, third, base);
7508#endif
7509 return(base - (in->cur - in->base));
7510 }
7511 }
7512 ctxt->checkIndex = base;
7513#ifdef DEBUG_PUSH
7514 if (next == 0)
7515 xmlGenericError(xmlGenericErrorContext,
7516 "PP: lookup '%c' failed\n", first);
7517 else if (third == 0)
7518 xmlGenericError(xmlGenericErrorContext,
7519 "PP: lookup '%c%c' failed\n", first, next);
7520 else
7521 xmlGenericError(xmlGenericErrorContext,
7522 "PP: lookup '%c%c%c' failed\n", first, next, third);
7523#endif
7524 return(-1);
7525}
7526
7527/**
7528 * xmlParseTryOrFinish:
7529 * @ctxt: an XML parser context
7530 * @terminate: last chunk indicator
7531 *
7532 * Try to progress on parsing
7533 *
7534 * Returns zero if no parsing was possible
7535 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007536static int
Owen Taylor3473f882001-02-23 17:55:21 +00007537xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7538 int ret = 0;
7539 int avail;
7540 xmlChar cur, next;
7541
7542#ifdef DEBUG_PUSH
7543 switch (ctxt->instate) {
7544 case XML_PARSER_EOF:
7545 xmlGenericError(xmlGenericErrorContext,
7546 "PP: try EOF\n"); break;
7547 case XML_PARSER_START:
7548 xmlGenericError(xmlGenericErrorContext,
7549 "PP: try START\n"); break;
7550 case XML_PARSER_MISC:
7551 xmlGenericError(xmlGenericErrorContext,
7552 "PP: try MISC\n");break;
7553 case XML_PARSER_COMMENT:
7554 xmlGenericError(xmlGenericErrorContext,
7555 "PP: try COMMENT\n");break;
7556 case XML_PARSER_PROLOG:
7557 xmlGenericError(xmlGenericErrorContext,
7558 "PP: try PROLOG\n");break;
7559 case XML_PARSER_START_TAG:
7560 xmlGenericError(xmlGenericErrorContext,
7561 "PP: try START_TAG\n");break;
7562 case XML_PARSER_CONTENT:
7563 xmlGenericError(xmlGenericErrorContext,
7564 "PP: try CONTENT\n");break;
7565 case XML_PARSER_CDATA_SECTION:
7566 xmlGenericError(xmlGenericErrorContext,
7567 "PP: try CDATA_SECTION\n");break;
7568 case XML_PARSER_END_TAG:
7569 xmlGenericError(xmlGenericErrorContext,
7570 "PP: try END_TAG\n");break;
7571 case XML_PARSER_ENTITY_DECL:
7572 xmlGenericError(xmlGenericErrorContext,
7573 "PP: try ENTITY_DECL\n");break;
7574 case XML_PARSER_ENTITY_VALUE:
7575 xmlGenericError(xmlGenericErrorContext,
7576 "PP: try ENTITY_VALUE\n");break;
7577 case XML_PARSER_ATTRIBUTE_VALUE:
7578 xmlGenericError(xmlGenericErrorContext,
7579 "PP: try ATTRIBUTE_VALUE\n");break;
7580 case XML_PARSER_DTD:
7581 xmlGenericError(xmlGenericErrorContext,
7582 "PP: try DTD\n");break;
7583 case XML_PARSER_EPILOG:
7584 xmlGenericError(xmlGenericErrorContext,
7585 "PP: try EPILOG\n");break;
7586 case XML_PARSER_PI:
7587 xmlGenericError(xmlGenericErrorContext,
7588 "PP: try PI\n");break;
7589 case XML_PARSER_IGNORE:
7590 xmlGenericError(xmlGenericErrorContext,
7591 "PP: try IGNORE\n");break;
7592 }
7593#endif
7594
7595 while (1) {
7596 /*
7597 * Pop-up of finished entities.
7598 */
7599 while ((RAW == 0) && (ctxt->inputNr > 1))
7600 xmlPopInput(ctxt);
7601
7602 if (ctxt->input ==NULL) break;
7603 if (ctxt->input->buf == NULL)
7604 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7605 else
7606 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7607 if (avail < 1)
7608 goto done;
7609 switch (ctxt->instate) {
7610 case XML_PARSER_EOF:
7611 /*
7612 * Document parsing is done !
7613 */
7614 goto done;
7615 case XML_PARSER_START:
7616 /*
7617 * Very first chars read from the document flow.
7618 */
Owen Taylor3473f882001-02-23 17:55:21 +00007619 if (avail < 2)
7620 goto done;
7621
7622 cur = ctxt->input->cur[0];
7623 next = ctxt->input->cur[1];
7624 if (cur == 0) {
7625 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7626 ctxt->sax->setDocumentLocator(ctxt->userData,
7627 &xmlDefaultSAXLocator);
7628 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7630 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7631 ctxt->wellFormed = 0;
7632 ctxt->disableSAX = 1;
7633 ctxt->instate = XML_PARSER_EOF;
7634#ifdef DEBUG_PUSH
7635 xmlGenericError(xmlGenericErrorContext,
7636 "PP: entering EOF\n");
7637#endif
7638 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7639 ctxt->sax->endDocument(ctxt->userData);
7640 goto done;
7641 }
7642 if ((cur == '<') && (next == '?')) {
7643 /* PI or XML decl */
7644 if (avail < 5) return(ret);
7645 if ((!terminate) &&
7646 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7647 return(ret);
7648 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7649 ctxt->sax->setDocumentLocator(ctxt->userData,
7650 &xmlDefaultSAXLocator);
7651 if ((ctxt->input->cur[2] == 'x') &&
7652 (ctxt->input->cur[3] == 'm') &&
7653 (ctxt->input->cur[4] == 'l') &&
7654 (IS_BLANK(ctxt->input->cur[5]))) {
7655 ret += 5;
7656#ifdef DEBUG_PUSH
7657 xmlGenericError(xmlGenericErrorContext,
7658 "PP: Parsing XML Decl\n");
7659#endif
7660 xmlParseXMLDecl(ctxt);
7661 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7662 /*
7663 * The XML REC instructs us to stop parsing right
7664 * here
7665 */
7666 ctxt->instate = XML_PARSER_EOF;
7667 return(0);
7668 }
7669 ctxt->standalone = ctxt->input->standalone;
7670 if ((ctxt->encoding == NULL) &&
7671 (ctxt->input->encoding != NULL))
7672 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7673 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7674 (!ctxt->disableSAX))
7675 ctxt->sax->startDocument(ctxt->userData);
7676 ctxt->instate = XML_PARSER_MISC;
7677#ifdef DEBUG_PUSH
7678 xmlGenericError(xmlGenericErrorContext,
7679 "PP: entering MISC\n");
7680#endif
7681 } else {
7682 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7683 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7684 (!ctxt->disableSAX))
7685 ctxt->sax->startDocument(ctxt->userData);
7686 ctxt->instate = XML_PARSER_MISC;
7687#ifdef DEBUG_PUSH
7688 xmlGenericError(xmlGenericErrorContext,
7689 "PP: entering MISC\n");
7690#endif
7691 }
7692 } else {
7693 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7694 ctxt->sax->setDocumentLocator(ctxt->userData,
7695 &xmlDefaultSAXLocator);
7696 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7697 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7698 (!ctxt->disableSAX))
7699 ctxt->sax->startDocument(ctxt->userData);
7700 ctxt->instate = XML_PARSER_MISC;
7701#ifdef DEBUG_PUSH
7702 xmlGenericError(xmlGenericErrorContext,
7703 "PP: entering MISC\n");
7704#endif
7705 }
7706 break;
7707 case XML_PARSER_MISC:
7708 SKIP_BLANKS;
7709 if (ctxt->input->buf == NULL)
7710 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7711 else
7712 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7713 if (avail < 2)
7714 goto done;
7715 cur = ctxt->input->cur[0];
7716 next = ctxt->input->cur[1];
7717 if ((cur == '<') && (next == '?')) {
7718 if ((!terminate) &&
7719 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7720 goto done;
7721#ifdef DEBUG_PUSH
7722 xmlGenericError(xmlGenericErrorContext,
7723 "PP: Parsing PI\n");
7724#endif
7725 xmlParsePI(ctxt);
7726 } else if ((cur == '<') && (next == '!') &&
7727 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7728 if ((!terminate) &&
7729 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7730 goto done;
7731#ifdef DEBUG_PUSH
7732 xmlGenericError(xmlGenericErrorContext,
7733 "PP: Parsing Comment\n");
7734#endif
7735 xmlParseComment(ctxt);
7736 ctxt->instate = XML_PARSER_MISC;
7737 } else if ((cur == '<') && (next == '!') &&
7738 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7739 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7740 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7741 (ctxt->input->cur[8] == 'E')) {
7742 if ((!terminate) &&
7743 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7744 goto done;
7745#ifdef DEBUG_PUSH
7746 xmlGenericError(xmlGenericErrorContext,
7747 "PP: Parsing internal subset\n");
7748#endif
7749 ctxt->inSubset = 1;
7750 xmlParseDocTypeDecl(ctxt);
7751 if (RAW == '[') {
7752 ctxt->instate = XML_PARSER_DTD;
7753#ifdef DEBUG_PUSH
7754 xmlGenericError(xmlGenericErrorContext,
7755 "PP: entering DTD\n");
7756#endif
7757 } else {
7758 /*
7759 * Create and update the external subset.
7760 */
7761 ctxt->inSubset = 2;
7762 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7763 (ctxt->sax->externalSubset != NULL))
7764 ctxt->sax->externalSubset(ctxt->userData,
7765 ctxt->intSubName, ctxt->extSubSystem,
7766 ctxt->extSubURI);
7767 ctxt->inSubset = 0;
7768 ctxt->instate = XML_PARSER_PROLOG;
7769#ifdef DEBUG_PUSH
7770 xmlGenericError(xmlGenericErrorContext,
7771 "PP: entering PROLOG\n");
7772#endif
7773 }
7774 } else if ((cur == '<') && (next == '!') &&
7775 (avail < 9)) {
7776 goto done;
7777 } else {
7778 ctxt->instate = XML_PARSER_START_TAG;
7779#ifdef DEBUG_PUSH
7780 xmlGenericError(xmlGenericErrorContext,
7781 "PP: entering START_TAG\n");
7782#endif
7783 }
7784 break;
7785 case XML_PARSER_IGNORE:
7786 xmlGenericError(xmlGenericErrorContext,
7787 "PP: internal error, state == IGNORE");
7788 ctxt->instate = XML_PARSER_DTD;
7789#ifdef DEBUG_PUSH
7790 xmlGenericError(xmlGenericErrorContext,
7791 "PP: entering DTD\n");
7792#endif
7793 break;
7794 case XML_PARSER_PROLOG:
7795 SKIP_BLANKS;
7796 if (ctxt->input->buf == NULL)
7797 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7798 else
7799 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7800 if (avail < 2)
7801 goto done;
7802 cur = ctxt->input->cur[0];
7803 next = ctxt->input->cur[1];
7804 if ((cur == '<') && (next == '?')) {
7805 if ((!terminate) &&
7806 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7807 goto done;
7808#ifdef DEBUG_PUSH
7809 xmlGenericError(xmlGenericErrorContext,
7810 "PP: Parsing PI\n");
7811#endif
7812 xmlParsePI(ctxt);
7813 } else if ((cur == '<') && (next == '!') &&
7814 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7815 if ((!terminate) &&
7816 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7817 goto done;
7818#ifdef DEBUG_PUSH
7819 xmlGenericError(xmlGenericErrorContext,
7820 "PP: Parsing Comment\n");
7821#endif
7822 xmlParseComment(ctxt);
7823 ctxt->instate = XML_PARSER_PROLOG;
7824 } else if ((cur == '<') && (next == '!') &&
7825 (avail < 4)) {
7826 goto done;
7827 } else {
7828 ctxt->instate = XML_PARSER_START_TAG;
7829#ifdef DEBUG_PUSH
7830 xmlGenericError(xmlGenericErrorContext,
7831 "PP: entering START_TAG\n");
7832#endif
7833 }
7834 break;
7835 case XML_PARSER_EPILOG:
7836 SKIP_BLANKS;
7837 if (ctxt->input->buf == NULL)
7838 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7839 else
7840 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7841 if (avail < 2)
7842 goto done;
7843 cur = ctxt->input->cur[0];
7844 next = ctxt->input->cur[1];
7845 if ((cur == '<') && (next == '?')) {
7846 if ((!terminate) &&
7847 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7848 goto done;
7849#ifdef DEBUG_PUSH
7850 xmlGenericError(xmlGenericErrorContext,
7851 "PP: Parsing PI\n");
7852#endif
7853 xmlParsePI(ctxt);
7854 ctxt->instate = XML_PARSER_EPILOG;
7855 } else if ((cur == '<') && (next == '!') &&
7856 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7857 if ((!terminate) &&
7858 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7859 goto done;
7860#ifdef DEBUG_PUSH
7861 xmlGenericError(xmlGenericErrorContext,
7862 "PP: Parsing Comment\n");
7863#endif
7864 xmlParseComment(ctxt);
7865 ctxt->instate = XML_PARSER_EPILOG;
7866 } else if ((cur == '<') && (next == '!') &&
7867 (avail < 4)) {
7868 goto done;
7869 } else {
7870 ctxt->errNo = XML_ERR_DOCUMENT_END;
7871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7872 ctxt->sax->error(ctxt->userData,
7873 "Extra content at the end of the document\n");
7874 ctxt->wellFormed = 0;
7875 ctxt->disableSAX = 1;
7876 ctxt->instate = XML_PARSER_EOF;
7877#ifdef DEBUG_PUSH
7878 xmlGenericError(xmlGenericErrorContext,
7879 "PP: entering EOF\n");
7880#endif
7881 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7882 (!ctxt->disableSAX))
7883 ctxt->sax->endDocument(ctxt->userData);
7884 goto done;
7885 }
7886 break;
7887 case XML_PARSER_START_TAG: {
7888 xmlChar *name, *oldname;
7889
7890 if ((avail < 2) && (ctxt->inputNr == 1))
7891 goto done;
7892 cur = ctxt->input->cur[0];
7893 if (cur != '<') {
7894 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7896 ctxt->sax->error(ctxt->userData,
7897 "Start tag expect, '<' not found\n");
7898 ctxt->wellFormed = 0;
7899 ctxt->disableSAX = 1;
7900 ctxt->instate = XML_PARSER_EOF;
7901#ifdef DEBUG_PUSH
7902 xmlGenericError(xmlGenericErrorContext,
7903 "PP: entering EOF\n");
7904#endif
7905 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7906 (!ctxt->disableSAX))
7907 ctxt->sax->endDocument(ctxt->userData);
7908 goto done;
7909 }
7910 if ((!terminate) &&
7911 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7912 goto done;
7913 if (ctxt->spaceNr == 0)
7914 spacePush(ctxt, -1);
7915 else
7916 spacePush(ctxt, *ctxt->space);
7917 name = xmlParseStartTag(ctxt);
7918 if (name == NULL) {
7919 spacePop(ctxt);
7920 ctxt->instate = XML_PARSER_EOF;
7921#ifdef DEBUG_PUSH
7922 xmlGenericError(xmlGenericErrorContext,
7923 "PP: entering EOF\n");
7924#endif
7925 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7926 (!ctxt->disableSAX))
7927 ctxt->sax->endDocument(ctxt->userData);
7928 goto done;
7929 }
7930 namePush(ctxt, xmlStrdup(name));
7931
7932 /*
7933 * [ VC: Root Element Type ]
7934 * The Name in the document type declaration must match
7935 * the element type of the root element.
7936 */
7937 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7938 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7939 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7940
7941 /*
7942 * Check for an Empty Element.
7943 */
7944 if ((RAW == '/') && (NXT(1) == '>')) {
7945 SKIP(2);
7946 if ((ctxt->sax != NULL) &&
7947 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7948 ctxt->sax->endElement(ctxt->userData, name);
7949 xmlFree(name);
7950 oldname = namePop(ctxt);
7951 spacePop(ctxt);
7952 if (oldname != NULL) {
7953#ifdef DEBUG_STACK
7954 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7955#endif
7956 xmlFree(oldname);
7957 }
7958 if (ctxt->name == NULL) {
7959 ctxt->instate = XML_PARSER_EPILOG;
7960#ifdef DEBUG_PUSH
7961 xmlGenericError(xmlGenericErrorContext,
7962 "PP: entering EPILOG\n");
7963#endif
7964 } else {
7965 ctxt->instate = XML_PARSER_CONTENT;
7966#ifdef DEBUG_PUSH
7967 xmlGenericError(xmlGenericErrorContext,
7968 "PP: entering CONTENT\n");
7969#endif
7970 }
7971 break;
7972 }
7973 if (RAW == '>') {
7974 NEXT;
7975 } else {
7976 ctxt->errNo = XML_ERR_GT_REQUIRED;
7977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7978 ctxt->sax->error(ctxt->userData,
7979 "Couldn't find end of Start Tag %s\n",
7980 name);
7981 ctxt->wellFormed = 0;
7982 ctxt->disableSAX = 1;
7983
7984 /*
7985 * end of parsing of this node.
7986 */
7987 nodePop(ctxt);
7988 oldname = namePop(ctxt);
7989 spacePop(ctxt);
7990 if (oldname != NULL) {
7991#ifdef DEBUG_STACK
7992 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7993#endif
7994 xmlFree(oldname);
7995 }
7996 }
7997 xmlFree(name);
7998 ctxt->instate = XML_PARSER_CONTENT;
7999#ifdef DEBUG_PUSH
8000 xmlGenericError(xmlGenericErrorContext,
8001 "PP: entering CONTENT\n");
8002#endif
8003 break;
8004 }
8005 case XML_PARSER_CONTENT: {
8006 const xmlChar *test;
8007 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008008 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008009
8010 /*
8011 * Handle preparsed entities and charRef
8012 */
8013 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008014 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008015
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008016 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008017 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8018 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008019 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008020 ctxt->token = 0;
8021 }
8022 if ((avail < 2) && (ctxt->inputNr == 1))
8023 goto done;
8024 cur = ctxt->input->cur[0];
8025 next = ctxt->input->cur[1];
8026
8027 test = CUR_PTR;
8028 cons = ctxt->input->consumed;
8029 tok = ctxt->token;
8030 if ((cur == '<') && (next == '?')) {
8031 if ((!terminate) &&
8032 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8033 goto done;
8034#ifdef DEBUG_PUSH
8035 xmlGenericError(xmlGenericErrorContext,
8036 "PP: Parsing PI\n");
8037#endif
8038 xmlParsePI(ctxt);
8039 } else if ((cur == '<') && (next == '!') &&
8040 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8041 if ((!terminate) &&
8042 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8043 goto done;
8044#ifdef DEBUG_PUSH
8045 xmlGenericError(xmlGenericErrorContext,
8046 "PP: Parsing Comment\n");
8047#endif
8048 xmlParseComment(ctxt);
8049 ctxt->instate = XML_PARSER_CONTENT;
8050 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8051 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8052 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8053 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8054 (ctxt->input->cur[8] == '[')) {
8055 SKIP(9);
8056 ctxt->instate = XML_PARSER_CDATA_SECTION;
8057#ifdef DEBUG_PUSH
8058 xmlGenericError(xmlGenericErrorContext,
8059 "PP: entering CDATA_SECTION\n");
8060#endif
8061 break;
8062 } else if ((cur == '<') && (next == '!') &&
8063 (avail < 9)) {
8064 goto done;
8065 } else if ((cur == '<') && (next == '/')) {
8066 ctxt->instate = XML_PARSER_END_TAG;
8067#ifdef DEBUG_PUSH
8068 xmlGenericError(xmlGenericErrorContext,
8069 "PP: entering END_TAG\n");
8070#endif
8071 break;
8072 } else if (cur == '<') {
8073 ctxt->instate = XML_PARSER_START_TAG;
8074#ifdef DEBUG_PUSH
8075 xmlGenericError(xmlGenericErrorContext,
8076 "PP: entering START_TAG\n");
8077#endif
8078 break;
8079 } else if (cur == '&') {
8080 if ((!terminate) &&
8081 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8082 goto done;
8083#ifdef DEBUG_PUSH
8084 xmlGenericError(xmlGenericErrorContext,
8085 "PP: Parsing Reference\n");
8086#endif
8087 xmlParseReference(ctxt);
8088 } else {
8089 /* TODO Avoid the extra copy, handle directly !!! */
8090 /*
8091 * Goal of the following test is:
8092 * - minimize calls to the SAX 'character' callback
8093 * when they are mergeable
8094 * - handle an problem for isBlank when we only parse
8095 * a sequence of blank chars and the next one is
8096 * not available to check against '<' presence.
8097 * - tries to homogenize the differences in SAX
8098 * callbacks beween the push and pull versions
8099 * of the parser.
8100 */
8101 if ((ctxt->inputNr == 1) &&
8102 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8103 if ((!terminate) &&
8104 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8105 goto done;
8106 }
8107 ctxt->checkIndex = 0;
8108#ifdef DEBUG_PUSH
8109 xmlGenericError(xmlGenericErrorContext,
8110 "PP: Parsing char data\n");
8111#endif
8112 xmlParseCharData(ctxt, 0);
8113 }
8114 /*
8115 * Pop-up of finished entities.
8116 */
8117 while ((RAW == 0) && (ctxt->inputNr > 1))
8118 xmlPopInput(ctxt);
8119 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8120 (tok == ctxt->token)) {
8121 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8123 ctxt->sax->error(ctxt->userData,
8124 "detected an error in element content\n");
8125 ctxt->wellFormed = 0;
8126 ctxt->disableSAX = 1;
8127 ctxt->instate = XML_PARSER_EOF;
8128 break;
8129 }
8130 break;
8131 }
8132 case XML_PARSER_CDATA_SECTION: {
8133 /*
8134 * The Push mode need to have the SAX callback for
8135 * cdataBlock merge back contiguous callbacks.
8136 */
8137 int base;
8138
8139 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8140 if (base < 0) {
8141 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8142 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8143 if (ctxt->sax->cdataBlock != NULL)
8144 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8145 XML_PARSER_BIG_BUFFER_SIZE);
8146 }
8147 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8148 ctxt->checkIndex = 0;
8149 }
8150 goto done;
8151 } else {
8152 if ((ctxt->sax != NULL) && (base > 0) &&
8153 (!ctxt->disableSAX)) {
8154 if (ctxt->sax->cdataBlock != NULL)
8155 ctxt->sax->cdataBlock(ctxt->userData,
8156 ctxt->input->cur, base);
8157 }
8158 SKIP(base + 3);
8159 ctxt->checkIndex = 0;
8160 ctxt->instate = XML_PARSER_CONTENT;
8161#ifdef DEBUG_PUSH
8162 xmlGenericError(xmlGenericErrorContext,
8163 "PP: entering CONTENT\n");
8164#endif
8165 }
8166 break;
8167 }
8168 case XML_PARSER_END_TAG:
8169 if (avail < 2)
8170 goto done;
8171 if ((!terminate) &&
8172 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8173 goto done;
8174 xmlParseEndTag(ctxt);
8175 if (ctxt->name == NULL) {
8176 ctxt->instate = XML_PARSER_EPILOG;
8177#ifdef DEBUG_PUSH
8178 xmlGenericError(xmlGenericErrorContext,
8179 "PP: entering EPILOG\n");
8180#endif
8181 } else {
8182 ctxt->instate = XML_PARSER_CONTENT;
8183#ifdef DEBUG_PUSH
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: entering CONTENT\n");
8186#endif
8187 }
8188 break;
8189 case XML_PARSER_DTD: {
8190 /*
8191 * Sorry but progressive parsing of the internal subset
8192 * is not expected to be supported. We first check that
8193 * the full content of the internal subset is available and
8194 * the parsing is launched only at that point.
8195 * Internal subset ends up with "']' S? '>'" in an unescaped
8196 * section and not in a ']]>' sequence which are conditional
8197 * sections (whoever argued to keep that crap in XML deserve
8198 * a place in hell !).
8199 */
8200 int base, i;
8201 xmlChar *buf;
8202 xmlChar quote = 0;
8203
8204 base = ctxt->input->cur - ctxt->input->base;
8205 if (base < 0) return(0);
8206 if (ctxt->checkIndex > base)
8207 base = ctxt->checkIndex;
8208 buf = ctxt->input->buf->buffer->content;
8209 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8210 base++) {
8211 if (quote != 0) {
8212 if (buf[base] == quote)
8213 quote = 0;
8214 continue;
8215 }
8216 if (buf[base] == '"') {
8217 quote = '"';
8218 continue;
8219 }
8220 if (buf[base] == '\'') {
8221 quote = '\'';
8222 continue;
8223 }
8224 if (buf[base] == ']') {
8225 if ((unsigned int) base +1 >=
8226 ctxt->input->buf->buffer->use)
8227 break;
8228 if (buf[base + 1] == ']') {
8229 /* conditional crap, skip both ']' ! */
8230 base++;
8231 continue;
8232 }
8233 for (i = 0;
8234 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8235 i++) {
8236 if (buf[base + i] == '>')
8237 goto found_end_int_subset;
8238 }
8239 break;
8240 }
8241 }
8242 /*
8243 * We didn't found the end of the Internal subset
8244 */
8245 if (quote == 0)
8246 ctxt->checkIndex = base;
8247#ifdef DEBUG_PUSH
8248 if (next == 0)
8249 xmlGenericError(xmlGenericErrorContext,
8250 "PP: lookup of int subset end filed\n");
8251#endif
8252 goto done;
8253
8254found_end_int_subset:
8255 xmlParseInternalSubset(ctxt);
8256 ctxt->inSubset = 2;
8257 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8258 (ctxt->sax->externalSubset != NULL))
8259 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8260 ctxt->extSubSystem, ctxt->extSubURI);
8261 ctxt->inSubset = 0;
8262 ctxt->instate = XML_PARSER_PROLOG;
8263 ctxt->checkIndex = 0;
8264#ifdef DEBUG_PUSH
8265 xmlGenericError(xmlGenericErrorContext,
8266 "PP: entering PROLOG\n");
8267#endif
8268 break;
8269 }
8270 case XML_PARSER_COMMENT:
8271 xmlGenericError(xmlGenericErrorContext,
8272 "PP: internal error, state == COMMENT\n");
8273 ctxt->instate = XML_PARSER_CONTENT;
8274#ifdef DEBUG_PUSH
8275 xmlGenericError(xmlGenericErrorContext,
8276 "PP: entering CONTENT\n");
8277#endif
8278 break;
8279 case XML_PARSER_PI:
8280 xmlGenericError(xmlGenericErrorContext,
8281 "PP: internal error, state == PI\n");
8282 ctxt->instate = XML_PARSER_CONTENT;
8283#ifdef DEBUG_PUSH
8284 xmlGenericError(xmlGenericErrorContext,
8285 "PP: entering CONTENT\n");
8286#endif
8287 break;
8288 case XML_PARSER_ENTITY_DECL:
8289 xmlGenericError(xmlGenericErrorContext,
8290 "PP: internal error, state == ENTITY_DECL\n");
8291 ctxt->instate = XML_PARSER_DTD;
8292#ifdef DEBUG_PUSH
8293 xmlGenericError(xmlGenericErrorContext,
8294 "PP: entering DTD\n");
8295#endif
8296 break;
8297 case XML_PARSER_ENTITY_VALUE:
8298 xmlGenericError(xmlGenericErrorContext,
8299 "PP: internal error, state == ENTITY_VALUE\n");
8300 ctxt->instate = XML_PARSER_CONTENT;
8301#ifdef DEBUG_PUSH
8302 xmlGenericError(xmlGenericErrorContext,
8303 "PP: entering DTD\n");
8304#endif
8305 break;
8306 case XML_PARSER_ATTRIBUTE_VALUE:
8307 xmlGenericError(xmlGenericErrorContext,
8308 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8309 ctxt->instate = XML_PARSER_START_TAG;
8310#ifdef DEBUG_PUSH
8311 xmlGenericError(xmlGenericErrorContext,
8312 "PP: entering START_TAG\n");
8313#endif
8314 break;
8315 case XML_PARSER_SYSTEM_LITERAL:
8316 xmlGenericError(xmlGenericErrorContext,
8317 "PP: internal error, state == SYSTEM_LITERAL\n");
8318 ctxt->instate = XML_PARSER_START_TAG;
8319#ifdef DEBUG_PUSH
8320 xmlGenericError(xmlGenericErrorContext,
8321 "PP: entering START_TAG\n");
8322#endif
8323 break;
8324 }
8325 }
8326done:
8327#ifdef DEBUG_PUSH
8328 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8329#endif
8330 return(ret);
8331}
8332
8333/**
Owen Taylor3473f882001-02-23 17:55:21 +00008334 * xmlParseChunk:
8335 * @ctxt: an XML parser context
8336 * @chunk: an char array
8337 * @size: the size in byte of the chunk
8338 * @terminate: last chunk indicator
8339 *
8340 * Parse a Chunk of memory
8341 *
8342 * Returns zero if no error, the xmlParserErrors otherwise.
8343 */
8344int
8345xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8346 int terminate) {
8347 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8348 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8349 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8350 int cur = ctxt->input->cur - ctxt->input->base;
8351
8352 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8353 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8354 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008355 ctxt->input->end =
8356 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008357#ifdef DEBUG_PUSH
8358 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8359#endif
8360
8361 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8362 xmlParseTryOrFinish(ctxt, terminate);
8363 } else if (ctxt->instate != XML_PARSER_EOF) {
8364 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8365 xmlParserInputBufferPtr in = ctxt->input->buf;
8366 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8367 (in->raw != NULL)) {
8368 int nbchars;
8369
8370 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8371 if (nbchars < 0) {
8372 xmlGenericError(xmlGenericErrorContext,
8373 "xmlParseChunk: encoder error\n");
8374 return(XML_ERR_INVALID_ENCODING);
8375 }
8376 }
8377 }
8378 }
8379 xmlParseTryOrFinish(ctxt, terminate);
8380 if (terminate) {
8381 /*
8382 * Check for termination
8383 */
8384 if ((ctxt->instate != XML_PARSER_EOF) &&
8385 (ctxt->instate != XML_PARSER_EPILOG)) {
8386 ctxt->errNo = XML_ERR_DOCUMENT_END;
8387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8388 ctxt->sax->error(ctxt->userData,
8389 "Extra content at the end of the document\n");
8390 ctxt->wellFormed = 0;
8391 ctxt->disableSAX = 1;
8392 }
8393 if (ctxt->instate != XML_PARSER_EOF) {
8394 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8395 (!ctxt->disableSAX))
8396 ctxt->sax->endDocument(ctxt->userData);
8397 }
8398 ctxt->instate = XML_PARSER_EOF;
8399 }
8400 return((xmlParserErrors) ctxt->errNo);
8401}
8402
8403/************************************************************************
8404 * *
8405 * I/O front end functions to the parser *
8406 * *
8407 ************************************************************************/
8408
8409/**
8410 * xmlStopParser:
8411 * @ctxt: an XML parser context
8412 *
8413 * Blocks further parser processing
8414 */
8415void
8416xmlStopParser(xmlParserCtxtPtr ctxt) {
8417 ctxt->instate = XML_PARSER_EOF;
8418 if (ctxt->input != NULL)
8419 ctxt->input->cur = BAD_CAST"";
8420}
8421
8422/**
8423 * xmlCreatePushParserCtxt:
8424 * @sax: a SAX handler
8425 * @user_data: The user data returned on SAX callbacks
8426 * @chunk: a pointer to an array of chars
8427 * @size: number of chars in the array
8428 * @filename: an optional file name or URI
8429 *
8430 * Create a parser context for using the XML parser in push mode
8431 * To allow content encoding detection, @size should be >= 4
8432 * The value of @filename is used for fetching external entities
8433 * and error/warning reports.
8434 *
8435 * Returns the new parser context or NULL
8436 */
8437xmlParserCtxtPtr
8438xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8439 const char *chunk, int size, const char *filename) {
8440 xmlParserCtxtPtr ctxt;
8441 xmlParserInputPtr inputStream;
8442 xmlParserInputBufferPtr buf;
8443 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8444
8445 /*
8446 * plug some encoding conversion routines
8447 */
8448 if ((chunk != NULL) && (size >= 4))
8449 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8450
8451 buf = xmlAllocParserInputBuffer(enc);
8452 if (buf == NULL) return(NULL);
8453
8454 ctxt = xmlNewParserCtxt();
8455 if (ctxt == NULL) {
8456 xmlFree(buf);
8457 return(NULL);
8458 }
8459 if (sax != NULL) {
8460 if (ctxt->sax != &xmlDefaultSAXHandler)
8461 xmlFree(ctxt->sax);
8462 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8463 if (ctxt->sax == NULL) {
8464 xmlFree(buf);
8465 xmlFree(ctxt);
8466 return(NULL);
8467 }
8468 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8469 if (user_data != NULL)
8470 ctxt->userData = user_data;
8471 }
8472 if (filename == NULL) {
8473 ctxt->directory = NULL;
8474 } else {
8475 ctxt->directory = xmlParserGetDirectory(filename);
8476 }
8477
8478 inputStream = xmlNewInputStream(ctxt);
8479 if (inputStream == NULL) {
8480 xmlFreeParserCtxt(ctxt);
8481 return(NULL);
8482 }
8483
8484 if (filename == NULL)
8485 inputStream->filename = NULL;
8486 else
8487 inputStream->filename = xmlMemStrdup(filename);
8488 inputStream->buf = buf;
8489 inputStream->base = inputStream->buf->buffer->content;
8490 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008491 inputStream->end =
8492 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008493 if (enc != XML_CHAR_ENCODING_NONE) {
8494 xmlSwitchEncoding(ctxt, enc);
8495 }
8496
8497 inputPush(ctxt, inputStream);
8498
8499 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8500 (ctxt->input->buf != NULL)) {
8501 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8502#ifdef DEBUG_PUSH
8503 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8504#endif
8505 }
8506
8507 return(ctxt);
8508}
8509
8510/**
8511 * xmlCreateIOParserCtxt:
8512 * @sax: a SAX handler
8513 * @user_data: The user data returned on SAX callbacks
8514 * @ioread: an I/O read function
8515 * @ioclose: an I/O close function
8516 * @ioctx: an I/O handler
8517 * @enc: the charset encoding if known
8518 *
8519 * Create a parser context for using the XML parser with an existing
8520 * I/O stream
8521 *
8522 * Returns the new parser context or NULL
8523 */
8524xmlParserCtxtPtr
8525xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8526 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8527 void *ioctx, xmlCharEncoding enc) {
8528 xmlParserCtxtPtr ctxt;
8529 xmlParserInputPtr inputStream;
8530 xmlParserInputBufferPtr buf;
8531
8532 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8533 if (buf == NULL) return(NULL);
8534
8535 ctxt = xmlNewParserCtxt();
8536 if (ctxt == NULL) {
8537 xmlFree(buf);
8538 return(NULL);
8539 }
8540 if (sax != NULL) {
8541 if (ctxt->sax != &xmlDefaultSAXHandler)
8542 xmlFree(ctxt->sax);
8543 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8544 if (ctxt->sax == NULL) {
8545 xmlFree(buf);
8546 xmlFree(ctxt);
8547 return(NULL);
8548 }
8549 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8550 if (user_data != NULL)
8551 ctxt->userData = user_data;
8552 }
8553
8554 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8555 if (inputStream == NULL) {
8556 xmlFreeParserCtxt(ctxt);
8557 return(NULL);
8558 }
8559 inputPush(ctxt, inputStream);
8560
8561 return(ctxt);
8562}
8563
8564/************************************************************************
8565 * *
8566 * Front ends when parsing a Dtd *
8567 * *
8568 ************************************************************************/
8569
8570/**
8571 * xmlIOParseDTD:
8572 * @sax: the SAX handler block or NULL
8573 * @input: an Input Buffer
8574 * @enc: the charset encoding if known
8575 *
8576 * Load and parse a DTD
8577 *
8578 * Returns the resulting xmlDtdPtr or NULL in case of error.
8579 * @input will be freed at parsing end.
8580 */
8581
8582xmlDtdPtr
8583xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8584 xmlCharEncoding enc) {
8585 xmlDtdPtr ret = NULL;
8586 xmlParserCtxtPtr ctxt;
8587 xmlParserInputPtr pinput = NULL;
8588
8589 if (input == NULL)
8590 return(NULL);
8591
8592 ctxt = xmlNewParserCtxt();
8593 if (ctxt == NULL) {
8594 return(NULL);
8595 }
8596
8597 /*
8598 * Set-up the SAX context
8599 */
8600 if (sax != NULL) {
8601 if (ctxt->sax != NULL)
8602 xmlFree(ctxt->sax);
8603 ctxt->sax = sax;
8604 ctxt->userData = NULL;
8605 }
8606
8607 /*
8608 * generate a parser input from the I/O handler
8609 */
8610
8611 pinput = xmlNewIOInputStream(ctxt, input, enc);
8612 if (pinput == NULL) {
8613 if (sax != NULL) ctxt->sax = NULL;
8614 xmlFreeParserCtxt(ctxt);
8615 return(NULL);
8616 }
8617
8618 /*
8619 * plug some encoding conversion routines here.
8620 */
8621 xmlPushInput(ctxt, pinput);
8622
8623 pinput->filename = NULL;
8624 pinput->line = 1;
8625 pinput->col = 1;
8626 pinput->base = ctxt->input->cur;
8627 pinput->cur = ctxt->input->cur;
8628 pinput->free = NULL;
8629
8630 /*
8631 * let's parse that entity knowing it's an external subset.
8632 */
8633 ctxt->inSubset = 2;
8634 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8635 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8636 BAD_CAST "none", BAD_CAST "none");
8637 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8638
8639 if (ctxt->myDoc != NULL) {
8640 if (ctxt->wellFormed) {
8641 ret = ctxt->myDoc->extSubset;
8642 ctxt->myDoc->extSubset = NULL;
8643 } else {
8644 ret = NULL;
8645 }
8646 xmlFreeDoc(ctxt->myDoc);
8647 ctxt->myDoc = NULL;
8648 }
8649 if (sax != NULL) ctxt->sax = NULL;
8650 xmlFreeParserCtxt(ctxt);
8651
8652 return(ret);
8653}
8654
8655/**
8656 * xmlSAXParseDTD:
8657 * @sax: the SAX handler block
8658 * @ExternalID: a NAME* containing the External ID of the DTD
8659 * @SystemID: a NAME* containing the URL to the DTD
8660 *
8661 * Load and parse an external subset.
8662 *
8663 * Returns the resulting xmlDtdPtr or NULL in case of error.
8664 */
8665
8666xmlDtdPtr
8667xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8668 const xmlChar *SystemID) {
8669 xmlDtdPtr ret = NULL;
8670 xmlParserCtxtPtr ctxt;
8671 xmlParserInputPtr input = NULL;
8672 xmlCharEncoding enc;
8673
8674 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8675
8676 ctxt = xmlNewParserCtxt();
8677 if (ctxt == NULL) {
8678 return(NULL);
8679 }
8680
8681 /*
8682 * Set-up the SAX context
8683 */
8684 if (sax != NULL) {
8685 if (ctxt->sax != NULL)
8686 xmlFree(ctxt->sax);
8687 ctxt->sax = sax;
8688 ctxt->userData = NULL;
8689 }
8690
8691 /*
8692 * Ask the Entity resolver to load the damn thing
8693 */
8694
8695 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8696 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8697 if (input == NULL) {
8698 if (sax != NULL) ctxt->sax = NULL;
8699 xmlFreeParserCtxt(ctxt);
8700 return(NULL);
8701 }
8702
8703 /*
8704 * plug some encoding conversion routines here.
8705 */
8706 xmlPushInput(ctxt, input);
8707 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8708 xmlSwitchEncoding(ctxt, enc);
8709
8710 if (input->filename == NULL)
8711 input->filename = (char *) xmlStrdup(SystemID);
8712 input->line = 1;
8713 input->col = 1;
8714 input->base = ctxt->input->cur;
8715 input->cur = ctxt->input->cur;
8716 input->free = NULL;
8717
8718 /*
8719 * let's parse that entity knowing it's an external subset.
8720 */
8721 ctxt->inSubset = 2;
8722 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8723 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8724 ExternalID, SystemID);
8725 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8726
8727 if (ctxt->myDoc != NULL) {
8728 if (ctxt->wellFormed) {
8729 ret = ctxt->myDoc->extSubset;
8730 ctxt->myDoc->extSubset = NULL;
8731 } else {
8732 ret = NULL;
8733 }
8734 xmlFreeDoc(ctxt->myDoc);
8735 ctxt->myDoc = NULL;
8736 }
8737 if (sax != NULL) ctxt->sax = NULL;
8738 xmlFreeParserCtxt(ctxt);
8739
8740 return(ret);
8741}
8742
8743/**
8744 * xmlParseDTD:
8745 * @ExternalID: a NAME* containing the External ID of the DTD
8746 * @SystemID: a NAME* containing the URL to the DTD
8747 *
8748 * Load and parse an external subset.
8749 *
8750 * Returns the resulting xmlDtdPtr or NULL in case of error.
8751 */
8752
8753xmlDtdPtr
8754xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8755 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8756}
8757
8758/************************************************************************
8759 * *
8760 * Front ends when parsing an Entity *
8761 * *
8762 ************************************************************************/
8763
8764/**
Owen Taylor3473f882001-02-23 17:55:21 +00008765 * xmlParseCtxtExternalEntity:
8766 * @ctx: the existing parsing context
8767 * @URL: the URL for the entity to load
8768 * @ID: the System ID for the entity to load
8769 * @list: the return value for the set of parsed nodes
8770 *
8771 * Parse an external general entity within an existing parsing context
8772 * An external general parsed entity is well-formed if it matches the
8773 * production labeled extParsedEnt.
8774 *
8775 * [78] extParsedEnt ::= TextDecl? content
8776 *
8777 * Returns 0 if the entity is well formed, -1 in case of args problem and
8778 * the parser error code otherwise
8779 */
8780
8781int
8782xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8783 const xmlChar *ID, xmlNodePtr *list) {
8784 xmlParserCtxtPtr ctxt;
8785 xmlDocPtr newDoc;
8786 xmlSAXHandlerPtr oldsax = NULL;
8787 int ret = 0;
8788
8789 if (ctx->depth > 40) {
8790 return(XML_ERR_ENTITY_LOOP);
8791 }
8792
8793 if (list != NULL)
8794 *list = NULL;
8795 if ((URL == NULL) && (ID == NULL))
8796 return(-1);
8797 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8798 return(-1);
8799
8800
8801 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8802 if (ctxt == NULL) return(-1);
8803 ctxt->userData = ctxt;
8804 oldsax = ctxt->sax;
8805 ctxt->sax = ctx->sax;
8806 newDoc = xmlNewDoc(BAD_CAST "1.0");
8807 if (newDoc == NULL) {
8808 xmlFreeParserCtxt(ctxt);
8809 return(-1);
8810 }
8811 if (ctx->myDoc != NULL) {
8812 newDoc->intSubset = ctx->myDoc->intSubset;
8813 newDoc->extSubset = ctx->myDoc->extSubset;
8814 }
8815 if (ctx->myDoc->URL != NULL) {
8816 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8817 }
8818 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8819 if (newDoc->children == NULL) {
8820 ctxt->sax = oldsax;
8821 xmlFreeParserCtxt(ctxt);
8822 newDoc->intSubset = NULL;
8823 newDoc->extSubset = NULL;
8824 xmlFreeDoc(newDoc);
8825 return(-1);
8826 }
8827 nodePush(ctxt, newDoc->children);
8828 if (ctx->myDoc == NULL) {
8829 ctxt->myDoc = newDoc;
8830 } else {
8831 ctxt->myDoc = ctx->myDoc;
8832 newDoc->children->doc = ctx->myDoc;
8833 }
8834
8835 /*
8836 * Parse a possible text declaration first
8837 */
8838 GROW;
8839 if ((RAW == '<') && (NXT(1) == '?') &&
8840 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8841 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8842 xmlParseTextDecl(ctxt);
8843 }
8844
8845 /*
8846 * Doing validity checking on chunk doesn't make sense
8847 */
8848 ctxt->instate = XML_PARSER_CONTENT;
8849 ctxt->validate = ctx->validate;
8850 ctxt->loadsubset = ctx->loadsubset;
8851 ctxt->depth = ctx->depth + 1;
8852 ctxt->replaceEntities = ctx->replaceEntities;
8853 if (ctxt->validate) {
8854 ctxt->vctxt.error = ctx->vctxt.error;
8855 ctxt->vctxt.warning = ctx->vctxt.warning;
8856 /* Allocate the Node stack */
8857 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8858 if (ctxt->vctxt.nodeTab == NULL) {
8859 xmlGenericError(xmlGenericErrorContext,
8860 "xmlParseCtxtExternalEntity: out of memory\n");
8861 ctxt->validate = 0;
8862 ctxt->vctxt.error = NULL;
8863 ctxt->vctxt.warning = NULL;
8864 } else {
8865 ctxt->vctxt.nodeNr = 0;
8866 ctxt->vctxt.nodeMax = 4;
8867 ctxt->vctxt.node = NULL;
8868 }
8869 } else {
8870 ctxt->vctxt.error = NULL;
8871 ctxt->vctxt.warning = NULL;
8872 }
8873
8874 xmlParseContent(ctxt);
8875
8876 if ((RAW == '<') && (NXT(1) == '/')) {
8877 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8879 ctxt->sax->error(ctxt->userData,
8880 "chunk is not well balanced\n");
8881 ctxt->wellFormed = 0;
8882 ctxt->disableSAX = 1;
8883 } else if (RAW != 0) {
8884 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8886 ctxt->sax->error(ctxt->userData,
8887 "extra content at the end of well balanced chunk\n");
8888 ctxt->wellFormed = 0;
8889 ctxt->disableSAX = 1;
8890 }
8891 if (ctxt->node != newDoc->children) {
8892 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8894 ctxt->sax->error(ctxt->userData,
8895 "chunk is not well balanced\n");
8896 ctxt->wellFormed = 0;
8897 ctxt->disableSAX = 1;
8898 }
8899
8900 if (!ctxt->wellFormed) {
8901 if (ctxt->errNo == 0)
8902 ret = 1;
8903 else
8904 ret = ctxt->errNo;
8905 } else {
8906 if (list != NULL) {
8907 xmlNodePtr cur;
8908
8909 /*
8910 * Return the newly created nodeset after unlinking it from
8911 * they pseudo parent.
8912 */
8913 cur = newDoc->children->children;
8914 *list = cur;
8915 while (cur != NULL) {
8916 cur->parent = NULL;
8917 cur = cur->next;
8918 }
8919 newDoc->children->children = NULL;
8920 }
8921 ret = 0;
8922 }
8923 ctxt->sax = oldsax;
8924 xmlFreeParserCtxt(ctxt);
8925 newDoc->intSubset = NULL;
8926 newDoc->extSubset = NULL;
8927 xmlFreeDoc(newDoc);
8928
8929 return(ret);
8930}
8931
8932/**
Daniel Veillard257d9102001-05-08 10:41:44 +00008933 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00008934 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008935 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00008936 * @sax: the SAX handler bloc (possibly NULL)
8937 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8938 * @depth: Used for loop detection, use 0
8939 * @URL: the URL for the entity to load
8940 * @ID: the System ID for the entity to load
8941 * @list: the return value for the set of parsed nodes
8942 *
Daniel Veillard257d9102001-05-08 10:41:44 +00008943 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00008944 *
8945 * Returns 0 if the entity is well formed, -1 in case of args problem and
8946 * the parser error code otherwise
8947 */
8948
Daniel Veillard257d9102001-05-08 10:41:44 +00008949static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008950xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
8951 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00008952 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008953 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00008954 xmlParserCtxtPtr ctxt;
8955 xmlDocPtr newDoc;
8956 xmlSAXHandlerPtr oldsax = NULL;
8957 int ret = 0;
8958
8959 if (depth > 40) {
8960 return(XML_ERR_ENTITY_LOOP);
8961 }
8962
8963
8964
8965 if (list != NULL)
8966 *list = NULL;
8967 if ((URL == NULL) && (ID == NULL))
8968 return(-1);
8969 if (doc == NULL) /* @@ relax but check for dereferences */
8970 return(-1);
8971
8972
8973 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8974 if (ctxt == NULL) return(-1);
8975 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008976 if (oldctxt != NULL) {
8977 ctxt->_private = oldctxt->_private;
8978 ctxt->loadsubset = oldctxt->loadsubset;
8979 ctxt->validate = oldctxt->validate;
8980 ctxt->external = oldctxt->external;
8981 } else {
8982 /*
8983 * Doing validity checking on chunk without context
8984 * doesn't make sense
8985 */
8986 ctxt->_private = NULL;
8987 ctxt->validate = 0;
8988 ctxt->external = 2;
8989 ctxt->loadsubset = 0;
8990 }
Owen Taylor3473f882001-02-23 17:55:21 +00008991 if (sax != NULL) {
8992 oldsax = ctxt->sax;
8993 ctxt->sax = sax;
8994 if (user_data != NULL)
8995 ctxt->userData = user_data;
8996 }
8997 newDoc = xmlNewDoc(BAD_CAST "1.0");
8998 if (newDoc == NULL) {
8999 xmlFreeParserCtxt(ctxt);
9000 return(-1);
9001 }
9002 if (doc != NULL) {
9003 newDoc->intSubset = doc->intSubset;
9004 newDoc->extSubset = doc->extSubset;
9005 }
9006 if (doc->URL != NULL) {
9007 newDoc->URL = xmlStrdup(doc->URL);
9008 }
9009 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9010 if (newDoc->children == NULL) {
9011 if (sax != NULL)
9012 ctxt->sax = oldsax;
9013 xmlFreeParserCtxt(ctxt);
9014 newDoc->intSubset = NULL;
9015 newDoc->extSubset = NULL;
9016 xmlFreeDoc(newDoc);
9017 return(-1);
9018 }
9019 nodePush(ctxt, newDoc->children);
9020 if (doc == NULL) {
9021 ctxt->myDoc = newDoc;
9022 } else {
9023 ctxt->myDoc = doc;
9024 newDoc->children->doc = doc;
9025 }
9026
9027 /*
9028 * Parse a possible text declaration first
9029 */
9030 GROW;
9031 if ((RAW == '<') && (NXT(1) == '?') &&
9032 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9033 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9034 xmlParseTextDecl(ctxt);
9035 }
9036
Owen Taylor3473f882001-02-23 17:55:21 +00009037 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009038 ctxt->depth = depth;
9039
9040 xmlParseContent(ctxt);
9041
9042 if ((RAW == '<') && (NXT(1) == '/')) {
9043 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9045 ctxt->sax->error(ctxt->userData,
9046 "chunk is not well balanced\n");
9047 ctxt->wellFormed = 0;
9048 ctxt->disableSAX = 1;
9049 } else if (RAW != 0) {
9050 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9052 ctxt->sax->error(ctxt->userData,
9053 "extra content at the end of well balanced chunk\n");
9054 ctxt->wellFormed = 0;
9055 ctxt->disableSAX = 1;
9056 }
9057 if (ctxt->node != newDoc->children) {
9058 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9060 ctxt->sax->error(ctxt->userData,
9061 "chunk is not well balanced\n");
9062 ctxt->wellFormed = 0;
9063 ctxt->disableSAX = 1;
9064 }
9065
9066 if (!ctxt->wellFormed) {
9067 if (ctxt->errNo == 0)
9068 ret = 1;
9069 else
9070 ret = ctxt->errNo;
9071 } else {
9072 if (list != NULL) {
9073 xmlNodePtr cur;
9074
9075 /*
9076 * Return the newly created nodeset after unlinking it from
9077 * they pseudo parent.
9078 */
9079 cur = newDoc->children->children;
9080 *list = cur;
9081 while (cur != NULL) {
9082 cur->parent = NULL;
9083 cur = cur->next;
9084 }
9085 newDoc->children->children = NULL;
9086 }
9087 ret = 0;
9088 }
9089 if (sax != NULL)
9090 ctxt->sax = oldsax;
9091 xmlFreeParserCtxt(ctxt);
9092 newDoc->intSubset = NULL;
9093 newDoc->extSubset = NULL;
9094 xmlFreeDoc(newDoc);
9095
9096 return(ret);
9097}
9098
9099/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009100 * xmlParseExternalEntity:
9101 * @doc: the document the chunk pertains to
9102 * @sax: the SAX handler bloc (possibly NULL)
9103 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9104 * @depth: Used for loop detection, use 0
9105 * @URL: the URL for the entity to load
9106 * @ID: the System ID for the entity to load
9107 * @list: the return value for the set of parsed nodes
9108 *
9109 * Parse an external general entity
9110 * An external general parsed entity is well-formed if it matches the
9111 * production labeled extParsedEnt.
9112 *
9113 * [78] extParsedEnt ::= TextDecl? content
9114 *
9115 * Returns 0 if the entity is well formed, -1 in case of args problem and
9116 * the parser error code otherwise
9117 */
9118
9119int
9120xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9121 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009122 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9123 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009124}
9125
9126/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009127 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009128 * @doc: the document the chunk pertains to
9129 * @sax: the SAX handler bloc (possibly NULL)
9130 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9131 * @depth: Used for loop detection, use 0
9132 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9133 * @list: the return value for the set of parsed nodes
9134 *
9135 * Parse a well-balanced chunk of an XML document
9136 * called by the parser
9137 * The allowed sequence for the Well Balanced Chunk is the one defined by
9138 * the content production in the XML grammar:
9139 *
9140 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9141 *
9142 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9143 * the parser error code otherwise
9144 */
9145
9146int
9147xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9148 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9149 xmlParserCtxtPtr ctxt;
9150 xmlDocPtr newDoc;
9151 xmlSAXHandlerPtr oldsax = NULL;
9152 int size;
9153 int ret = 0;
9154
9155 if (depth > 40) {
9156 return(XML_ERR_ENTITY_LOOP);
9157 }
9158
9159
9160 if (list != NULL)
9161 *list = NULL;
9162 if (string == NULL)
9163 return(-1);
9164
9165 size = xmlStrlen(string);
9166
9167 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9168 if (ctxt == NULL) return(-1);
9169 ctxt->userData = ctxt;
9170 if (sax != NULL) {
9171 oldsax = ctxt->sax;
9172 ctxt->sax = sax;
9173 if (user_data != NULL)
9174 ctxt->userData = user_data;
9175 }
9176 newDoc = xmlNewDoc(BAD_CAST "1.0");
9177 if (newDoc == NULL) {
9178 xmlFreeParserCtxt(ctxt);
9179 return(-1);
9180 }
9181 if (doc != NULL) {
9182 newDoc->intSubset = doc->intSubset;
9183 newDoc->extSubset = doc->extSubset;
9184 }
9185 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9186 if (newDoc->children == NULL) {
9187 if (sax != NULL)
9188 ctxt->sax = oldsax;
9189 xmlFreeParserCtxt(ctxt);
9190 newDoc->intSubset = NULL;
9191 newDoc->extSubset = NULL;
9192 xmlFreeDoc(newDoc);
9193 return(-1);
9194 }
9195 nodePush(ctxt, newDoc->children);
9196 if (doc == NULL) {
9197 ctxt->myDoc = newDoc;
9198 } else {
9199 ctxt->myDoc = doc;
9200 newDoc->children->doc = doc;
9201 }
9202 ctxt->instate = XML_PARSER_CONTENT;
9203 ctxt->depth = depth;
9204
9205 /*
9206 * Doing validity checking on chunk doesn't make sense
9207 */
9208 ctxt->validate = 0;
9209 ctxt->loadsubset = 0;
9210
9211 xmlParseContent(ctxt);
9212
9213 if ((RAW == '<') && (NXT(1) == '/')) {
9214 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9216 ctxt->sax->error(ctxt->userData,
9217 "chunk is not well balanced\n");
9218 ctxt->wellFormed = 0;
9219 ctxt->disableSAX = 1;
9220 } else if (RAW != 0) {
9221 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9223 ctxt->sax->error(ctxt->userData,
9224 "extra content at the end of well balanced chunk\n");
9225 ctxt->wellFormed = 0;
9226 ctxt->disableSAX = 1;
9227 }
9228 if (ctxt->node != newDoc->children) {
9229 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9231 ctxt->sax->error(ctxt->userData,
9232 "chunk is not well balanced\n");
9233 ctxt->wellFormed = 0;
9234 ctxt->disableSAX = 1;
9235 }
9236
9237 if (!ctxt->wellFormed) {
9238 if (ctxt->errNo == 0)
9239 ret = 1;
9240 else
9241 ret = ctxt->errNo;
9242 } else {
9243 if (list != NULL) {
9244 xmlNodePtr cur;
9245
9246 /*
9247 * Return the newly created nodeset after unlinking it from
9248 * they pseudo parent.
9249 */
9250 cur = newDoc->children->children;
9251 *list = cur;
9252 while (cur != NULL) {
9253 cur->parent = NULL;
9254 cur = cur->next;
9255 }
9256 newDoc->children->children = NULL;
9257 }
9258 ret = 0;
9259 }
9260 if (sax != NULL)
9261 ctxt->sax = oldsax;
9262 xmlFreeParserCtxt(ctxt);
9263 newDoc->intSubset = NULL;
9264 newDoc->extSubset = NULL;
9265 xmlFreeDoc(newDoc);
9266
9267 return(ret);
9268}
9269
9270/**
9271 * xmlSAXParseEntity:
9272 * @sax: the SAX handler block
9273 * @filename: the filename
9274 *
9275 * parse an XML external entity out of context and build a tree.
9276 * It use the given SAX function block to handle the parsing callback.
9277 * If sax is NULL, fallback to the default DOM tree building routines.
9278 *
9279 * [78] extParsedEnt ::= TextDecl? content
9280 *
9281 * This correspond to a "Well Balanced" chunk
9282 *
9283 * Returns the resulting document tree
9284 */
9285
9286xmlDocPtr
9287xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9288 xmlDocPtr ret;
9289 xmlParserCtxtPtr ctxt;
9290 char *directory = NULL;
9291
9292 ctxt = xmlCreateFileParserCtxt(filename);
9293 if (ctxt == NULL) {
9294 return(NULL);
9295 }
9296 if (sax != NULL) {
9297 if (ctxt->sax != NULL)
9298 xmlFree(ctxt->sax);
9299 ctxt->sax = sax;
9300 ctxt->userData = NULL;
9301 }
9302
9303 if ((ctxt->directory == NULL) && (directory == NULL))
9304 directory = xmlParserGetDirectory(filename);
9305
9306 xmlParseExtParsedEnt(ctxt);
9307
9308 if (ctxt->wellFormed)
9309 ret = ctxt->myDoc;
9310 else {
9311 ret = NULL;
9312 xmlFreeDoc(ctxt->myDoc);
9313 ctxt->myDoc = NULL;
9314 }
9315 if (sax != NULL)
9316 ctxt->sax = NULL;
9317 xmlFreeParserCtxt(ctxt);
9318
9319 return(ret);
9320}
9321
9322/**
9323 * xmlParseEntity:
9324 * @filename: the filename
9325 *
9326 * parse an XML external entity out of context and build a tree.
9327 *
9328 * [78] extParsedEnt ::= TextDecl? content
9329 *
9330 * This correspond to a "Well Balanced" chunk
9331 *
9332 * Returns the resulting document tree
9333 */
9334
9335xmlDocPtr
9336xmlParseEntity(const char *filename) {
9337 return(xmlSAXParseEntity(NULL, filename));
9338}
9339
9340/**
9341 * xmlCreateEntityParserCtxt:
9342 * @URL: the entity URL
9343 * @ID: the entity PUBLIC ID
9344 * @base: a posible base for the target URI
9345 *
9346 * Create a parser context for an external entity
9347 * Automatic support for ZLIB/Compress compressed document is provided
9348 * by default if found at compile-time.
9349 *
9350 * Returns the new parser context or NULL
9351 */
9352xmlParserCtxtPtr
9353xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9354 const xmlChar *base) {
9355 xmlParserCtxtPtr ctxt;
9356 xmlParserInputPtr inputStream;
9357 char *directory = NULL;
9358 xmlChar *uri;
9359
9360 ctxt = xmlNewParserCtxt();
9361 if (ctxt == NULL) {
9362 return(NULL);
9363 }
9364
9365 uri = xmlBuildURI(URL, base);
9366
9367 if (uri == NULL) {
9368 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9369 if (inputStream == NULL) {
9370 xmlFreeParserCtxt(ctxt);
9371 return(NULL);
9372 }
9373
9374 inputPush(ctxt, inputStream);
9375
9376 if ((ctxt->directory == NULL) && (directory == NULL))
9377 directory = xmlParserGetDirectory((char *)URL);
9378 if ((ctxt->directory == NULL) && (directory != NULL))
9379 ctxt->directory = directory;
9380 } else {
9381 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9382 if (inputStream == NULL) {
9383 xmlFree(uri);
9384 xmlFreeParserCtxt(ctxt);
9385 return(NULL);
9386 }
9387
9388 inputPush(ctxt, inputStream);
9389
9390 if ((ctxt->directory == NULL) && (directory == NULL))
9391 directory = xmlParserGetDirectory((char *)uri);
9392 if ((ctxt->directory == NULL) && (directory != NULL))
9393 ctxt->directory = directory;
9394 xmlFree(uri);
9395 }
9396
9397 return(ctxt);
9398}
9399
9400/************************************************************************
9401 * *
9402 * Front ends when parsing from a file *
9403 * *
9404 ************************************************************************/
9405
9406/**
9407 * xmlCreateFileParserCtxt:
9408 * @filename: the filename
9409 *
9410 * Create a parser context for a file content.
9411 * Automatic support for ZLIB/Compress compressed document is provided
9412 * by default if found at compile-time.
9413 *
9414 * Returns the new parser context or NULL
9415 */
9416xmlParserCtxtPtr
9417xmlCreateFileParserCtxt(const char *filename)
9418{
9419 xmlParserCtxtPtr ctxt;
9420 xmlParserInputPtr inputStream;
9421 xmlParserInputBufferPtr buf;
9422 char *directory = NULL;
9423
9424 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9425 if (buf == NULL) {
9426 return(NULL);
9427 }
9428
9429 ctxt = xmlNewParserCtxt();
9430 if (ctxt == NULL) {
9431 if (xmlDefaultSAXHandler.error != NULL) {
9432 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9433 }
9434 return(NULL);
9435 }
9436
9437 inputStream = xmlNewInputStream(ctxt);
9438 if (inputStream == NULL) {
9439 xmlFreeParserCtxt(ctxt);
9440 return(NULL);
9441 }
9442
9443 inputStream->filename = xmlMemStrdup(filename);
9444 inputStream->buf = buf;
9445 inputStream->base = inputStream->buf->buffer->content;
9446 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009447 inputStream->end =
9448 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009449
9450 inputPush(ctxt, inputStream);
9451 if ((ctxt->directory == NULL) && (directory == NULL))
9452 directory = xmlParserGetDirectory(filename);
9453 if ((ctxt->directory == NULL) && (directory != NULL))
9454 ctxt->directory = directory;
9455
9456 return(ctxt);
9457}
9458
9459/**
9460 * xmlSAXParseFile:
9461 * @sax: the SAX handler block
9462 * @filename: the filename
9463 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9464 * documents
9465 *
9466 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9467 * compressed document is provided by default if found at compile-time.
9468 * It use the given SAX function block to handle the parsing callback.
9469 * If sax is NULL, fallback to the default DOM tree building routines.
9470 *
9471 * Returns the resulting document tree
9472 */
9473
9474xmlDocPtr
9475xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9476 int recovery) {
9477 xmlDocPtr ret;
9478 xmlParserCtxtPtr ctxt;
9479 char *directory = NULL;
9480
9481 ctxt = xmlCreateFileParserCtxt(filename);
9482 if (ctxt == NULL) {
9483 return(NULL);
9484 }
9485 if (sax != NULL) {
9486 if (ctxt->sax != NULL)
9487 xmlFree(ctxt->sax);
9488 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009489 }
9490
9491 if ((ctxt->directory == NULL) && (directory == NULL))
9492 directory = xmlParserGetDirectory(filename);
9493 if ((ctxt->directory == NULL) && (directory != NULL))
9494 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9495
9496 xmlParseDocument(ctxt);
9497
9498 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9499 else {
9500 ret = NULL;
9501 xmlFreeDoc(ctxt->myDoc);
9502 ctxt->myDoc = NULL;
9503 }
9504 if (sax != NULL)
9505 ctxt->sax = NULL;
9506 xmlFreeParserCtxt(ctxt);
9507
9508 return(ret);
9509}
9510
9511/**
9512 * xmlRecoverDoc:
9513 * @cur: a pointer to an array of xmlChar
9514 *
9515 * parse an XML in-memory document and build a tree.
9516 * In the case the document is not Well Formed, a tree is built anyway
9517 *
9518 * Returns the resulting document tree
9519 */
9520
9521xmlDocPtr
9522xmlRecoverDoc(xmlChar *cur) {
9523 return(xmlSAXParseDoc(NULL, cur, 1));
9524}
9525
9526/**
9527 * xmlParseFile:
9528 * @filename: the filename
9529 *
9530 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9531 * compressed document is provided by default if found at compile-time.
9532 *
9533 * Returns the resulting document tree
9534 */
9535
9536xmlDocPtr
9537xmlParseFile(const char *filename) {
9538 return(xmlSAXParseFile(NULL, filename, 0));
9539}
9540
9541/**
9542 * xmlRecoverFile:
9543 * @filename: the filename
9544 *
9545 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9546 * compressed document is provided by default if found at compile-time.
9547 * In the case the document is not Well Formed, a tree is built anyway
9548 *
9549 * Returns the resulting document tree
9550 */
9551
9552xmlDocPtr
9553xmlRecoverFile(const char *filename) {
9554 return(xmlSAXParseFile(NULL, filename, 1));
9555}
9556
9557
9558/**
9559 * xmlSetupParserForBuffer:
9560 * @ctxt: an XML parser context
9561 * @buffer: a xmlChar * buffer
9562 * @filename: a file name
9563 *
9564 * Setup the parser context to parse a new buffer; Clears any prior
9565 * contents from the parser context. The buffer parameter must not be
9566 * NULL, but the filename parameter can be
9567 */
9568void
9569xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9570 const char* filename)
9571{
9572 xmlParserInputPtr input;
9573
9574 input = xmlNewInputStream(ctxt);
9575 if (input == NULL) {
9576 perror("malloc");
9577 xmlFree(ctxt);
9578 return;
9579 }
9580
9581 xmlClearParserCtxt(ctxt);
9582 if (filename != NULL)
9583 input->filename = xmlMemStrdup(filename);
9584 input->base = buffer;
9585 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009586 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009587 inputPush(ctxt, input);
9588}
9589
9590/**
9591 * xmlSAXUserParseFile:
9592 * @sax: a SAX handler
9593 * @user_data: The user data returned on SAX callbacks
9594 * @filename: a file name
9595 *
9596 * parse an XML file and call the given SAX handler routines.
9597 * Automatic support for ZLIB/Compress compressed document is provided
9598 *
9599 * Returns 0 in case of success or a error number otherwise
9600 */
9601int
9602xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9603 const char *filename) {
9604 int ret = 0;
9605 xmlParserCtxtPtr ctxt;
9606
9607 ctxt = xmlCreateFileParserCtxt(filename);
9608 if (ctxt == NULL) return -1;
9609 if (ctxt->sax != &xmlDefaultSAXHandler)
9610 xmlFree(ctxt->sax);
9611 ctxt->sax = sax;
9612 if (user_data != NULL)
9613 ctxt->userData = user_data;
9614
9615 xmlParseDocument(ctxt);
9616
9617 if (ctxt->wellFormed)
9618 ret = 0;
9619 else {
9620 if (ctxt->errNo != 0)
9621 ret = ctxt->errNo;
9622 else
9623 ret = -1;
9624 }
9625 if (sax != NULL)
9626 ctxt->sax = NULL;
9627 xmlFreeParserCtxt(ctxt);
9628
9629 return ret;
9630}
9631
9632/************************************************************************
9633 * *
9634 * Front ends when parsing from memory *
9635 * *
9636 ************************************************************************/
9637
9638/**
9639 * xmlCreateMemoryParserCtxt:
9640 * @buffer: a pointer to a char array
9641 * @size: the size of the array
9642 *
9643 * Create a parser context for an XML in-memory document.
9644 *
9645 * Returns the new parser context or NULL
9646 */
9647xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009648xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009649 xmlParserCtxtPtr ctxt;
9650 xmlParserInputPtr input;
9651 xmlParserInputBufferPtr buf;
9652
9653 if (buffer == NULL)
9654 return(NULL);
9655 if (size <= 0)
9656 return(NULL);
9657
9658 ctxt = xmlNewParserCtxt();
9659 if (ctxt == NULL)
9660 return(NULL);
9661
9662 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9663 if (buf == NULL) return(NULL);
9664
9665 input = xmlNewInputStream(ctxt);
9666 if (input == NULL) {
9667 xmlFreeParserCtxt(ctxt);
9668 return(NULL);
9669 }
9670
9671 input->filename = NULL;
9672 input->buf = buf;
9673 input->base = input->buf->buffer->content;
9674 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009675 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009676
9677 inputPush(ctxt, input);
9678 return(ctxt);
9679}
9680
9681/**
9682 * xmlSAXParseMemory:
9683 * @sax: the SAX handler block
9684 * @buffer: an pointer to a char array
9685 * @size: the size of the array
9686 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9687 * documents
9688 *
9689 * parse an XML in-memory block and use the given SAX function block
9690 * to handle the parsing callback. If sax is NULL, fallback to the default
9691 * DOM tree building routines.
9692 *
9693 * Returns the resulting document tree
9694 */
9695xmlDocPtr
9696xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9697 xmlDocPtr ret;
9698 xmlParserCtxtPtr ctxt;
9699
9700 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9701 if (ctxt == NULL) return(NULL);
9702 if (sax != NULL) {
9703 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009704 }
9705
9706 xmlParseDocument(ctxt);
9707
9708 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9709 else {
9710 ret = NULL;
9711 xmlFreeDoc(ctxt->myDoc);
9712 ctxt->myDoc = NULL;
9713 }
9714 if (sax != NULL)
9715 ctxt->sax = NULL;
9716 xmlFreeParserCtxt(ctxt);
9717
9718 return(ret);
9719}
9720
9721/**
9722 * xmlParseMemory:
9723 * @buffer: an pointer to a char array
9724 * @size: the size of the array
9725 *
9726 * parse an XML in-memory block and build a tree.
9727 *
9728 * Returns the resulting document tree
9729 */
9730
9731xmlDocPtr xmlParseMemory(char *buffer, int size) {
9732 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9733}
9734
9735/**
9736 * xmlRecoverMemory:
9737 * @buffer: an pointer to a char array
9738 * @size: the size of the array
9739 *
9740 * parse an XML in-memory block and build a tree.
9741 * In the case the document is not Well Formed, a tree is built anyway
9742 *
9743 * Returns the resulting document tree
9744 */
9745
9746xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9747 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9748}
9749
9750/**
9751 * xmlSAXUserParseMemory:
9752 * @sax: a SAX handler
9753 * @user_data: The user data returned on SAX callbacks
9754 * @buffer: an in-memory XML document input
9755 * @size: the length of the XML document in bytes
9756 *
9757 * A better SAX parsing routine.
9758 * parse an XML in-memory buffer and call the given SAX handler routines.
9759 *
9760 * Returns 0 in case of success or a error number otherwise
9761 */
9762int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009763 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009764 int ret = 0;
9765 xmlParserCtxtPtr ctxt;
9766 xmlSAXHandlerPtr oldsax = NULL;
9767
9768 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9769 if (ctxt == NULL) return -1;
9770 if (sax != NULL) {
9771 oldsax = ctxt->sax;
9772 ctxt->sax = sax;
9773 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009774 if (user_data != NULL)
9775 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009776
9777 xmlParseDocument(ctxt);
9778
9779 if (ctxt->wellFormed)
9780 ret = 0;
9781 else {
9782 if (ctxt->errNo != 0)
9783 ret = ctxt->errNo;
9784 else
9785 ret = -1;
9786 }
9787 if (sax != NULL) {
9788 ctxt->sax = oldsax;
9789 }
9790 xmlFreeParserCtxt(ctxt);
9791
9792 return ret;
9793}
9794
9795/**
9796 * xmlCreateDocParserCtxt:
9797 * @cur: a pointer to an array of xmlChar
9798 *
9799 * Creates a parser context for an XML in-memory document.
9800 *
9801 * Returns the new parser context or NULL
9802 */
9803xmlParserCtxtPtr
9804xmlCreateDocParserCtxt(xmlChar *cur) {
9805 int len;
9806
9807 if (cur == NULL)
9808 return(NULL);
9809 len = xmlStrlen(cur);
9810 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9811}
9812
9813/**
9814 * xmlSAXParseDoc:
9815 * @sax: the SAX handler block
9816 * @cur: a pointer to an array of xmlChar
9817 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9818 * documents
9819 *
9820 * parse an XML in-memory document and build a tree.
9821 * It use the given SAX function block to handle the parsing callback.
9822 * If sax is NULL, fallback to the default DOM tree building routines.
9823 *
9824 * Returns the resulting document tree
9825 */
9826
9827xmlDocPtr
9828xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9829 xmlDocPtr ret;
9830 xmlParserCtxtPtr ctxt;
9831
9832 if (cur == NULL) return(NULL);
9833
9834
9835 ctxt = xmlCreateDocParserCtxt(cur);
9836 if (ctxt == NULL) return(NULL);
9837 if (sax != NULL) {
9838 ctxt->sax = sax;
9839 ctxt->userData = NULL;
9840 }
9841
9842 xmlParseDocument(ctxt);
9843 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9844 else {
9845 ret = NULL;
9846 xmlFreeDoc(ctxt->myDoc);
9847 ctxt->myDoc = NULL;
9848 }
9849 if (sax != NULL)
9850 ctxt->sax = NULL;
9851 xmlFreeParserCtxt(ctxt);
9852
9853 return(ret);
9854}
9855
9856/**
9857 * xmlParseDoc:
9858 * @cur: a pointer to an array of xmlChar
9859 *
9860 * parse an XML in-memory document and build a tree.
9861 *
9862 * Returns the resulting document tree
9863 */
9864
9865xmlDocPtr
9866xmlParseDoc(xmlChar *cur) {
9867 return(xmlSAXParseDoc(NULL, cur, 0));
9868}
9869
9870
9871/************************************************************************
9872 * *
9873 * Miscellaneous *
9874 * *
9875 ************************************************************************/
9876
9877#ifdef LIBXML_XPATH_ENABLED
9878#include <libxml/xpath.h>
9879#endif
9880
/* set to 1 once xmlInitParser() has run, put back to 0 by xmlCleanupParser() */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 * Calls made while the library is already marked as initialized
 * do nothing.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
9909
9910/**
9911 * xmlCleanupParser:
9912 *
9913 * Cleanup function for the XML parser. It tries to reclaim all
9914 * parsing related global memory allocated for the parser processing.
9915 * It doesn't deallocate any document related memory. Calling this
9916 * function should not prevent reusing the parser.
9917 */
9918
9919void
9920xmlCleanupParser(void) {
9921 xmlParserInitialized = 0;
9922 xmlCleanupCharEncodingHandlers();
9923 xmlCleanupPredefinedEntities();
9924}
9925
9926/**
9927 * xmlPedanticParserDefault:
9928 * @val: int 0 or 1
9929 *
9930 * Set and return the previous value for enabling pedantic warnings.
9931 *
9932 * Returns the last value for 0 for no substitution, 1 for substitution.
9933 */
9934
9935int
9936xmlPedanticParserDefault(int val) {
9937 int old = xmlPedanticParserDefaultValue;
9938
9939 xmlPedanticParserDefaultValue = val;
9940 return(old);
9941}
9942
9943/**
9944 * xmlSubstituteEntitiesDefault:
9945 * @val: int 0 or 1
9946 *
9947 * Set and return the previous value for default entity support.
9948 * Initially the parser always keep entity references instead of substituting
9949 * entity values in the output. This function has to be used to change the
9950 * default parser behaviour
9951 * SAX::subtituteEntities() has to be used for changing that on a file by
9952 * file basis.
9953 *
9954 * Returns the last value for 0 for no substitution, 1 for substitution.
9955 */
9956
9957int
9958xmlSubstituteEntitiesDefault(int val) {
9959 int old = xmlSubstituteEntitiesDefaultValue;
9960
9961 xmlSubstituteEntitiesDefaultValue = val;
9962 return(old);
9963}
9964
9965/**
9966 * xmlKeepBlanksDefault:
9967 * @val: int 0 or 1
9968 *
9969 * Set and return the previous value for default blanks text nodes support.
9970 * The 1.x version of the parser used an heuristic to try to detect
9971 * ignorable white spaces. As a result the SAX callback was generating
9972 * ignorableWhitespace() callbacks instead of characters() one, and when
9973 * using the DOM output text nodes containing those blanks were not generated.
9974 * The 2.x and later version will switch to the XML standard way and
9975 * ignorableWhitespace() are only generated when running the parser in
9976 * validating mode and when the current element doesn't allow CDATA or
9977 * mixed content.
9978 * This function is provided as a way to force the standard behaviour
9979 * on 1.X libs and to switch back to the old mode for compatibility when
9980 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9981 * by using xmlIsBlankNode() commodity function to detect the "empty"
9982 * nodes generated.
9983 * This value also affect autogeneration of indentation when saving code
9984 * if blanks sections are kept, indentation is not generated.
9985 *
9986 * Returns the last value for 0 for no substitution, 1 for substitution.
9987 */
9988
9989int
9990xmlKeepBlanksDefault(int val) {
9991 int old = xmlKeepBlanksDefaultValue;
9992
9993 xmlKeepBlanksDefaultValue = val;
9994 xmlIndentTreeOutput = !val;
9995 return(old);
9996}
9997