blob: 9c76c8fcd48f7d6e2d465411ddee8ead81244910 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
/*
 * XML_DIR_SEP: directory separator character, a backslash when WIN32 is
 * defined and a forward slash otherwise.
 */
#ifdef WIN32
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Sizes, counted in xmlChar, of the parser scratch buffers.
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation buffer
 * allocated by xmlStringDecodeEntities(); XML_PARSER_BUFFER_SIZE is the
 * amount of free room that function keeps at the end of the buffer.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
79
/*
 * Various global defaults for parsing
 *
 * NOTE(review): judging by their names these seed the matching options of
 * newly created parser contexts; the code reading them is not in this part
 * of the file, confirm before relying on it.
 */
int xmlGetWarningsDefaultValue = 1;
/*
 * When non-zero, entity input pushes/pops and entity references are traced
 * through xmlGenericError().
 */
int xmlParserDebugEntities = 0;
#ifdef VMS
/*
 * VMS build: the two variables below are defined under truncated names and
 * the full-length names are mapped onto them with #define.
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
97
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
106
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */

/* Handles a '%' parameter-entity reference met in the input stream. */
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
/* PEReference parser working on a string; used by xmlStringDecodeEntities(). */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

/* Forward declaration; the definition is not in this part of the file. */
static int
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000117
118/************************************************************************
119 * *
120 * Parser stacks related functions and macros *
121 * *
122 ************************************************************************/
123
/*
 * Forward declaration: EntityRef parser working on a string; called by
 * xmlStringDecodeEntities().
 */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
126
127/*
128 * Generic function for accessing stacks in the Parser Context
129 */
130
131#define PUSH_AND_POP(scope, type, name) \
132scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
133 if (ctxt->name##Nr >= ctxt->name##Max) { \
134 ctxt->name##Max *= 2; \
135 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
136 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
137 if (ctxt->name##Tab == NULL) { \
138 xmlGenericError(xmlGenericErrorContext, \
139 "realloc failed !\n"); \
140 return(0); \
141 } \
142 } \
143 ctxt->name##Tab[ctxt->name##Nr] = value; \
144 ctxt->name = value; \
145 return(ctxt->name##Nr++); \
146} \
147scope type name##Pop(xmlParserCtxtPtr ctxt) { \
148 type ret; \
149 if (ctxt->name##Nr <= 0) return(0); \
150 ctxt->name##Nr--; \
151 if (ctxt->name##Nr > 0) \
152 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
153 else \
154 ctxt->name = NULL; \
155 ret = ctxt->name##Tab[ctxt->name##Nr]; \
156 ctxt->name##Tab[ctxt->name##Nr] = 0; \
157 return(ret); \
158} \
159
160/*
161 * Those macros actually generate the functions
162 */
163PUSH_AND_POP(extern, xmlParserInputPtr, input)
164PUSH_AND_POP(extern, xmlNodePtr, node)
165PUSH_AND_POP(extern, xmlChar*, name)
166
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000167static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000168 if (ctxt->spaceNr >= ctxt->spaceMax) {
169 ctxt->spaceMax *= 2;
170 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
171 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
172 if (ctxt->spaceTab == NULL) {
173 xmlGenericError(xmlGenericErrorContext,
174 "realloc failed !\n");
175 return(0);
176 }
177 }
178 ctxt->spaceTab[ctxt->spaceNr] = val;
179 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
180 return(ctxt->spaceNr++);
181}
182
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000183static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000184 int ret;
185 if (ctxt->spaceNr <= 0) return(0);
186 ctxt->spaceNr--;
187 if (ctxt->spaceNr > 0)
188 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
189 else
190 ctxt->space = NULL;
191 ret = ctxt->spaceTab[ctxt->spaceNr];
192 ctxt->spaceTab[ctxt->spaceNr] = -1;
193 return(ret);
194}
195
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named `ctxt` (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often need to make assumption on the context to
 * use them
 *
 *   CUR_PTR  return the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      returns the pending ctxt->token when one is set, otherwise the
 *            current xmlChar of the input buffer.
 *            This should be used internally by the parser
 *            only to compare to ASCII values otherwise it would break when
 *            running with UTF-8 encoding.
 *   RAW      same as CUR but restricted to the input buffer: it yields -1
 *            while a token is pending, so it never matches a character.
 *   NXT(n)   returns the n'th next xmlChar. Same as CUR it should be used
 *            only to compare on ASCII based substring.
 *   SKIP(n)  Skip n xmlChar, and must also be used only to skip ASCII defined
 *            strings within the parser. Hands a following '%' to
 *            xmlParserHandlePEReference() and pops the input when it is
 *            exhausted.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT     Skip to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXT1    Skip exactly one xmlChar; grows the input when the terminating
 *            0 is reached but updates neither line nor column and never
 *            pops the input.
 *   NEXTL(l) Skip l xmlChars in the input buffer, updating line/column from
 *            the character being left and clearing any pending token.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *            to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK, GROW, NEXT1 and COPY_BUF expand to a bare if statement or
 * brace block, not to do { } while (0). Do not use them as the unbraced
 * body of an if/else.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/* Drop already consumed data once more than INPUT_CHUNK lies behind cur. */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

/* Read more data when fewer than INPUT_CHUNK xmlChars remain ahead of cur. */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/* l == 1 stores v as a single xmlChar, any other l goes through
 * xmlCopyCharMultiByte(). */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000282
283/**
284 * xmlSkipBlankChars:
285 * @ctxt: the XML parser context
286 *
287 * skip all blanks character found at that point in the input streams.
288 * It pops up finished entities in the process if allowable at that point.
289 *
290 * Returns the number of space chars skipped
291 */
292
293int
294xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000295 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000296
Daniel Veillard02141ea2001-04-30 11:46:40 +0000297 if (ctxt->token != 0) {
298 if (!IS_BLANK(ctxt->token))
299 return(0);
300 ctxt->token = 0;
301 res++;
302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303 /*
304 * It's Okay to use CUR/NEXT here since all the blanks are on
305 * the ASCII range.
306 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000307 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
308 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000309 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000310 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000311 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000312 cur = ctxt->input->cur;
313 while (IS_BLANK(*cur)) {
314 if (*cur == '\n') {
315 ctxt->input->line++; ctxt->input->col = 1;
316 }
317 cur++;
318 res++;
319 if (*cur == 0) {
320 ctxt->input->cur = cur;
321 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
322 cur = ctxt->input->cur;
323 }
324 }
325 ctxt->input->cur = cur;
326 } else {
327 int cur;
328 do {
329 cur = CUR;
330 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
331 NEXT;
332 cur = CUR;
333 res++;
334 }
335 while ((cur == 0) && (ctxt->inputNr > 1) &&
336 (ctxt->instate != XML_PARSER_COMMENT)) {
337 xmlPopInput(ctxt);
338 cur = CUR;
339 }
340 /*
341 * Need to handle support of entities branching here
342 */
343 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
344 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
345 }
Owen Taylor3473f882001-02-23 17:55:21 +0000346 return(res);
347}
348
349/************************************************************************
350 * *
351 * Commodity functions to handle entities *
352 * *
353 ************************************************************************/
354
355/**
356 * xmlPopInput:
357 * @ctxt: an XML parser context
358 *
359 * xmlPopInput: the current input pointed by ctxt->input came to an end
360 * pop it and return the next char.
361 *
362 * Returns the current xmlChar in the parser context
363 */
364xmlChar
365xmlPopInput(xmlParserCtxtPtr ctxt) {
366 if (ctxt->inputNr == 1) return(0); /* End of main Input */
367 if (xmlParserDebugEntities)
368 xmlGenericError(xmlGenericErrorContext,
369 "Popping input %d\n", ctxt->inputNr);
370 xmlFreeInputStream(inputPop(ctxt));
371 if ((*ctxt->input->cur == 0) &&
372 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
373 return(xmlPopInput(ctxt));
374 return(CUR);
375}
376
377/**
378 * xmlPushInput:
379 * @ctxt: an XML parser context
380 * @input: an XML parser input fragment (entity, XML fragment ...).
381 *
382 * xmlPushInput: switch to a new input stream which is stacked on top
383 * of the previous one(s).
384 */
385void
386xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
387 if (input == NULL) return;
388
389 if (xmlParserDebugEntities) {
390 if ((ctxt->input != NULL) && (ctxt->input->filename))
391 xmlGenericError(xmlGenericErrorContext,
392 "%s(%d): ", ctxt->input->filename,
393 ctxt->input->line);
394 xmlGenericError(xmlGenericErrorContext,
395 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
396 }
397 inputPush(ctxt, input);
398 GROW;
399}
400
401/**
402 * xmlParseCharRef:
403 * @ctxt: an XML parser context
404 *
405 * parse Reference declarations
406 *
407 * [66] CharRef ::= '&#' [0-9]+ ';' |
408 * '&#x' [0-9a-fA-F]+ ';'
409 *
410 * [ WFC: Legal Character ]
411 * Characters referred to using character references must match the
412 * production for Char.
413 *
414 * Returns the value parsed (as an int), 0 in case of error
415 */
416int
417xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000418 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000419 int count = 0;
420
421 if (ctxt->token != 0) {
422 val = ctxt->token;
423 ctxt->token = 0;
424 return(val);
425 }
426 /*
427 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
428 */
429 if ((RAW == '&') && (NXT(1) == '#') &&
430 (NXT(2) == 'x')) {
431 SKIP(3);
432 GROW;
433 while (RAW != ';') { /* loop blocked by count */
434 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
435 val = val * 16 + (CUR - '0');
436 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
437 val = val * 16 + (CUR - 'a') + 10;
438 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
439 val = val * 16 + (CUR - 'A') + 10;
440 else {
441 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
443 ctxt->sax->error(ctxt->userData,
444 "xmlParseCharRef: invalid hexadecimal value\n");
445 ctxt->wellFormed = 0;
446 ctxt->disableSAX = 1;
447 val = 0;
448 break;
449 }
450 NEXT;
451 count++;
452 }
453 if (RAW == ';') {
454 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
455 ctxt->nbChars ++;
456 ctxt->input->cur++;
457 }
458 } else if ((RAW == '&') && (NXT(1) == '#')) {
459 SKIP(2);
460 GROW;
461 while (RAW != ';') { /* loop blocked by count */
462 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
463 val = val * 10 + (CUR - '0');
464 else {
465 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
467 ctxt->sax->error(ctxt->userData,
468 "xmlParseCharRef: invalid decimal value\n");
469 ctxt->wellFormed = 0;
470 ctxt->disableSAX = 1;
471 val = 0;
472 break;
473 }
474 NEXT;
475 count++;
476 }
477 if (RAW == ';') {
478 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
479 ctxt->nbChars ++;
480 ctxt->input->cur++;
481 }
482 } else {
483 ctxt->errNo = XML_ERR_INVALID_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 }
490
491 /*
492 * [ WFC: Legal Character ]
493 * Characters referred to using character references must match the
494 * production for Char.
495 */
496 if (IS_CHAR(val)) {
497 return(val);
498 } else {
499 ctxt->errNo = XML_ERR_INVALID_CHAR;
500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
501 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
502 val);
503 ctxt->wellFormed = 0;
504 ctxt->disableSAX = 1;
505 }
506 return(0);
507}
508
509/**
510 * xmlParseStringCharRef:
511 * @ctxt: an XML parser context
512 * @str: a pointer to an index in the string
513 *
514 * parse Reference declarations, variant parsing from a string rather
515 * than an an input flow.
516 *
517 * [66] CharRef ::= '&#' [0-9]+ ';' |
518 * '&#x' [0-9a-fA-F]+ ';'
519 *
520 * [ WFC: Legal Character ]
521 * Characters referred to using character references must match the
522 * production for Char.
523 *
524 * Returns the value parsed (as an int), 0 in case of error, str will be
525 * updated to the current value of the index
526 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000527static int
Owen Taylor3473f882001-02-23 17:55:21 +0000528xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
529 const xmlChar *ptr;
530 xmlChar cur;
531 int val = 0;
532
533 if ((str == NULL) || (*str == NULL)) return(0);
534 ptr = *str;
535 cur = *ptr;
536 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
537 ptr += 3;
538 cur = *ptr;
539 while (cur != ';') { /* Non input consuming loop */
540 if ((cur >= '0') && (cur <= '9'))
541 val = val * 16 + (cur - '0');
542 else if ((cur >= 'a') && (cur <= 'f'))
543 val = val * 16 + (cur - 'a') + 10;
544 else if ((cur >= 'A') && (cur <= 'F'))
545 val = val * 16 + (cur - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseStringCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
552 ctxt->disableSAX = 1;
553 val = 0;
554 break;
555 }
556 ptr++;
557 cur = *ptr;
558 }
559 if (cur == ';')
560 ptr++;
561 } else if ((cur == '&') && (ptr[1] == '#')){
562 ptr += 2;
563 cur = *ptr;
564 while (cur != ';') { /* Non input consuming loops */
565 if ((cur >= '0') && (cur <= '9'))
566 val = val * 10 + (cur - '0');
567 else {
568 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
570 ctxt->sax->error(ctxt->userData,
571 "xmlParseStringCharRef: invalid decimal value\n");
572 ctxt->wellFormed = 0;
573 ctxt->disableSAX = 1;
574 val = 0;
575 break;
576 }
577 ptr++;
578 cur = *ptr;
579 }
580 if (cur == ';')
581 ptr++;
582 } else {
583 ctxt->errNo = XML_ERR_INVALID_CHARREF;
584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
585 ctxt->sax->error(ctxt->userData,
586 "xmlParseCharRef: invalid value\n");
587 ctxt->wellFormed = 0;
588 ctxt->disableSAX = 1;
589 return(0);
590 }
591 *str = ptr;
592
593 /*
594 * [ WFC: Legal Character ]
595 * Characters referred to using character references must match the
596 * production for Char.
597 */
598 if (IS_CHAR(val)) {
599 return(val);
600 } else {
601 ctxt->errNo = XML_ERR_INVALID_CHAR;
602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603 ctxt->sax->error(ctxt->userData,
604 "CharRef: invalid xmlChar value %d\n", val);
605 ctxt->wellFormed = 0;
606 ctxt->disableSAX = 1;
607 }
608 return(0);
609}
610
611/**
612 * xmlParserHandlePEReference:
613 * @ctxt: the parser context
614 *
615 * [69] PEReference ::= '%' Name ';'
616 *
617 * [ WFC: No Recursion ]
618 * A parsed entity must not contain a recursive
619 * reference to itself, either directly or indirectly.
620 *
621 * [ WFC: Entity Declared ]
622 * In a document without any DTD, a document with only an internal DTD
623 * subset which contains no parameter entity references, or a document
624 * with "standalone='yes'", ... ... The declaration of a parameter
625 * entity must precede any reference to it...
626 *
627 * [ VC: Entity Declared ]
628 * In a document with an external subset or external parameter entities
629 * with "standalone='no'", ... ... The declaration of a parameter entity
630 * must precede any reference to it...
631 *
632 * [ WFC: In DTD ]
633 * Parameter-entity references may only appear in the DTD.
634 * NOTE: misleading but this is handled.
635 *
636 * A PEReference may have been detected in the current input stream
637 * the handling is done accordingly to
638 * http://www.w3.org/TR/REC-xml#entproc
639 * i.e.
640 * - Included in literal in entity values
641 * - Included as Paraemeter Entity reference within DTDs
642 */
643void
644xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
645 xmlChar *name;
646 xmlEntityPtr entity = NULL;
647 xmlParserInputPtr input;
648
649 if (ctxt->token != 0) {
650 return;
651 }
652 if (RAW != '%') return;
653 switch(ctxt->instate) {
654 case XML_PARSER_CDATA_SECTION:
655 return;
656 case XML_PARSER_COMMENT:
657 return;
658 case XML_PARSER_START_TAG:
659 return;
660 case XML_PARSER_END_TAG:
661 return;
662 case XML_PARSER_EOF:
663 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
665 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
666 ctxt->wellFormed = 0;
667 ctxt->disableSAX = 1;
668 return;
669 case XML_PARSER_PROLOG:
670 case XML_PARSER_START:
671 case XML_PARSER_MISC:
672 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
674 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
675 ctxt->wellFormed = 0;
676 ctxt->disableSAX = 1;
677 return;
678 case XML_PARSER_ENTITY_DECL:
679 case XML_PARSER_CONTENT:
680 case XML_PARSER_ATTRIBUTE_VALUE:
681 case XML_PARSER_PI:
682 case XML_PARSER_SYSTEM_LITERAL:
683 /* we just ignore it there */
684 return;
685 case XML_PARSER_EPILOG:
686 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
688 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
689 ctxt->wellFormed = 0;
690 ctxt->disableSAX = 1;
691 return;
692 case XML_PARSER_ENTITY_VALUE:
693 /*
694 * NOTE: in the case of entity values, we don't do the
695 * substitution here since we need the literal
696 * entity value to be able to save the internal
697 * subset of the document.
698 * This will be handled by xmlStringDecodeEntities
699 */
700 return;
701 case XML_PARSER_DTD:
702 /*
703 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
704 * In the internal DTD subset, parameter-entity references
705 * can occur only where markup declarations can occur, not
706 * within markup declarations.
707 * In that case this is handled in xmlParseMarkupDecl
708 */
709 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
710 return;
711 break;
712 case XML_PARSER_IGNORE:
713 return;
714 }
715
716 NEXT;
717 name = xmlParseName(ctxt);
718 if (xmlParserDebugEntities)
719 xmlGenericError(xmlGenericErrorContext,
720 "PE Reference: %s\n", name);
721 if (name == NULL) {
722 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
724 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
725 ctxt->wellFormed = 0;
726 ctxt->disableSAX = 1;
727 } else {
728 if (RAW == ';') {
729 NEXT;
730 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
731 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
732 if (entity == NULL) {
733
734 /*
735 * [ WFC: Entity Declared ]
736 * In a document without any DTD, a document with only an
737 * internal DTD subset which contains no parameter entity
738 * references, or a document with "standalone='yes'", ...
739 * ... The declaration of a parameter entity must precede
740 * any reference to it...
741 */
742 if ((ctxt->standalone == 1) ||
743 ((ctxt->hasExternalSubset == 0) &&
744 (ctxt->hasPErefs == 0))) {
745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
746 ctxt->sax->error(ctxt->userData,
747 "PEReference: %%%s; not found\n", name);
748 ctxt->wellFormed = 0;
749 ctxt->disableSAX = 1;
750 } else {
751 /*
752 * [ VC: Entity Declared ]
753 * In a document with an external subset or external
754 * parameter entities with "standalone='no'", ...
755 * ... The declaration of a parameter entity must precede
756 * any reference to it...
757 */
758 if ((!ctxt->disableSAX) &&
759 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
760 ctxt->vctxt.error(ctxt->vctxt.userData,
761 "PEReference: %%%s; not found\n", name);
762 } else if ((!ctxt->disableSAX) &&
763 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
764 ctxt->sax->warning(ctxt->userData,
765 "PEReference: %%%s; not found\n", name);
766 ctxt->valid = 0;
767 }
768 } else {
769 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
770 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000771 xmlChar start[4];
772 xmlCharEncoding enc;
773
Owen Taylor3473f882001-02-23 17:55:21 +0000774 /*
775 * handle the extra spaces added before and after
776 * c.f. http://www.w3.org/TR/REC-xml#as-PE
777 * this is done independantly.
778 */
779 input = xmlNewEntityInputStream(ctxt, entity);
780 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000781
782 /*
783 * Get the 4 first bytes and decode the charset
784 * if enc != XML_CHAR_ENCODING_NONE
785 * plug some encoding conversion routines.
786 */
787 GROW
788 start[0] = RAW;
789 start[1] = NXT(1);
790 start[2] = NXT(2);
791 start[3] = NXT(3);
792 enc = xmlDetectCharEncoding(start, 4);
793 if (enc != XML_CHAR_ENCODING_NONE) {
794 xmlSwitchEncoding(ctxt, enc);
795 }
796
Owen Taylor3473f882001-02-23 17:55:21 +0000797 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
798 (RAW == '<') && (NXT(1) == '?') &&
799 (NXT(2) == 'x') && (NXT(3) == 'm') &&
800 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
801 xmlParseTextDecl(ctxt);
802 }
803 if (ctxt->token == 0)
804 ctxt->token = ' ';
805 } else {
806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
807 ctxt->sax->error(ctxt->userData,
808 "xmlHandlePEReference: %s is not a parameter entity\n",
809 name);
810 ctxt->wellFormed = 0;
811 ctxt->disableSAX = 1;
812 }
813 }
814 } else {
815 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
817 ctxt->sax->error(ctxt->userData,
818 "xmlHandlePEReference: expecting ';'\n");
819 ctxt->wellFormed = 0;
820 ctxt->disableSAX = 1;
821 }
822 xmlFree(name);
823 }
824}
825
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles buffer##_size and reallocates @buffer to that
 * many xmlChar. It expects both `buffer` and `buffer##_size` to be
 * variables of the calling function. On allocation failure the old block
 * is released and the CALLING FUNCTION returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    /* go through a temporary, a failed realloc keeps the old block */	\
    buffer##_tmp = (xmlChar *)						\
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
838
839/**
840 * xmlStringDecodeEntities:
841 * @ctxt: the parser context
842 * @str: the input string
843 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
844 * @end: an end marker xmlChar, 0 if none
845 * @end2: an end marker xmlChar, 0 if none
846 * @end3: an end marker xmlChar, 0 if none
847 *
848 * Takes a entity string content and process to do the adequate subtitutions.
849 *
850 * [67] Reference ::= EntityRef | CharRef
851 *
852 * [69] PEReference ::= '%' Name ';'
853 *
854 * Returns A newly allocated string with the substitution done. The caller
855 * must deallocate it !
856 */
857xmlChar *
858xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
859 xmlChar end, xmlChar end2, xmlChar end3) {
860 xmlChar *buffer = NULL;
861 int buffer_size = 0;
862
863 xmlChar *current = NULL;
864 xmlEntityPtr ent;
865 int c,l;
866 int nbchars = 0;
867
868 if (str == NULL)
869 return(NULL);
870
871 if (ctxt->depth > 40) {
872 ctxt->errNo = XML_ERR_ENTITY_LOOP;
873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
874 ctxt->sax->error(ctxt->userData,
875 "Detected entity reference loop\n");
876 ctxt->wellFormed = 0;
877 ctxt->disableSAX = 1;
878 return(NULL);
879 }
880
881 /*
882 * allocate a translation buffer.
883 */
884 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
885 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
886 if (buffer == NULL) {
887 perror("xmlDecodeEntities: malloc failed");
888 return(NULL);
889 }
890
891 /*
892 * Ok loop until we reach one of the ending char or a size limit.
893 * we are operating on already parsed values.
894 */
895 c = CUR_SCHAR(str, l);
896 while ((c != 0) && (c != end) && /* non input consuming loop */
897 (c != end2) && (c != end3)) {
898
899 if (c == 0) break;
900 if ((c == '&') && (str[1] == '#')) {
901 int val = xmlParseStringCharRef(ctxt, &str);
902 if (val != 0) {
903 COPY_BUF(0,buffer,nbchars,val);
904 }
905 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
906 if (xmlParserDebugEntities)
907 xmlGenericError(xmlGenericErrorContext,
908 "String decoding Entity Reference: %.30s\n",
909 str);
910 ent = xmlParseStringEntityRef(ctxt, &str);
911 if ((ent != NULL) &&
912 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
913 if (ent->content != NULL) {
914 COPY_BUF(0,buffer,nbchars,ent->content[0]);
915 } else {
916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
917 ctxt->sax->error(ctxt->userData,
918 "internal error entity has no content\n");
919 }
920 } else if ((ent != NULL) && (ent->content != NULL)) {
921 xmlChar *rep;
922
923 ctxt->depth++;
924 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
925 0, 0, 0);
926 ctxt->depth--;
927 if (rep != NULL) {
928 current = rep;
929 while (*current != 0) { /* non input consuming loop */
930 buffer[nbchars++] = *current++;
931 if (nbchars >
932 buffer_size - XML_PARSER_BUFFER_SIZE) {
933 growBuffer(buffer);
934 }
935 }
936 xmlFree(rep);
937 }
938 } else if (ent != NULL) {
939 int i = xmlStrlen(ent->name);
940 const xmlChar *cur = ent->name;
941
942 buffer[nbchars++] = '&';
943 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
944 growBuffer(buffer);
945 }
946 for (;i > 0;i--)
947 buffer[nbchars++] = *cur++;
948 buffer[nbchars++] = ';';
949 }
950 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
951 if (xmlParserDebugEntities)
952 xmlGenericError(xmlGenericErrorContext,
953 "String decoding PE Reference: %.30s\n", str);
954 ent = xmlParseStringPEReference(ctxt, &str);
955 if (ent != NULL) {
956 xmlChar *rep;
957
958 ctxt->depth++;
959 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
960 0, 0, 0);
961 ctxt->depth--;
962 if (rep != NULL) {
963 current = rep;
964 while (*current != 0) { /* non input consuming loop */
965 buffer[nbchars++] = *current++;
966 if (nbchars >
967 buffer_size - XML_PARSER_BUFFER_SIZE) {
968 growBuffer(buffer);
969 }
970 }
971 xmlFree(rep);
972 }
973 }
974 } else {
975 COPY_BUF(l,buffer,nbchars,c);
976 str += l;
977 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
978 growBuffer(buffer);
979 }
980 }
981 c = CUR_SCHAR(str, l);
982 }
983 buffer[nbchars++] = 0;
984 return(buffer);
985}
986
987
988/************************************************************************
989 * *
990 * Commodity functions to handle xmlChars *
991 * *
992 ************************************************************************/
993
994/**
995 * xmlStrndup:
996 * @cur: the input xmlChar *
997 * @len: the len of @cur
998 *
999 * a strndup for array of xmlChar's
1000 *
1001 * Returns a new xmlChar * or NULL
1002 */
1003xmlChar *
1004xmlStrndup(const xmlChar *cur, int len) {
1005 xmlChar *ret;
1006
1007 if ((cur == NULL) || (len < 0)) return(NULL);
1008 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1009 if (ret == NULL) {
1010 xmlGenericError(xmlGenericErrorContext,
1011 "malloc of %ld byte failed\n",
1012 (len + 1) * (long)sizeof(xmlChar));
1013 return(NULL);
1014 }
1015 memcpy(ret, cur, len * sizeof(xmlChar));
1016 ret[len] = 0;
1017 return(ret);
1018}
1019
1020/**
1021 * xmlStrdup:
1022 * @cur: the input xmlChar *
1023 *
1024 * a strdup for array of xmlChar's. Since they are supposed to be
1025 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1026 * a termination mark of '0'.
1027 *
1028 * Returns a new xmlChar * or NULL
1029 */
1030xmlChar *
1031xmlStrdup(const xmlChar *cur) {
1032 const xmlChar *p = cur;
1033
1034 if (cur == NULL) return(NULL);
1035 while (*p != 0) p++; /* non input consuming */
1036 return(xmlStrndup(cur, p - cur));
1037}
1038
1039/**
1040 * xmlCharStrndup:
1041 * @cur: the input char *
1042 * @len: the len of @cur
1043 *
1044 * a strndup for char's to xmlChar's
1045 *
1046 * Returns a new xmlChar * or NULL
1047 */
1048
1049xmlChar *
1050xmlCharStrndup(const char *cur, int len) {
1051 int i;
1052 xmlChar *ret;
1053
1054 if ((cur == NULL) || (len < 0)) return(NULL);
1055 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1056 if (ret == NULL) {
1057 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1058 (len + 1) * (long)sizeof(xmlChar));
1059 return(NULL);
1060 }
1061 for (i = 0;i < len;i++)
1062 ret[i] = (xmlChar) cur[i];
1063 ret[len] = 0;
1064 return(ret);
1065}
1066
1067/**
1068 * xmlCharStrdup:
1069 * @cur: the input char *
1070 * @len: the len of @cur
1071 *
1072 * a strdup for char's to xmlChar's
1073 *
1074 * Returns a new xmlChar * or NULL
1075 */
1076
1077xmlChar *
1078xmlCharStrdup(const char *cur) {
1079 const char *p = cur;
1080
1081 if (cur == NULL) return(NULL);
1082 while (*p != '\0') p++; /* non input consuming */
1083 return(xmlCharStrndup(cur, p - cur));
1084}
1085
1086/**
1087 * xmlStrcmp:
1088 * @str1: the first xmlChar *
1089 * @str2: the second xmlChar *
1090 *
1091 * a strcmp for xmlChar's
1092 *
1093 * Returns the integer result of the comparison
1094 */
1095
1096int
1097xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1098 register int tmp;
1099
1100 if (str1 == str2) return(0);
1101 if (str1 == NULL) return(-1);
1102 if (str2 == NULL) return(1);
1103 do {
1104 tmp = *str1++ - *str2;
1105 if (tmp != 0) return(tmp);
1106 } while (*str2++ != 0);
1107 return 0;
1108}
1109
1110/**
1111 * xmlStrEqual:
1112 * @str1: the first xmlChar *
1113 * @str2: the second xmlChar *
1114 *
1115 * Check if both string are equal of have same content
1116 * Should be a bit more readable and faster than xmlStrEqual()
1117 *
1118 * Returns 1 if they are equal, 0 if they are different
1119 */
1120
1121int
1122xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1123 if (str1 == str2) return(1);
1124 if (str1 == NULL) return(0);
1125 if (str2 == NULL) return(0);
1126 do {
1127 if (*str1++ != *str2) return(0);
1128 } while (*str2++);
1129 return(1);
1130}
1131
1132/**
1133 * xmlStrncmp:
1134 * @str1: the first xmlChar *
1135 * @str2: the second xmlChar *
1136 * @len: the max comparison length
1137 *
1138 * a strncmp for xmlChar's
1139 *
1140 * Returns the integer result of the comparison
1141 */
1142
1143int
1144xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1145 register int tmp;
1146
1147 if (len <= 0) return(0);
1148 if (str1 == str2) return(0);
1149 if (str1 == NULL) return(-1);
1150 if (str2 == NULL) return(1);
1151 do {
1152 tmp = *str1++ - *str2;
1153 if (tmp != 0 || --len == 0) return(tmp);
1154 } while (*str2++ != 0);
1155 return 0;
1156}
1157
1158static xmlChar casemap[256] = {
1159 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1160 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1161 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1162 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1163 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1164 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1165 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1166 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1167 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1168 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1169 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1170 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1171 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1172 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1173 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1174 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1175 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1176 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1177 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1178 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1179 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1180 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1181 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1182 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1183 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1184 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1185 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1186 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1187 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1188 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1189 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1190 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1191};
1192
1193/**
1194 * xmlStrcasecmp:
1195 * @str1: the first xmlChar *
1196 * @str2: the second xmlChar *
1197 *
1198 * a strcasecmp for xmlChar's
1199 *
1200 * Returns the integer result of the comparison
1201 */
1202
1203int
1204xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1205 register int tmp;
1206
1207 if (str1 == str2) return(0);
1208 if (str1 == NULL) return(-1);
1209 if (str2 == NULL) return(1);
1210 do {
1211 tmp = casemap[*str1++] - casemap[*str2];
1212 if (tmp != 0) return(tmp);
1213 } while (*str2++ != 0);
1214 return 0;
1215}
1216
1217/**
1218 * xmlStrncasecmp:
1219 * @str1: the first xmlChar *
1220 * @str2: the second xmlChar *
1221 * @len: the max comparison length
1222 *
1223 * a strncasecmp for xmlChar's
1224 *
1225 * Returns the integer result of the comparison
1226 */
1227
1228int
1229xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1230 register int tmp;
1231
1232 if (len <= 0) return(0);
1233 if (str1 == str2) return(0);
1234 if (str1 == NULL) return(-1);
1235 if (str2 == NULL) return(1);
1236 do {
1237 tmp = casemap[*str1++] - casemap[*str2];
1238 if (tmp != 0 || --len == 0) return(tmp);
1239 } while (*str2++ != 0);
1240 return 0;
1241}
1242
1243/**
1244 * xmlStrchr:
1245 * @str: the xmlChar * array
1246 * @val: the xmlChar to search
1247 *
1248 * a strchr for xmlChar's
1249 *
1250 * Returns the xmlChar * for the first occurence or NULL.
1251 */
1252
1253const xmlChar *
1254xmlStrchr(const xmlChar *str, xmlChar val) {
1255 if (str == NULL) return(NULL);
1256 while (*str != 0) { /* non input consuming */
1257 if (*str == val) return((xmlChar *) str);
1258 str++;
1259 }
1260 return(NULL);
1261}
1262
1263/**
1264 * xmlStrstr:
1265 * @str: the xmlChar * array (haystack)
1266 * @val: the xmlChar to search (needle)
1267 *
1268 * a strstr for xmlChar's
1269 *
1270 * Returns the xmlChar * for the first occurence or NULL.
1271 */
1272
1273const xmlChar *
1274xmlStrstr(const xmlChar *str, xmlChar *val) {
1275 int n;
1276
1277 if (str == NULL) return(NULL);
1278 if (val == NULL) return(NULL);
1279 n = xmlStrlen(val);
1280
1281 if (n == 0) return(str);
1282 while (*str != 0) { /* non input consuming */
1283 if (*str == *val) {
1284 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1285 }
1286 str++;
1287 }
1288 return(NULL);
1289}
1290
1291/**
1292 * xmlStrcasestr:
1293 * @str: the xmlChar * array (haystack)
1294 * @val: the xmlChar to search (needle)
1295 *
1296 * a case-ignoring strstr for xmlChar's
1297 *
1298 * Returns the xmlChar * for the first occurence or NULL.
1299 */
1300
1301const xmlChar *
1302xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1303 int n;
1304
1305 if (str == NULL) return(NULL);
1306 if (val == NULL) return(NULL);
1307 n = xmlStrlen(val);
1308
1309 if (n == 0) return(str);
1310 while (*str != 0) { /* non input consuming */
1311 if (casemap[*str] == casemap[*val])
1312 if (!xmlStrncasecmp(str, val, n)) return(str);
1313 str++;
1314 }
1315 return(NULL);
1316}
1317
1318/**
1319 * xmlStrsub:
1320 * @str: the xmlChar * array (haystack)
1321 * @start: the index of the first char (zero based)
1322 * @len: the length of the substring
1323 *
1324 * Extract a substring of a given string
1325 *
1326 * Returns the xmlChar * for the first occurence or NULL.
1327 */
1328
1329xmlChar *
1330xmlStrsub(const xmlChar *str, int start, int len) {
1331 int i;
1332
1333 if (str == NULL) return(NULL);
1334 if (start < 0) return(NULL);
1335 if (len < 0) return(NULL);
1336
1337 for (i = 0;i < start;i++) {
1338 if (*str == 0) return(NULL);
1339 str++;
1340 }
1341 if (*str == 0) return(NULL);
1342 return(xmlStrndup(str, len));
1343}
1344
1345/**
1346 * xmlStrlen:
1347 * @str: the xmlChar * array
1348 *
1349 * length of a xmlChar's string
1350 *
1351 * Returns the number of xmlChar contained in the ARRAY.
1352 */
1353
1354int
1355xmlStrlen(const xmlChar *str) {
1356 int len = 0;
1357
1358 if (str == NULL) return(0);
1359 while (*str != 0) { /* non input consuming */
1360 str++;
1361 len++;
1362 }
1363 return(len);
1364}
1365
1366/**
1367 * xmlStrncat:
1368 * @cur: the original xmlChar * array
1369 * @add: the xmlChar * array added
1370 * @len: the length of @add
1371 *
1372 * a strncat for array of xmlChar's, it will extend cur with the len
1373 * first bytes of @add.
1374 *
1375 * Returns a new xmlChar *, the original @cur is reallocated if needed
1376 * and should not be freed
1377 */
1378
1379xmlChar *
1380xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1381 int size;
1382 xmlChar *ret;
1383
1384 if ((add == NULL) || (len == 0))
1385 return(cur);
1386 if (cur == NULL)
1387 return(xmlStrndup(add, len));
1388
1389 size = xmlStrlen(cur);
1390 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1391 if (ret == NULL) {
1392 xmlGenericError(xmlGenericErrorContext,
1393 "xmlStrncat: realloc of %ld byte failed\n",
1394 (size + len + 1) * (long)sizeof(xmlChar));
1395 return(cur);
1396 }
1397 memcpy(&ret[size], add, len * sizeof(xmlChar));
1398 ret[size + len] = 0;
1399 return(ret);
1400}
1401
1402/**
1403 * xmlStrcat:
1404 * @cur: the original xmlChar * array
1405 * @add: the xmlChar * array added
1406 *
1407 * a strcat for array of xmlChar's. Since they are supposed to be
1408 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1409 * a termination mark of '0'.
1410 *
1411 * Returns a new xmlChar * containing the concatenated string.
1412 */
1413xmlChar *
1414xmlStrcat(xmlChar *cur, const xmlChar *add) {
1415 const xmlChar *p = add;
1416
1417 if (add == NULL) return(cur);
1418 if (cur == NULL)
1419 return(xmlStrdup(add));
1420
1421 while (*p != 0) p++; /* non input consuming */
1422 return(xmlStrncat(cur, add, p - add));
1423}
1424
1425/************************************************************************
1426 * *
1427 * Commodity functions, cleanup needed ? *
1428 * *
1429 ************************************************************************/
1430
1431/**
1432 * areBlanks:
1433 * @ctxt: an XML parser context
1434 * @str: a xmlChar *
1435 * @len: the size of @str
1436 *
1437 * Is this a sequence of blank chars that one can ignore ?
1438 *
1439 * Returns 1 if ignorable 0 otherwise.
1440 */
1441
1442static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1443 int i, ret;
1444 xmlNodePtr lastChild;
1445
Daniel Veillard2f362242001-03-02 17:36:21 +00001446 if (ctxt->keepBlanks)
1447 return(0);
1448
Owen Taylor3473f882001-02-23 17:55:21 +00001449 /*
1450 * Check for xml:space value.
1451 */
1452 if (*(ctxt->space) == 1)
1453 return(0);
1454
1455 /*
1456 * Check that the string is made of blanks
1457 */
1458 for (i = 0;i < len;i++)
1459 if (!(IS_BLANK(str[i]))) return(0);
1460
1461 /*
1462 * Look if the element is mixed content in the Dtd if available
1463 */
1464 if (ctxt->myDoc != NULL) {
1465 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1466 if (ret == 0) return(1);
1467 if (ret == 1) return(0);
1468 }
1469
1470 /*
1471 * Otherwise, heuristic :-\
1472 */
Owen Taylor3473f882001-02-23 17:55:21 +00001473 if (RAW != '<') return(0);
1474 if (ctxt->node == NULL) return(0);
1475 if ((ctxt->node->children == NULL) &&
1476 (RAW == '<') && (NXT(1) == '/')) return(0);
1477
1478 lastChild = xmlGetLastChild(ctxt->node);
1479 if (lastChild == NULL) {
1480 if (ctxt->node->content != NULL) return(0);
1481 } else if (xmlNodeIsText(lastChild))
1482 return(0);
1483 else if ((ctxt->node->children != NULL) &&
1484 (xmlNodeIsText(ctxt->node->children)))
1485 return(0);
1486 return(1);
1487}
1488
/*
 * Forward declarations for recursive behaviour.
 */
1492void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1493void xmlParseReference(xmlParserCtxtPtr ctxt);
1494
1495/************************************************************************
1496 * *
1497 * Extra stuff for namespace support *
1498 * Relates to http://www.w3.org/TR/WD-xml-names *
1499 * *
1500 ************************************************************************/
1501
1502/**
1503 * xmlSplitQName:
1504 * @ctxt: an XML parser context
1505 * @name: an XML parser context
1506 * @prefix: a xmlChar **
1507 *
1508 * parse an UTF8 encoded XML qualified name string
1509 *
1510 * [NS 5] QName ::= (Prefix ':')? LocalPart
1511 *
1512 * [NS 6] Prefix ::= NCName
1513 *
1514 * [NS 7] LocalPart ::= NCName
1515 *
1516 * Returns the local part, and prefix is updated
1517 * to get the Prefix if any.
1518 */
1519
1520xmlChar *
1521xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1522 xmlChar buf[XML_MAX_NAMELEN + 5];
1523 xmlChar *buffer = NULL;
1524 int len = 0;
1525 int max = XML_MAX_NAMELEN;
1526 xmlChar *ret = NULL;
1527 const xmlChar *cur = name;
1528 int c;
1529
1530 *prefix = NULL;
1531
1532#ifndef XML_XML_NAMESPACE
1533 /* xml: prefix is not really a namespace */
1534 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1535 (cur[2] == 'l') && (cur[3] == ':'))
1536 return(xmlStrdup(name));
1537#endif
1538
1539 /* nasty but valid */
1540 if (cur[0] == ':')
1541 return(xmlStrdup(name));
1542
1543 c = *cur++;
1544 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1545 buf[len++] = c;
1546 c = *cur++;
1547 }
1548 if (len >= max) {
1549 /*
1550 * Okay someone managed to make a huge name, so he's ready to pay
1551 * for the processing speed.
1552 */
1553 max = len * 2;
1554
1555 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1556 if (buffer == NULL) {
1557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1558 ctxt->sax->error(ctxt->userData,
1559 "xmlSplitQName: out of memory\n");
1560 return(NULL);
1561 }
1562 memcpy(buffer, buf, len);
1563 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1564 if (len + 10 > max) {
1565 max *= 2;
1566 buffer = (xmlChar *) xmlRealloc(buffer,
1567 max * sizeof(xmlChar));
1568 if (buffer == NULL) {
1569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1570 ctxt->sax->error(ctxt->userData,
1571 "xmlSplitQName: out of memory\n");
1572 return(NULL);
1573 }
1574 }
1575 buffer[len++] = c;
1576 c = *cur++;
1577 }
1578 buffer[len] = 0;
1579 }
1580
1581 if (buffer == NULL)
1582 ret = xmlStrndup(buf, len);
1583 else {
1584 ret = buffer;
1585 buffer = NULL;
1586 max = XML_MAX_NAMELEN;
1587 }
1588
1589
1590 if (c == ':') {
1591 c = *cur++;
1592 if (c == 0) return(ret);
1593 *prefix = ret;
1594 len = 0;
1595
1596 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1597 buf[len++] = c;
1598 c = *cur++;
1599 }
1600 if (len >= max) {
1601 /*
1602 * Okay someone managed to make a huge name, so he's ready to pay
1603 * for the processing speed.
1604 */
1605 max = len * 2;
1606
1607 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1608 if (buffer == NULL) {
1609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1610 ctxt->sax->error(ctxt->userData,
1611 "xmlSplitQName: out of memory\n");
1612 return(NULL);
1613 }
1614 memcpy(buffer, buf, len);
1615 while (c != 0) { /* tested bigname2.xml */
1616 if (len + 10 > max) {
1617 max *= 2;
1618 buffer = (xmlChar *) xmlRealloc(buffer,
1619 max * sizeof(xmlChar));
1620 if (buffer == NULL) {
1621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1622 ctxt->sax->error(ctxt->userData,
1623 "xmlSplitQName: out of memory\n");
1624 return(NULL);
1625 }
1626 }
1627 buffer[len++] = c;
1628 c = *cur++;
1629 }
1630 buffer[len] = 0;
1631 }
1632
1633 if (buffer == NULL)
1634 ret = xmlStrndup(buf, len);
1635 else {
1636 ret = buffer;
1637 }
1638 }
1639
1640 return(ret);
1641}
1642
1643/************************************************************************
1644 * *
1645 * The parser itself *
1646 * Relates to http://www.w3.org/TR/REC-xml *
1647 * *
1648 ************************************************************************/
1649
Daniel Veillard76d66f42001-05-16 21:05:17 +00001650static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001651/**
1652 * xmlParseName:
1653 * @ctxt: an XML parser context
1654 *
1655 * parse an XML name.
1656 *
1657 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1658 * CombiningChar | Extender
1659 *
1660 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1661 *
1662 * [6] Names ::= Name (S Name)*
1663 *
1664 * Returns the Name parsed or NULL
1665 */
1666
1667xmlChar *
1668xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001669 const xmlChar *in;
1670 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001671 int count = 0;
1672
1673 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001674
1675 /*
1676 * Accelerator for simple ASCII names
1677 */
1678 in = ctxt->input->cur;
1679 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1680 ((*in >= 0x41) && (*in <= 0x5A)) ||
1681 (*in == '_') || (*in == ':')) {
1682 in++;
1683 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1684 ((*in >= 0x41) && (*in <= 0x5A)) ||
1685 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001686 (*in == '_') || (*in == '-') ||
1687 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001688 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001689 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001690 count = in - ctxt->input->cur;
1691 ret = xmlStrndup(ctxt->input->cur, count);
1692 ctxt->input->cur = in;
1693 return(ret);
1694 }
1695 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001696 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001697}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001698
Daniel Veillard76d66f42001-05-16 21:05:17 +00001699static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001700xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1701 xmlChar buf[XML_MAX_NAMELEN + 5];
1702 int len = 0, l;
1703 int c;
1704 int count = 0;
1705
1706 /*
1707 * Handler for more complex cases
1708 */
1709 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001710 c = CUR_CHAR(l);
1711 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1712 (!IS_LETTER(c) && (c != '_') &&
1713 (c != ':'))) {
1714 return(NULL);
1715 }
1716
1717 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1718 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1719 (c == '.') || (c == '-') ||
1720 (c == '_') || (c == ':') ||
1721 (IS_COMBINING(c)) ||
1722 (IS_EXTENDER(c)))) {
1723 if (count++ > 100) {
1724 count = 0;
1725 GROW;
1726 }
1727 COPY_BUF(l,buf,len,c);
1728 NEXTL(l);
1729 c = CUR_CHAR(l);
1730 if (len >= XML_MAX_NAMELEN) {
1731 /*
1732 * Okay someone managed to make a huge name, so he's ready to pay
1733 * for the processing speed.
1734 */
1735 xmlChar *buffer;
1736 int max = len * 2;
1737
1738 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1739 if (buffer == NULL) {
1740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1741 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001742 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001743 return(NULL);
1744 }
1745 memcpy(buffer, buf, len);
1746 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1747 (c == '.') || (c == '-') ||
1748 (c == '_') || (c == ':') ||
1749 (IS_COMBINING(c)) ||
1750 (IS_EXTENDER(c))) {
1751 if (count++ > 100) {
1752 count = 0;
1753 GROW;
1754 }
1755 if (len + 10 > max) {
1756 max *= 2;
1757 buffer = (xmlChar *) xmlRealloc(buffer,
1758 max * sizeof(xmlChar));
1759 if (buffer == NULL) {
1760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1761 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001762 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001763 return(NULL);
1764 }
1765 }
1766 COPY_BUF(l,buffer,len,c);
1767 NEXTL(l);
1768 c = CUR_CHAR(l);
1769 }
1770 buffer[len] = 0;
1771 return(buffer);
1772 }
1773 }
1774 return(xmlStrndup(buf, len));
1775}
1776
1777/**
1778 * xmlParseStringName:
1779 * @ctxt: an XML parser context
1780 * @str: a pointer to the string pointer (IN/OUT)
1781 *
1782 * parse an XML name.
1783 *
1784 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1785 * CombiningChar | Extender
1786 *
1787 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1788 *
1789 * [6] Names ::= Name (S Name)*
1790 *
1791 * Returns the Name parsed or NULL. The str pointer
1792 * is updated to the current location in the string.
1793 */
1794
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001795static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001796xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1797 xmlChar buf[XML_MAX_NAMELEN + 5];
1798 const xmlChar *cur = *str;
1799 int len = 0, l;
1800 int c;
1801
1802 c = CUR_SCHAR(cur, l);
1803 if (!IS_LETTER(c) && (c != '_') &&
1804 (c != ':')) {
1805 return(NULL);
1806 }
1807
1808 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1809 (c == '.') || (c == '-') ||
1810 (c == '_') || (c == ':') ||
1811 (IS_COMBINING(c)) ||
1812 (IS_EXTENDER(c))) {
1813 COPY_BUF(l,buf,len,c);
1814 cur += l;
1815 c = CUR_SCHAR(cur, l);
1816 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1817 /*
1818 * Okay someone managed to make a huge name, so he's ready to pay
1819 * for the processing speed.
1820 */
1821 xmlChar *buffer;
1822 int max = len * 2;
1823
1824 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1825 if (buffer == NULL) {
1826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1827 ctxt->sax->error(ctxt->userData,
1828 "xmlParseStringName: out of memory\n");
1829 return(NULL);
1830 }
1831 memcpy(buffer, buf, len);
1832 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1833 (c == '.') || (c == '-') ||
1834 (c == '_') || (c == ':') ||
1835 (IS_COMBINING(c)) ||
1836 (IS_EXTENDER(c))) {
1837 if (len + 10 > max) {
1838 max *= 2;
1839 buffer = (xmlChar *) xmlRealloc(buffer,
1840 max * sizeof(xmlChar));
1841 if (buffer == NULL) {
1842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1843 ctxt->sax->error(ctxt->userData,
1844 "xmlParseStringName: out of memory\n");
1845 return(NULL);
1846 }
1847 }
1848 COPY_BUF(l,buffer,len,c);
1849 cur += l;
1850 c = CUR_SCHAR(cur, l);
1851 }
1852 buffer[len] = 0;
1853 *str = cur;
1854 return(buffer);
1855 }
1856 }
1857 *str = cur;
1858 return(xmlStrndup(buf, len));
1859}
1860
1861/**
1862 * xmlParseNmtoken:
1863 * @ctxt: an XML parser context
1864 *
1865 * parse an XML Nmtoken.
1866 *
1867 * [7] Nmtoken ::= (NameChar)+
1868 *
1869 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1870 *
1871 * Returns the Nmtoken parsed or NULL
1872 */
1873
1874xmlChar *
1875xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1876 xmlChar buf[XML_MAX_NAMELEN + 5];
1877 int len = 0, l;
1878 int c;
1879 int count = 0;
1880
1881 GROW;
1882 c = CUR_CHAR(l);
1883
1884 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1885 (c == '.') || (c == '-') ||
1886 (c == '_') || (c == ':') ||
1887 (IS_COMBINING(c)) ||
1888 (IS_EXTENDER(c))) {
1889 if (count++ > 100) {
1890 count = 0;
1891 GROW;
1892 }
1893 COPY_BUF(l,buf,len,c);
1894 NEXTL(l);
1895 c = CUR_CHAR(l);
1896 if (len >= XML_MAX_NAMELEN) {
1897 /*
1898 * Okay someone managed to make a huge token, so he's ready to pay
1899 * for the processing speed.
1900 */
1901 xmlChar *buffer;
1902 int max = len * 2;
1903
1904 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1905 if (buffer == NULL) {
1906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1907 ctxt->sax->error(ctxt->userData,
1908 "xmlParseNmtoken: out of memory\n");
1909 return(NULL);
1910 }
1911 memcpy(buffer, buf, len);
1912 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1913 (c == '.') || (c == '-') ||
1914 (c == '_') || (c == ':') ||
1915 (IS_COMBINING(c)) ||
1916 (IS_EXTENDER(c))) {
1917 if (count++ > 100) {
1918 count = 0;
1919 GROW;
1920 }
1921 if (len + 10 > max) {
1922 max *= 2;
1923 buffer = (xmlChar *) xmlRealloc(buffer,
1924 max * sizeof(xmlChar));
1925 if (buffer == NULL) {
1926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1927 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001928 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001929 return(NULL);
1930 }
1931 }
1932 COPY_BUF(l,buffer,len,c);
1933 NEXTL(l);
1934 c = CUR_CHAR(l);
1935 }
1936 buffer[len] = 0;
1937 return(buffer);
1938 }
1939 }
1940 if (len == 0)
1941 return(NULL);
1942 return(xmlStrndup(buf, len));
1943}
1944
1945/**
1946 * xmlParseEntityValue:
1947 * @ctxt: an XML parser context
1948 * @orig: if non-NULL store a copy of the original entity value
1949 *
1950 * parse a value for ENTITY declarations
1951 *
1952 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1953 * "'" ([^%&'] | PEReference | Reference)* "'"
1954 *
1955 * Returns the EntityValue parsed with reference substitued or NULL
1956 */
1957
1958xmlChar *
1959xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1960 xmlChar *buf = NULL;
1961 int len = 0;
1962 int size = XML_PARSER_BUFFER_SIZE;
1963 int c, l;
1964 xmlChar stop;
1965 xmlChar *ret = NULL;
1966 const xmlChar *cur = NULL;
1967 xmlParserInputPtr input;
1968
1969 if (RAW == '"') stop = '"';
1970 else if (RAW == '\'') stop = '\'';
1971 else {
1972 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1974 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1975 ctxt->wellFormed = 0;
1976 ctxt->disableSAX = 1;
1977 return(NULL);
1978 }
1979 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1980 if (buf == NULL) {
1981 xmlGenericError(xmlGenericErrorContext,
1982 "malloc of %d byte failed\n", size);
1983 return(NULL);
1984 }
1985
1986 /*
1987 * The content of the entity definition is copied in a buffer.
1988 */
1989
1990 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1991 input = ctxt->input;
1992 GROW;
1993 NEXT;
1994 c = CUR_CHAR(l);
1995 /*
1996 * NOTE: 4.4.5 Included in Literal
1997 * When a parameter entity reference appears in a literal entity
1998 * value, ... a single or double quote character in the replacement
1999 * text is always treated as a normal data character and will not
2000 * terminate the literal.
2001 * In practice it means we stop the loop only when back at parsing
2002 * the initial entity and the quote is found
2003 */
2004 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2005 (ctxt->input != input))) {
2006 if (len + 5 >= size) {
2007 size *= 2;
2008 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2009 if (buf == NULL) {
2010 xmlGenericError(xmlGenericErrorContext,
2011 "realloc of %d byte failed\n", size);
2012 return(NULL);
2013 }
2014 }
2015 COPY_BUF(l,buf,len,c);
2016 NEXTL(l);
2017 /*
2018 * Pop-up of finished entities.
2019 */
2020 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2021 xmlPopInput(ctxt);
2022
2023 GROW;
2024 c = CUR_CHAR(l);
2025 if (c == 0) {
2026 GROW;
2027 c = CUR_CHAR(l);
2028 }
2029 }
2030 buf[len] = 0;
2031
2032 /*
2033 * Raise problem w.r.t. '&' and '%' being used in non-entities
2034 * reference constructs. Note Charref will be handled in
2035 * xmlStringDecodeEntities()
2036 */
2037 cur = buf;
2038 while (*cur != 0) { /* non input consuming */
2039 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2040 xmlChar *name;
2041 xmlChar tmp = *cur;
2042
2043 cur++;
2044 name = xmlParseStringName(ctxt, &cur);
2045 if ((name == NULL) || (*cur != ';')) {
2046 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2048 ctxt->sax->error(ctxt->userData,
2049 "EntityValue: '%c' forbidden except for entities references\n",
2050 tmp);
2051 ctxt->wellFormed = 0;
2052 ctxt->disableSAX = 1;
2053 }
2054 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2055 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2057 ctxt->sax->error(ctxt->userData,
2058 "EntityValue: PEReferences forbidden in internal subset\n",
2059 tmp);
2060 ctxt->wellFormed = 0;
2061 ctxt->disableSAX = 1;
2062 }
2063 if (name != NULL)
2064 xmlFree(name);
2065 }
2066 cur++;
2067 }
2068
2069 /*
2070 * Then PEReference entities are substituted.
2071 */
2072 if (c != stop) {
2073 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2075 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2076 ctxt->wellFormed = 0;
2077 ctxt->disableSAX = 1;
2078 xmlFree(buf);
2079 } else {
2080 NEXT;
2081 /*
2082 * NOTE: 4.4.7 Bypassed
2083 * When a general entity reference appears in the EntityValue in
2084 * an entity declaration, it is bypassed and left as is.
2085 * so XML_SUBSTITUTE_REF is not set here.
2086 */
2087 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2088 0, 0, 0);
2089 if (orig != NULL)
2090 *orig = buf;
2091 else
2092 xmlFree(buf);
2093 }
2094
2095 return(ret);
2096}
2097
2098/**
2099 * xmlParseAttValue:
2100 * @ctxt: an XML parser context
2101 *
2102 * parse a value for an attribute
2103 * Note: the parser won't do substitution of entities here, this
2104 * will be handled later in xmlStringGetNodeList
2105 *
2106 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2107 * "'" ([^<&'] | Reference)* "'"
2108 *
2109 * 3.3.3 Attribute-Value Normalization:
2110 * Before the value of an attribute is passed to the application or
2111 * checked for validity, the XML processor must normalize it as follows:
2112 * - a character reference is processed by appending the referenced
2113 * character to the attribute value
2114 * - an entity reference is processed by recursively processing the
2115 * replacement text of the entity
2116 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2117 * appending #x20 to the normalized value, except that only a single
2118 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2119 * parsed entity or the literal entity value of an internal parsed entity
2120 * - other characters are processed by appending them to the normalized value
2121 * If the declared value is not CDATA, then the XML processor must further
2122 * process the normalized attribute value by discarding any leading and
2123 * trailing space (#x20) characters, and by replacing sequences of space
2124 * (#x20) characters by a single space (#x20) character.
2125 * All attributes for which no declaration has been read should be treated
2126 * by a non-validating parser as if declared CDATA.
2127 *
2128 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2129 */
2130
2131xmlChar *
2132xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2133 xmlChar limit = 0;
2134 xmlChar *buf = NULL;
2135 int len = 0;
2136 int buf_size = 0;
2137 int c, l;
2138 xmlChar *current = NULL;
2139 xmlEntityPtr ent;
2140
2141
2142 SHRINK;
2143 if (NXT(0) == '"') {
2144 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2145 limit = '"';
2146 NEXT;
2147 } else if (NXT(0) == '\'') {
2148 limit = '\'';
2149 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2150 NEXT;
2151 } else {
2152 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2154 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2155 ctxt->wellFormed = 0;
2156 ctxt->disableSAX = 1;
2157 return(NULL);
2158 }
2159
2160 /*
2161 * allocate a translation buffer.
2162 */
2163 buf_size = XML_PARSER_BUFFER_SIZE;
2164 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2165 if (buf == NULL) {
2166 perror("xmlParseAttValue: malloc failed");
2167 return(NULL);
2168 }
2169
2170 /*
2171 * Ok loop until we reach one of the ending char or a size limit.
2172 */
2173 c = CUR_CHAR(l);
2174 while (((NXT(0) != limit) && /* checked */
2175 (c != '<')) || (ctxt->token != 0)) {
2176 if (c == 0) break;
2177 if (ctxt->token == '&') {
2178 /*
2179 * The reparsing will be done in xmlStringGetNodeList()
2180 * called by the attribute() function in SAX.c
2181 */
2182 static xmlChar buffer[6] = "&#38;";
2183
2184 if (len > buf_size - 10) {
2185 growBuffer(buf);
2186 }
2187 current = &buffer[0];
2188 while (*current != 0) { /* non input consuming */
2189 buf[len++] = *current++;
2190 }
2191 ctxt->token = 0;
2192 } else if (c == '&') {
2193 if (NXT(1) == '#') {
2194 int val = xmlParseCharRef(ctxt);
2195 if (val == '&') {
2196 /*
2197 * The reparsing will be done in xmlStringGetNodeList()
2198 * called by the attribute() function in SAX.c
2199 */
2200 static xmlChar buffer[6] = "&#38;";
2201
2202 if (len > buf_size - 10) {
2203 growBuffer(buf);
2204 }
2205 current = &buffer[0];
2206 while (*current != 0) { /* non input consuming */
2207 buf[len++] = *current++;
2208 }
2209 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002210 if (len > buf_size - 10) {
2211 growBuffer(buf);
2212 }
Owen Taylor3473f882001-02-23 17:55:21 +00002213 len += xmlCopyChar(0, &buf[len], val);
2214 }
2215 } else {
2216 ent = xmlParseEntityRef(ctxt);
2217 if ((ent != NULL) &&
2218 (ctxt->replaceEntities != 0)) {
2219 xmlChar *rep;
2220
2221 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2222 rep = xmlStringDecodeEntities(ctxt, ent->content,
2223 XML_SUBSTITUTE_REF, 0, 0, 0);
2224 if (rep != NULL) {
2225 current = rep;
2226 while (*current != 0) { /* non input consuming */
2227 buf[len++] = *current++;
2228 if (len > buf_size - 10) {
2229 growBuffer(buf);
2230 }
2231 }
2232 xmlFree(rep);
2233 }
2234 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002235 if (len > buf_size - 10) {
2236 growBuffer(buf);
2237 }
Owen Taylor3473f882001-02-23 17:55:21 +00002238 if (ent->content != NULL)
2239 buf[len++] = ent->content[0];
2240 }
2241 } else if (ent != NULL) {
2242 int i = xmlStrlen(ent->name);
2243 const xmlChar *cur = ent->name;
2244
2245 /*
2246 * This may look absurd but is needed to detect
2247 * entities problems
2248 */
2249 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2250 (ent->content != NULL)) {
2251 xmlChar *rep;
2252 rep = xmlStringDecodeEntities(ctxt, ent->content,
2253 XML_SUBSTITUTE_REF, 0, 0, 0);
2254 if (rep != NULL)
2255 xmlFree(rep);
2256 }
2257
2258 /*
2259 * Just output the reference
2260 */
2261 buf[len++] = '&';
2262 if (len > buf_size - i - 10) {
2263 growBuffer(buf);
2264 }
2265 for (;i > 0;i--)
2266 buf[len++] = *cur++;
2267 buf[len++] = ';';
2268 }
2269 }
2270 } else {
2271 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2272 COPY_BUF(l,buf,len,0x20);
2273 if (len > buf_size - 10) {
2274 growBuffer(buf);
2275 }
2276 } else {
2277 COPY_BUF(l,buf,len,c);
2278 if (len > buf_size - 10) {
2279 growBuffer(buf);
2280 }
2281 }
2282 NEXTL(l);
2283 }
2284 GROW;
2285 c = CUR_CHAR(l);
2286 }
2287 buf[len++] = 0;
2288 if (RAW == '<') {
2289 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2291 ctxt->sax->error(ctxt->userData,
2292 "Unescaped '<' not allowed in attributes values\n");
2293 ctxt->wellFormed = 0;
2294 ctxt->disableSAX = 1;
2295 } else if (RAW != limit) {
2296 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2298 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2299 ctxt->wellFormed = 0;
2300 ctxt->disableSAX = 1;
2301 } else
2302 NEXT;
2303 return(buf);
2304}
2305
2306/**
2307 * xmlParseSystemLiteral:
2308 * @ctxt: an XML parser context
2309 *
2310 * parse an XML Literal
2311 *
2312 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2313 *
2314 * Returns the SystemLiteral parsed or NULL
2315 */
2316
2317xmlChar *
2318xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2319 xmlChar *buf = NULL;
2320 int len = 0;
2321 int size = XML_PARSER_BUFFER_SIZE;
2322 int cur, l;
2323 xmlChar stop;
2324 int state = ctxt->instate;
2325 int count = 0;
2326
2327 SHRINK;
2328 if (RAW == '"') {
2329 NEXT;
2330 stop = '"';
2331 } else if (RAW == '\'') {
2332 NEXT;
2333 stop = '\'';
2334 } else {
2335 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2337 ctxt->sax->error(ctxt->userData,
2338 "SystemLiteral \" or ' expected\n");
2339 ctxt->wellFormed = 0;
2340 ctxt->disableSAX = 1;
2341 return(NULL);
2342 }
2343
2344 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2345 if (buf == NULL) {
2346 xmlGenericError(xmlGenericErrorContext,
2347 "malloc of %d byte failed\n", size);
2348 return(NULL);
2349 }
2350 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2351 cur = CUR_CHAR(l);
2352 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2353 if (len + 5 >= size) {
2354 size *= 2;
2355 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2356 if (buf == NULL) {
2357 xmlGenericError(xmlGenericErrorContext,
2358 "realloc of %d byte failed\n", size);
2359 ctxt->instate = (xmlParserInputState) state;
2360 return(NULL);
2361 }
2362 }
2363 count++;
2364 if (count > 50) {
2365 GROW;
2366 count = 0;
2367 }
2368 COPY_BUF(l,buf,len,cur);
2369 NEXTL(l);
2370 cur = CUR_CHAR(l);
2371 if (cur == 0) {
2372 GROW;
2373 SHRINK;
2374 cur = CUR_CHAR(l);
2375 }
2376 }
2377 buf[len] = 0;
2378 ctxt->instate = (xmlParserInputState) state;
2379 if (!IS_CHAR(cur)) {
2380 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2382 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2383 ctxt->wellFormed = 0;
2384 ctxt->disableSAX = 1;
2385 } else {
2386 NEXT;
2387 }
2388 return(buf);
2389}
2390
2391/**
2392 * xmlParsePubidLiteral:
2393 * @ctxt: an XML parser context
2394 *
2395 * parse an XML public literal
2396 *
2397 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2398 *
2399 * Returns the PubidLiteral parsed or NULL.
2400 */
2401
2402xmlChar *
2403xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2404 xmlChar *buf = NULL;
2405 int len = 0;
2406 int size = XML_PARSER_BUFFER_SIZE;
2407 xmlChar cur;
2408 xmlChar stop;
2409 int count = 0;
2410
2411 SHRINK;
2412 if (RAW == '"') {
2413 NEXT;
2414 stop = '"';
2415 } else if (RAW == '\'') {
2416 NEXT;
2417 stop = '\'';
2418 } else {
2419 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2421 ctxt->sax->error(ctxt->userData,
2422 "SystemLiteral \" or ' expected\n");
2423 ctxt->wellFormed = 0;
2424 ctxt->disableSAX = 1;
2425 return(NULL);
2426 }
2427 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2428 if (buf == NULL) {
2429 xmlGenericError(xmlGenericErrorContext,
2430 "malloc of %d byte failed\n", size);
2431 return(NULL);
2432 }
2433 cur = CUR;
2434 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2435 if (len + 1 >= size) {
2436 size *= 2;
2437 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2438 if (buf == NULL) {
2439 xmlGenericError(xmlGenericErrorContext,
2440 "realloc of %d byte failed\n", size);
2441 return(NULL);
2442 }
2443 }
2444 buf[len++] = cur;
2445 count++;
2446 if (count > 50) {
2447 GROW;
2448 count = 0;
2449 }
2450 NEXT;
2451 cur = CUR;
2452 if (cur == 0) {
2453 GROW;
2454 SHRINK;
2455 cur = CUR;
2456 }
2457 }
2458 buf[len] = 0;
2459 if (cur != stop) {
2460 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2462 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2463 ctxt->wellFormed = 0;
2464 ctxt->disableSAX = 1;
2465 } else {
2466 NEXT;
2467 }
2468 return(buf);
2469}
2470
Daniel Veillard48b2f892001-02-25 16:11:03 +00002471void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002472/**
2473 * xmlParseCharData:
2474 * @ctxt: an XML parser context
2475 * @cdata: int indicating whether we are within a CDATA section
2476 *
2477 * parse a CharData section.
2478 * if we are within a CDATA section ']]>' marks an end of section.
2479 *
2480 * The right angle bracket (>) may be represented using the string "&gt;",
2481 * and must, for compatibility, be escaped using "&gt;" or a character
2482 * reference when it appears in the string "]]>" in content, when that
2483 * string is not marking the end of a CDATA section.
2484 *
2485 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2486 */
2487
2488void
2489xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002490 const xmlChar *in;
2491 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002492 int line = ctxt->input->line;
2493 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002494
2495 SHRINK;
2496 GROW;
2497 /*
2498 * Accelerated common case where input don't need to be
2499 * modified before passing it to the handler.
2500 */
2501 if ((ctxt->token == 0) && (!cdata)) {
2502 in = ctxt->input->cur;
2503 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002504get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002505 while (((*in >= 0x20) && (*in != '<') &&
2506 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2507 in++;
2508 if (*in == 0xA) {
2509 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002510 in++;
2511 while (*in == 0xA) {
2512 ctxt->input->line++;
2513 in++;
2514 }
2515 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002516 }
2517 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002518 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002519 if (IS_BLANK(*ctxt->input->cur)) {
2520 const xmlChar *tmp = ctxt->input->cur;
2521 ctxt->input->cur = in;
2522 if (areBlanks(ctxt, tmp, nbchar)) {
2523 if (ctxt->sax->ignorableWhitespace != NULL)
2524 ctxt->sax->ignorableWhitespace(ctxt->userData,
2525 tmp, nbchar);
2526 } else {
2527 if (ctxt->sax->characters != NULL)
2528 ctxt->sax->characters(ctxt->userData,
2529 tmp, nbchar);
2530 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002531 line = ctxt->input->line;
2532 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002533 } else {
2534 if (ctxt->sax->characters != NULL)
2535 ctxt->sax->characters(ctxt->userData,
2536 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002537 line = ctxt->input->line;
2538 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002539 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002540 }
2541 ctxt->input->cur = in;
2542 if (*in == 0xD) {
2543 in++;
2544 if (*in == 0xA) {
2545 ctxt->input->cur = in;
2546 in++;
2547 ctxt->input->line++;
2548 continue; /* while */
2549 }
2550 in--;
2551 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002552 if (*in == '<') {
2553 return;
2554 }
2555 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002556 return;
2557 }
2558 SHRINK;
2559 GROW;
2560 in = ctxt->input->cur;
2561 } while ((*in >= 0x20) && (*in <= 0x7F));
2562 nbchar = 0;
2563 }
Daniel Veillard50582112001-03-26 22:52:16 +00002564 ctxt->input->line = line;
2565 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002566 xmlParseCharDataComplex(ctxt, cdata);
2567}
2568
2569void
2570xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002571 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2572 int nbchar = 0;
2573 int cur, l;
2574 int count = 0;
2575
2576 SHRINK;
2577 GROW;
2578 cur = CUR_CHAR(l);
2579 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2580 ((cur != '&') || (ctxt->token == '&')) &&
2581 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2582 if ((cur == ']') && (NXT(1) == ']') &&
2583 (NXT(2) == '>')) {
2584 if (cdata) break;
2585 else {
2586 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2588 ctxt->sax->error(ctxt->userData,
2589 "Sequence ']]>' not allowed in content\n");
2590 /* Should this be relaxed ??? I see a "must here */
2591 ctxt->wellFormed = 0;
2592 ctxt->disableSAX = 1;
2593 }
2594 }
2595 COPY_BUF(l,buf,nbchar,cur);
2596 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2597 /*
2598 * Ok the segment is to be consumed as chars.
2599 */
2600 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2601 if (areBlanks(ctxt, buf, nbchar)) {
2602 if (ctxt->sax->ignorableWhitespace != NULL)
2603 ctxt->sax->ignorableWhitespace(ctxt->userData,
2604 buf, nbchar);
2605 } else {
2606 if (ctxt->sax->characters != NULL)
2607 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2608 }
2609 }
2610 nbchar = 0;
2611 }
2612 count++;
2613 if (count > 50) {
2614 GROW;
2615 count = 0;
2616 }
2617 NEXTL(l);
2618 cur = CUR_CHAR(l);
2619 }
2620 if (nbchar != 0) {
2621 /*
2622 * Ok the segment is to be consumed as chars.
2623 */
2624 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2625 if (areBlanks(ctxt, buf, nbchar)) {
2626 if (ctxt->sax->ignorableWhitespace != NULL)
2627 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2628 } else {
2629 if (ctxt->sax->characters != NULL)
2630 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2631 }
2632 }
2633 }
2634}
2635
2636/**
2637 * xmlParseExternalID:
2638 * @ctxt: an XML parser context
2639 * @publicID: a xmlChar** receiving PubidLiteral
2640 * @strict: indicate whether we should restrict parsing to only
2641 * production [75], see NOTE below
2642 *
2643 * Parse an External ID or a Public ID
2644 *
2645 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2646 * 'PUBLIC' S PubidLiteral S SystemLiteral
2647 *
2648 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2649 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2650 *
2651 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2652 *
2653 * Returns the function returns SystemLiteral and in the second
2654 * case publicID receives PubidLiteral, is strict is off
2655 * it is possible to return NULL and have publicID set.
2656 */
2657
2658xmlChar *
2659xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2660 xmlChar *URI = NULL;
2661
2662 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002663
2664 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002665 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2666 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2667 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2668 SKIP(6);
2669 if (!IS_BLANK(CUR)) {
2670 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2672 ctxt->sax->error(ctxt->userData,
2673 "Space required after 'SYSTEM'\n");
2674 ctxt->wellFormed = 0;
2675 ctxt->disableSAX = 1;
2676 }
2677 SKIP_BLANKS;
2678 URI = xmlParseSystemLiteral(ctxt);
2679 if (URI == NULL) {
2680 ctxt->errNo = XML_ERR_URI_REQUIRED;
2681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2682 ctxt->sax->error(ctxt->userData,
2683 "xmlParseExternalID: SYSTEM, no URI\n");
2684 ctxt->wellFormed = 0;
2685 ctxt->disableSAX = 1;
2686 }
2687 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2688 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2689 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2690 SKIP(6);
2691 if (!IS_BLANK(CUR)) {
2692 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2694 ctxt->sax->error(ctxt->userData,
2695 "Space required after 'PUBLIC'\n");
2696 ctxt->wellFormed = 0;
2697 ctxt->disableSAX = 1;
2698 }
2699 SKIP_BLANKS;
2700 *publicID = xmlParsePubidLiteral(ctxt);
2701 if (*publicID == NULL) {
2702 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2704 ctxt->sax->error(ctxt->userData,
2705 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2706 ctxt->wellFormed = 0;
2707 ctxt->disableSAX = 1;
2708 }
2709 if (strict) {
2710 /*
2711 * We don't handle [83] so "S SystemLiteral" is required.
2712 */
2713 if (!IS_BLANK(CUR)) {
2714 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2716 ctxt->sax->error(ctxt->userData,
2717 "Space required after the Public Identifier\n");
2718 ctxt->wellFormed = 0;
2719 ctxt->disableSAX = 1;
2720 }
2721 } else {
2722 /*
2723 * We handle [83] so we return immediately, if
2724 * "S SystemLiteral" is not detected. From a purely parsing
2725 * point of view that's a nice mess.
2726 */
2727 const xmlChar *ptr;
2728 GROW;
2729
2730 ptr = CUR_PTR;
2731 if (!IS_BLANK(*ptr)) return(NULL);
2732
2733 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2734 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2735 }
2736 SKIP_BLANKS;
2737 URI = xmlParseSystemLiteral(ctxt);
2738 if (URI == NULL) {
2739 ctxt->errNo = XML_ERR_URI_REQUIRED;
2740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2741 ctxt->sax->error(ctxt->userData,
2742 "xmlParseExternalID: PUBLIC, no URI\n");
2743 ctxt->wellFormed = 0;
2744 ctxt->disableSAX = 1;
2745 }
2746 }
2747 return(URI);
2748}
2749
2750/**
2751 * xmlParseComment:
2752 * @ctxt: an XML parser context
2753 *
2754 * Skip an XML (SGML) comment <!-- .... -->
2755 * The spec says that "For compatibility, the string "--" (double-hyphen)
2756 * must not occur within comments. "
2757 *
2758 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2759 */
2760void
2761xmlParseComment(xmlParserCtxtPtr ctxt) {
2762 xmlChar *buf = NULL;
2763 int len;
2764 int size = XML_PARSER_BUFFER_SIZE;
2765 int q, ql;
2766 int r, rl;
2767 int cur, l;
2768 xmlParserInputState state;
2769 xmlParserInputPtr input = ctxt->input;
2770 int count = 0;
2771
2772 /*
2773 * Check that there is a comment right here.
2774 */
2775 if ((RAW != '<') || (NXT(1) != '!') ||
2776 (NXT(2) != '-') || (NXT(3) != '-')) return;
2777
2778 state = ctxt->instate;
2779 ctxt->instate = XML_PARSER_COMMENT;
2780 SHRINK;
2781 SKIP(4);
2782 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2783 if (buf == NULL) {
2784 xmlGenericError(xmlGenericErrorContext,
2785 "malloc of %d byte failed\n", size);
2786 ctxt->instate = state;
2787 return;
2788 }
2789 q = CUR_CHAR(ql);
2790 NEXTL(ql);
2791 r = CUR_CHAR(rl);
2792 NEXTL(rl);
2793 cur = CUR_CHAR(l);
2794 len = 0;
2795 while (IS_CHAR(cur) && /* checked */
2796 ((cur != '>') ||
2797 (r != '-') || (q != '-'))) {
2798 if ((r == '-') && (q == '-') && (len > 1)) {
2799 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2801 ctxt->sax->error(ctxt->userData,
2802 "Comment must not contain '--' (double-hyphen)`\n");
2803 ctxt->wellFormed = 0;
2804 ctxt->disableSAX = 1;
2805 }
2806 if (len + 5 >= size) {
2807 size *= 2;
2808 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2809 if (buf == NULL) {
2810 xmlGenericError(xmlGenericErrorContext,
2811 "realloc of %d byte failed\n", size);
2812 ctxt->instate = state;
2813 return;
2814 }
2815 }
2816 COPY_BUF(ql,buf,len,q);
2817 q = r;
2818 ql = rl;
2819 r = cur;
2820 rl = l;
2821
2822 count++;
2823 if (count > 50) {
2824 GROW;
2825 count = 0;
2826 }
2827 NEXTL(l);
2828 cur = CUR_CHAR(l);
2829 if (cur == 0) {
2830 SHRINK;
2831 GROW;
2832 cur = CUR_CHAR(l);
2833 }
2834 }
2835 buf[len] = 0;
2836 if (!IS_CHAR(cur)) {
2837 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2839 ctxt->sax->error(ctxt->userData,
2840 "Comment not terminated \n<!--%.50s\n", buf);
2841 ctxt->wellFormed = 0;
2842 ctxt->disableSAX = 1;
2843 xmlFree(buf);
2844 } else {
2845 if (input != ctxt->input) {
2846 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2848 ctxt->sax->error(ctxt->userData,
2849"Comment doesn't start and stop in the same entity\n");
2850 ctxt->wellFormed = 0;
2851 ctxt->disableSAX = 1;
2852 }
2853 NEXT;
2854 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2855 (!ctxt->disableSAX))
2856 ctxt->sax->comment(ctxt->userData, buf);
2857 xmlFree(buf);
2858 }
2859 ctxt->instate = state;
2860}
2861
2862/**
2863 * xmlParsePITarget:
2864 * @ctxt: an XML parser context
2865 *
2866 * parse the name of a PI
2867 *
2868 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2869 *
2870 * Returns the PITarget name or NULL
2871 */
2872
2873xmlChar *
2874xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2875 xmlChar *name;
2876
2877 name = xmlParseName(ctxt);
2878 if ((name != NULL) &&
2879 ((name[0] == 'x') || (name[0] == 'X')) &&
2880 ((name[1] == 'm') || (name[1] == 'M')) &&
2881 ((name[2] == 'l') || (name[2] == 'L'))) {
2882 int i;
2883 if ((name[0] == 'x') && (name[1] == 'm') &&
2884 (name[2] == 'l') && (name[3] == 0)) {
2885 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2887 ctxt->sax->error(ctxt->userData,
2888 "XML declaration allowed only at the start of the document\n");
2889 ctxt->wellFormed = 0;
2890 ctxt->disableSAX = 1;
2891 return(name);
2892 } else if (name[3] == 0) {
2893 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2895 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2896 ctxt->wellFormed = 0;
2897 ctxt->disableSAX = 1;
2898 return(name);
2899 }
2900 for (i = 0;;i++) {
2901 if (xmlW3CPIs[i] == NULL) break;
2902 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2903 return(name);
2904 }
2905 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2906 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2907 ctxt->sax->warning(ctxt->userData,
2908 "xmlParsePItarget: invalid name prefix 'xml'\n");
2909 }
2910 }
2911 return(name);
2912}
2913
2914/**
2915 * xmlParsePI:
2916 * @ctxt: an XML parser context
2917 *
2918 * parse an XML Processing Instruction.
2919 *
2920 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2921 *
2922 * The processing is transfered to SAX once parsed.
2923 */
2924
2925void
2926xmlParsePI(xmlParserCtxtPtr ctxt) {
2927 xmlChar *buf = NULL;
2928 int len = 0;
2929 int size = XML_PARSER_BUFFER_SIZE;
2930 int cur, l;
2931 xmlChar *target;
2932 xmlParserInputState state;
2933 int count = 0;
2934
2935 if ((RAW == '<') && (NXT(1) == '?')) {
2936 xmlParserInputPtr input = ctxt->input;
2937 state = ctxt->instate;
2938 ctxt->instate = XML_PARSER_PI;
2939 /*
2940 * this is a Processing Instruction.
2941 */
2942 SKIP(2);
2943 SHRINK;
2944
2945 /*
2946 * Parse the target name and check for special support like
2947 * namespace.
2948 */
2949 target = xmlParsePITarget(ctxt);
2950 if (target != NULL) {
2951 if ((RAW == '?') && (NXT(1) == '>')) {
2952 if (input != ctxt->input) {
2953 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2955 ctxt->sax->error(ctxt->userData,
2956 "PI declaration doesn't start and stop in the same entity\n");
2957 ctxt->wellFormed = 0;
2958 ctxt->disableSAX = 1;
2959 }
2960 SKIP(2);
2961
2962 /*
2963 * SAX: PI detected.
2964 */
2965 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2966 (ctxt->sax->processingInstruction != NULL))
2967 ctxt->sax->processingInstruction(ctxt->userData,
2968 target, NULL);
2969 ctxt->instate = state;
2970 xmlFree(target);
2971 return;
2972 }
2973 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2974 if (buf == NULL) {
2975 xmlGenericError(xmlGenericErrorContext,
2976 "malloc of %d byte failed\n", size);
2977 ctxt->instate = state;
2978 return;
2979 }
2980 cur = CUR;
2981 if (!IS_BLANK(cur)) {
2982 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2984 ctxt->sax->error(ctxt->userData,
2985 "xmlParsePI: PI %s space expected\n", target);
2986 ctxt->wellFormed = 0;
2987 ctxt->disableSAX = 1;
2988 }
2989 SKIP_BLANKS;
2990 cur = CUR_CHAR(l);
2991 while (IS_CHAR(cur) && /* checked */
2992 ((cur != '?') || (NXT(1) != '>'))) {
2993 if (len + 5 >= size) {
2994 size *= 2;
2995 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2996 if (buf == NULL) {
2997 xmlGenericError(xmlGenericErrorContext,
2998 "realloc of %d byte failed\n", size);
2999 ctxt->instate = state;
3000 return;
3001 }
3002 }
3003 count++;
3004 if (count > 50) {
3005 GROW;
3006 count = 0;
3007 }
3008 COPY_BUF(l,buf,len,cur);
3009 NEXTL(l);
3010 cur = CUR_CHAR(l);
3011 if (cur == 0) {
3012 SHRINK;
3013 GROW;
3014 cur = CUR_CHAR(l);
3015 }
3016 }
3017 buf[len] = 0;
3018 if (cur != '?') {
3019 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3021 ctxt->sax->error(ctxt->userData,
3022 "xmlParsePI: PI %s never end ...\n", target);
3023 ctxt->wellFormed = 0;
3024 ctxt->disableSAX = 1;
3025 } else {
3026 if (input != ctxt->input) {
3027 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3029 ctxt->sax->error(ctxt->userData,
3030 "PI declaration doesn't start and stop in the same entity\n");
3031 ctxt->wellFormed = 0;
3032 ctxt->disableSAX = 1;
3033 }
3034 SKIP(2);
3035
3036 /*
3037 * SAX: PI detected.
3038 */
3039 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3040 (ctxt->sax->processingInstruction != NULL))
3041 ctxt->sax->processingInstruction(ctxt->userData,
3042 target, buf);
3043 }
3044 xmlFree(buf);
3045 xmlFree(target);
3046 } else {
3047 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3049 ctxt->sax->error(ctxt->userData,
3050 "xmlParsePI : no target name\n");
3051 ctxt->wellFormed = 0;
3052 ctxt->disableSAX = 1;
3053 }
3054 ctxt->instate = state;
3055 }
3056}
3057
3058/**
3059 * xmlParseNotationDecl:
3060 * @ctxt: an XML parser context
3061 *
3062 * parse a notation declaration
3063 *
3064 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3065 *
3066 * Hence there is actually 3 choices:
3067 * 'PUBLIC' S PubidLiteral
3068 * 'PUBLIC' S PubidLiteral S SystemLiteral
3069 * and 'SYSTEM' S SystemLiteral
3070 *
3071 * See the NOTE on xmlParseExternalID().
3072 */
3073
3074void
3075xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3076 xmlChar *name;
3077 xmlChar *Pubid;
3078 xmlChar *Systemid;
3079
3080 if ((RAW == '<') && (NXT(1) == '!') &&
3081 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3082 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3083 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3084 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3085 xmlParserInputPtr input = ctxt->input;
3086 SHRINK;
3087 SKIP(10);
3088 if (!IS_BLANK(CUR)) {
3089 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3091 ctxt->sax->error(ctxt->userData,
3092 "Space required after '<!NOTATION'\n");
3093 ctxt->wellFormed = 0;
3094 ctxt->disableSAX = 1;
3095 return;
3096 }
3097 SKIP_BLANKS;
3098
Daniel Veillard76d66f42001-05-16 21:05:17 +00003099 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 if (name == NULL) {
3101 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3103 ctxt->sax->error(ctxt->userData,
3104 "NOTATION: Name expected here\n");
3105 ctxt->wellFormed = 0;
3106 ctxt->disableSAX = 1;
3107 return;
3108 }
3109 if (!IS_BLANK(CUR)) {
3110 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3112 ctxt->sax->error(ctxt->userData,
3113 "Space required after the NOTATION name'\n");
3114 ctxt->wellFormed = 0;
3115 ctxt->disableSAX = 1;
3116 return;
3117 }
3118 SKIP_BLANKS;
3119
3120 /*
3121 * Parse the IDs.
3122 */
3123 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3124 SKIP_BLANKS;
3125
3126 if (RAW == '>') {
3127 if (input != ctxt->input) {
3128 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3130 ctxt->sax->error(ctxt->userData,
3131"Notation declaration doesn't start and stop in the same entity\n");
3132 ctxt->wellFormed = 0;
3133 ctxt->disableSAX = 1;
3134 }
3135 NEXT;
3136 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3137 (ctxt->sax->notationDecl != NULL))
3138 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3139 } else {
3140 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3142 ctxt->sax->error(ctxt->userData,
3143 "'>' required to close NOTATION declaration\n");
3144 ctxt->wellFormed = 0;
3145 ctxt->disableSAX = 1;
3146 }
3147 xmlFree(name);
3148 if (Systemid != NULL) xmlFree(Systemid);
3149 if (Pubid != NULL) xmlFree(Pubid);
3150 }
3151}
3152
3153/**
3154 * xmlParseEntityDecl:
3155 * @ctxt: an XML parser context
3156 *
3157 * parse <!ENTITY declarations
3158 *
3159 * [70] EntityDecl ::= GEDecl | PEDecl
3160 *
3161 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3162 *
3163 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3164 *
3165 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3166 *
3167 * [74] PEDef ::= EntityValue | ExternalID
3168 *
3169 * [76] NDataDecl ::= S 'NDATA' S Name
3170 *
3171 * [ VC: Notation Declared ]
3172 * The Name must match the declared name of a notation.
3173 */
3174
3175void
3176xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3177 xmlChar *name = NULL;
3178 xmlChar *value = NULL;
3179 xmlChar *URI = NULL, *literal = NULL;
3180 xmlChar *ndata = NULL;
3181 int isParameter = 0;
3182 xmlChar *orig = NULL;
3183
3184 GROW;
3185 if ((RAW == '<') && (NXT(1) == '!') &&
3186 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3187 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3188 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3189 xmlParserInputPtr input = ctxt->input;
3190 ctxt->instate = XML_PARSER_ENTITY_DECL;
3191 SHRINK;
3192 SKIP(8);
3193 if (!IS_BLANK(CUR)) {
3194 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3196 ctxt->sax->error(ctxt->userData,
3197 "Space required after '<!ENTITY'\n");
3198 ctxt->wellFormed = 0;
3199 ctxt->disableSAX = 1;
3200 }
3201 SKIP_BLANKS;
3202
3203 if (RAW == '%') {
3204 NEXT;
3205 if (!IS_BLANK(CUR)) {
3206 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3208 ctxt->sax->error(ctxt->userData,
3209 "Space required after '%'\n");
3210 ctxt->wellFormed = 0;
3211 ctxt->disableSAX = 1;
3212 }
3213 SKIP_BLANKS;
3214 isParameter = 1;
3215 }
3216
Daniel Veillard76d66f42001-05-16 21:05:17 +00003217 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003218 if (name == NULL) {
3219 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3221 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3222 ctxt->wellFormed = 0;
3223 ctxt->disableSAX = 1;
3224 return;
3225 }
3226 if (!IS_BLANK(CUR)) {
3227 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3229 ctxt->sax->error(ctxt->userData,
3230 "Space required after the entity name\n");
3231 ctxt->wellFormed = 0;
3232 ctxt->disableSAX = 1;
3233 }
3234 SKIP_BLANKS;
3235
3236 /*
3237 * handle the various case of definitions...
3238 */
3239 if (isParameter) {
3240 if ((RAW == '"') || (RAW == '\'')) {
3241 value = xmlParseEntityValue(ctxt, &orig);
3242 if (value) {
3243 if ((ctxt->sax != NULL) &&
3244 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3245 ctxt->sax->entityDecl(ctxt->userData, name,
3246 XML_INTERNAL_PARAMETER_ENTITY,
3247 NULL, NULL, value);
3248 }
3249 } else {
3250 URI = xmlParseExternalID(ctxt, &literal, 1);
3251 if ((URI == NULL) && (literal == NULL)) {
3252 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData,
3255 "Entity value required\n");
3256 ctxt->wellFormed = 0;
3257 ctxt->disableSAX = 1;
3258 }
3259 if (URI) {
3260 xmlURIPtr uri;
3261
3262 uri = xmlParseURI((const char *) URI);
3263 if (uri == NULL) {
3264 ctxt->errNo = XML_ERR_INVALID_URI;
3265 if ((ctxt->sax != NULL) &&
3266 (!ctxt->disableSAX) &&
3267 (ctxt->sax->error != NULL))
3268 ctxt->sax->error(ctxt->userData,
3269 "Invalid URI: %s\n", URI);
3270 ctxt->wellFormed = 0;
3271 } else {
3272 if (uri->fragment != NULL) {
3273 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3274 if ((ctxt->sax != NULL) &&
3275 (!ctxt->disableSAX) &&
3276 (ctxt->sax->error != NULL))
3277 ctxt->sax->error(ctxt->userData,
3278 "Fragment not allowed: %s\n", URI);
3279 ctxt->wellFormed = 0;
3280 } else {
3281 if ((ctxt->sax != NULL) &&
3282 (!ctxt->disableSAX) &&
3283 (ctxt->sax->entityDecl != NULL))
3284 ctxt->sax->entityDecl(ctxt->userData, name,
3285 XML_EXTERNAL_PARAMETER_ENTITY,
3286 literal, URI, NULL);
3287 }
3288 xmlFreeURI(uri);
3289 }
3290 }
3291 }
3292 } else {
3293 if ((RAW == '"') || (RAW == '\'')) {
3294 value = xmlParseEntityValue(ctxt, &orig);
3295 if ((ctxt->sax != NULL) &&
3296 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3297 ctxt->sax->entityDecl(ctxt->userData, name,
3298 XML_INTERNAL_GENERAL_ENTITY,
3299 NULL, NULL, value);
3300 } else {
3301 URI = xmlParseExternalID(ctxt, &literal, 1);
3302 if ((URI == NULL) && (literal == NULL)) {
3303 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3305 ctxt->sax->error(ctxt->userData,
3306 "Entity value required\n");
3307 ctxt->wellFormed = 0;
3308 ctxt->disableSAX = 1;
3309 }
3310 if (URI) {
3311 xmlURIPtr uri;
3312
3313 uri = xmlParseURI((const char *)URI);
3314 if (uri == NULL) {
3315 ctxt->errNo = XML_ERR_INVALID_URI;
3316 if ((ctxt->sax != NULL) &&
3317 (!ctxt->disableSAX) &&
3318 (ctxt->sax->error != NULL))
3319 ctxt->sax->error(ctxt->userData,
3320 "Invalid URI: %s\n", URI);
3321 ctxt->wellFormed = 0;
3322 } else {
3323 if (uri->fragment != NULL) {
3324 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3325 if ((ctxt->sax != NULL) &&
3326 (!ctxt->disableSAX) &&
3327 (ctxt->sax->error != NULL))
3328 ctxt->sax->error(ctxt->userData,
3329 "Fragment not allowed: %s\n", URI);
3330 ctxt->wellFormed = 0;
3331 }
3332 xmlFreeURI(uri);
3333 }
3334 }
3335 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3336 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3338 ctxt->sax->error(ctxt->userData,
3339 "Space required before 'NDATA'\n");
3340 ctxt->wellFormed = 0;
3341 ctxt->disableSAX = 1;
3342 }
3343 SKIP_BLANKS;
3344 if ((RAW == 'N') && (NXT(1) == 'D') &&
3345 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3346 (NXT(4) == 'A')) {
3347 SKIP(5);
3348 if (!IS_BLANK(CUR)) {
3349 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3351 ctxt->sax->error(ctxt->userData,
3352 "Space required after 'NDATA'\n");
3353 ctxt->wellFormed = 0;
3354 ctxt->disableSAX = 1;
3355 }
3356 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003357 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003358 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3359 (ctxt->sax->unparsedEntityDecl != NULL))
3360 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3361 literal, URI, ndata);
3362 } else {
3363 if ((ctxt->sax != NULL) &&
3364 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3365 ctxt->sax->entityDecl(ctxt->userData, name,
3366 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3367 literal, URI, NULL);
3368 }
3369 }
3370 }
3371 SKIP_BLANKS;
3372 if (RAW != '>') {
3373 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData,
3376 "xmlParseEntityDecl: entity %s not terminated\n", name);
3377 ctxt->wellFormed = 0;
3378 ctxt->disableSAX = 1;
3379 } else {
3380 if (input != ctxt->input) {
3381 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384"Entity declaration doesn't start and stop in the same entity\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388 NEXT;
3389 }
3390 if (orig != NULL) {
3391 /*
3392 * Ugly mechanism to save the raw entity value.
3393 */
3394 xmlEntityPtr cur = NULL;
3395
3396 if (isParameter) {
3397 if ((ctxt->sax != NULL) &&
3398 (ctxt->sax->getParameterEntity != NULL))
3399 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3400 } else {
3401 if ((ctxt->sax != NULL) &&
3402 (ctxt->sax->getEntity != NULL))
3403 cur = ctxt->sax->getEntity(ctxt->userData, name);
3404 }
3405 if (cur != NULL) {
3406 if (cur->orig != NULL)
3407 xmlFree(orig);
3408 else
3409 cur->orig = orig;
3410 } else
3411 xmlFree(orig);
3412 }
3413 if (name != NULL) xmlFree(name);
3414 if (value != NULL) xmlFree(value);
3415 if (URI != NULL) xmlFree(URI);
3416 if (literal != NULL) xmlFree(literal);
3417 if (ndata != NULL) xmlFree(ndata);
3418 }
3419}
3420
3421/**
3422 * xmlParseDefaultDecl:
3423 * @ctxt: an XML parser context
3424 * @value: Receive a possible fixed default value for the attribute
3425 *
3426 * Parse an attribute default declaration
3427 *
3428 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3429 *
3430 * [ VC: Required Attribute ]
3431 * if the default declaration is the keyword #REQUIRED, then the
3432 * attribute must be specified for all elements of the type in the
3433 * attribute-list declaration.
3434 *
3435 * [ VC: Attribute Default Legal ]
3436 * The declared default value must meet the lexical constraints of
3437 * the declared attribute type c.f. xmlValidateAttributeDecl()
3438 *
3439 * [ VC: Fixed Attribute Default ]
3440 * if an attribute has a default value declared with the #FIXED
3441 * keyword, instances of that attribute must match the default value.
3442 *
3443 * [ WFC: No < in Attribute Values ]
3444 * handled in xmlParseAttValue()
3445 *
3446 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3447 * or XML_ATTRIBUTE_FIXED.
3448 */
3449
3450int
3451xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3452 int val;
3453 xmlChar *ret;
3454
3455 *value = NULL;
3456 if ((RAW == '#') && (NXT(1) == 'R') &&
3457 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3458 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3459 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3460 (NXT(8) == 'D')) {
3461 SKIP(9);
3462 return(XML_ATTRIBUTE_REQUIRED);
3463 }
3464 if ((RAW == '#') && (NXT(1) == 'I') &&
3465 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3466 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3467 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3468 SKIP(8);
3469 return(XML_ATTRIBUTE_IMPLIED);
3470 }
3471 val = XML_ATTRIBUTE_NONE;
3472 if ((RAW == '#') && (NXT(1) == 'F') &&
3473 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3474 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3475 SKIP(6);
3476 val = XML_ATTRIBUTE_FIXED;
3477 if (!IS_BLANK(CUR)) {
3478 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3480 ctxt->sax->error(ctxt->userData,
3481 "Space required after '#FIXED'\n");
3482 ctxt->wellFormed = 0;
3483 ctxt->disableSAX = 1;
3484 }
3485 SKIP_BLANKS;
3486 }
3487 ret = xmlParseAttValue(ctxt);
3488 ctxt->instate = XML_PARSER_DTD;
3489 if (ret == NULL) {
3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3491 ctxt->sax->error(ctxt->userData,
3492 "Attribute default value declaration error\n");
3493 ctxt->wellFormed = 0;
3494 ctxt->disableSAX = 1;
3495 } else
3496 *value = ret;
3497 return(val);
3498}
3499
3500/**
3501 * xmlParseNotationType:
3502 * @ctxt: an XML parser context
3503 *
3504 * parse an Notation attribute type.
3505 *
3506 * Note: the leading 'NOTATION' S part has already being parsed...
3507 *
3508 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3509 *
3510 * [ VC: Notation Attributes ]
3511 * Values of this type must match one of the notation names included
3512 * in the declaration; all notation names in the declaration must be declared.
3513 *
3514 * Returns: the notation attribute tree built while parsing
3515 */
3516
3517xmlEnumerationPtr
3518xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3519 xmlChar *name;
3520 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3521
3522 if (RAW != '(') {
3523 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3525 ctxt->sax->error(ctxt->userData,
3526 "'(' required to start 'NOTATION'\n");
3527 ctxt->wellFormed = 0;
3528 ctxt->disableSAX = 1;
3529 return(NULL);
3530 }
3531 SHRINK;
3532 do {
3533 NEXT;
3534 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003535 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003536 if (name == NULL) {
3537 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3539 ctxt->sax->error(ctxt->userData,
3540 "Name expected in NOTATION declaration\n");
3541 ctxt->wellFormed = 0;
3542 ctxt->disableSAX = 1;
3543 return(ret);
3544 }
3545 cur = xmlCreateEnumeration(name);
3546 xmlFree(name);
3547 if (cur == NULL) return(ret);
3548 if (last == NULL) ret = last = cur;
3549 else {
3550 last->next = cur;
3551 last = cur;
3552 }
3553 SKIP_BLANKS;
3554 } while (RAW == '|');
3555 if (RAW != ')') {
3556 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3558 ctxt->sax->error(ctxt->userData,
3559 "')' required to finish NOTATION declaration\n");
3560 ctxt->wellFormed = 0;
3561 ctxt->disableSAX = 1;
3562 if ((last != NULL) && (last != ret))
3563 xmlFreeEnumeration(last);
3564 return(ret);
3565 }
3566 NEXT;
3567 return(ret);
3568}
3569
3570/**
3571 * xmlParseEnumerationType:
3572 * @ctxt: an XML parser context
3573 *
3574 * parse an Enumeration attribute type.
3575 *
3576 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3577 *
3578 * [ VC: Enumeration ]
3579 * Values of this type must match one of the Nmtoken tokens in
3580 * the declaration
3581 *
3582 * Returns: the enumeration attribute tree built while parsing
3583 */
3584
3585xmlEnumerationPtr
3586xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3587 xmlChar *name;
3588 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3589
3590 if (RAW != '(') {
3591 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3593 ctxt->sax->error(ctxt->userData,
3594 "'(' required to start ATTLIST enumeration\n");
3595 ctxt->wellFormed = 0;
3596 ctxt->disableSAX = 1;
3597 return(NULL);
3598 }
3599 SHRINK;
3600 do {
3601 NEXT;
3602 SKIP_BLANKS;
3603 name = xmlParseNmtoken(ctxt);
3604 if (name == NULL) {
3605 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3607 ctxt->sax->error(ctxt->userData,
3608 "NmToken expected in ATTLIST enumeration\n");
3609 ctxt->wellFormed = 0;
3610 ctxt->disableSAX = 1;
3611 return(ret);
3612 }
3613 cur = xmlCreateEnumeration(name);
3614 xmlFree(name);
3615 if (cur == NULL) return(ret);
3616 if (last == NULL) ret = last = cur;
3617 else {
3618 last->next = cur;
3619 last = cur;
3620 }
3621 SKIP_BLANKS;
3622 } while (RAW == '|');
3623 if (RAW != ')') {
3624 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3626 ctxt->sax->error(ctxt->userData,
3627 "')' required to finish ATTLIST enumeration\n");
3628 ctxt->wellFormed = 0;
3629 ctxt->disableSAX = 1;
3630 return(ret);
3631 }
3632 NEXT;
3633 return(ret);
3634}
3635
3636/**
3637 * xmlParseEnumeratedType:
3638 * @ctxt: an XML parser context
3639 * @tree: the enumeration tree built while parsing
3640 *
3641 * parse an Enumerated attribute type.
3642 *
3643 * [57] EnumeratedType ::= NotationType | Enumeration
3644 *
3645 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3646 *
3647 *
3648 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3649 */
3650
3651int
3652xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3653 if ((RAW == 'N') && (NXT(1) == 'O') &&
3654 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3655 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3656 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3657 SKIP(8);
3658 if (!IS_BLANK(CUR)) {
3659 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3661 ctxt->sax->error(ctxt->userData,
3662 "Space required after 'NOTATION'\n");
3663 ctxt->wellFormed = 0;
3664 ctxt->disableSAX = 1;
3665 return(0);
3666 }
3667 SKIP_BLANKS;
3668 *tree = xmlParseNotationType(ctxt);
3669 if (*tree == NULL) return(0);
3670 return(XML_ATTRIBUTE_NOTATION);
3671 }
3672 *tree = xmlParseEnumerationType(ctxt);
3673 if (*tree == NULL) return(0);
3674 return(XML_ATTRIBUTE_ENUMERATION);
3675}
3676
3677/**
3678 * xmlParseAttributeType:
3679 * @ctxt: an XML parser context
3680 * @tree: the enumeration tree built while parsing
3681 *
3682 * parse the Attribute list def for an element
3683 *
3684 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3685 *
3686 * [55] StringType ::= 'CDATA'
3687 *
3688 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3689 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3690 *
3691 * Validity constraints for attribute values syntax are checked in
3692 * xmlValidateAttributeValue()
3693 *
3694 * [ VC: ID ]
3695 * Values of type ID must match the Name production. A name must not
3696 * appear more than once in an XML document as a value of this type;
3697 * i.e., ID values must uniquely identify the elements which bear them.
3698 *
3699 * [ VC: One ID per Element Type ]
3700 * No element type may have more than one ID attribute specified.
3701 *
3702 * [ VC: ID Attribute Default ]
3703 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3704 *
3705 * [ VC: IDREF ]
3706 * Values of type IDREF must match the Name production, and values
3707 * of type IDREFS must match Names; each IDREF Name must match the value
3708 * of an ID attribute on some element in the XML document; i.e. IDREF
3709 * values must match the value of some ID attribute.
3710 *
3711 * [ VC: Entity Name ]
3712 * Values of type ENTITY must match the Name production, values
3713 * of type ENTITIES must match Names; each Entity Name must match the
3714 * name of an unparsed entity declared in the DTD.
3715 *
3716 * [ VC: Name Token ]
3717 * Values of type NMTOKEN must match the Nmtoken production; values
3718 * of type NMTOKENS must match Nmtokens.
3719 *
3720 * Returns the attribute type
3721 */
3722int
3723xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3724 SHRINK;
3725 if ((RAW == 'C') && (NXT(1) == 'D') &&
3726 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3727 (NXT(4) == 'A')) {
3728 SKIP(5);
3729 return(XML_ATTRIBUTE_CDATA);
3730 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3731 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3732 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3733 SKIP(6);
3734 return(XML_ATTRIBUTE_IDREFS);
3735 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3736 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3737 (NXT(4) == 'F')) {
3738 SKIP(5);
3739 return(XML_ATTRIBUTE_IDREF);
3740 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3741 SKIP(2);
3742 return(XML_ATTRIBUTE_ID);
3743 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3744 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3745 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3746 SKIP(6);
3747 return(XML_ATTRIBUTE_ENTITY);
3748 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3749 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3750 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3751 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3752 SKIP(8);
3753 return(XML_ATTRIBUTE_ENTITIES);
3754 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3755 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3756 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3757 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3758 SKIP(8);
3759 return(XML_ATTRIBUTE_NMTOKENS);
3760 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3761 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3762 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3763 (NXT(6) == 'N')) {
3764 SKIP(7);
3765 return(XML_ATTRIBUTE_NMTOKEN);
3766 }
3767 return(xmlParseEnumeratedType(ctxt, tree));
3768}
3769
3770/**
3771 * xmlParseAttributeListDecl:
3772 * @ctxt: an XML parser context
3773 *
3774 * : parse the Attribute list def for an element
3775 *
3776 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3777 *
3778 * [53] AttDef ::= S Name S AttType S DefaultDecl
3779 *
3780 */
3781void
3782xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3783 xmlChar *elemName;
3784 xmlChar *attrName;
3785 xmlEnumerationPtr tree;
3786
3787 if ((RAW == '<') && (NXT(1) == '!') &&
3788 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3789 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3790 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3791 (NXT(8) == 'T')) {
3792 xmlParserInputPtr input = ctxt->input;
3793
3794 SKIP(9);
3795 if (!IS_BLANK(CUR)) {
3796 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3798 ctxt->sax->error(ctxt->userData,
3799 "Space required after '<!ATTLIST'\n");
3800 ctxt->wellFormed = 0;
3801 ctxt->disableSAX = 1;
3802 }
3803 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003804 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003805 if (elemName == NULL) {
3806 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3808 ctxt->sax->error(ctxt->userData,
3809 "ATTLIST: no name for Element\n");
3810 ctxt->wellFormed = 0;
3811 ctxt->disableSAX = 1;
3812 return;
3813 }
3814 SKIP_BLANKS;
3815 GROW;
3816 while (RAW != '>') {
3817 const xmlChar *check = CUR_PTR;
3818 int type;
3819 int def;
3820 xmlChar *defaultValue = NULL;
3821
3822 GROW;
3823 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003824 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003825 if (attrName == NULL) {
3826 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3828 ctxt->sax->error(ctxt->userData,
3829 "ATTLIST: no name for Attribute\n");
3830 ctxt->wellFormed = 0;
3831 ctxt->disableSAX = 1;
3832 break;
3833 }
3834 GROW;
3835 if (!IS_BLANK(CUR)) {
3836 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3838 ctxt->sax->error(ctxt->userData,
3839 "Space required after the attribute name\n");
3840 ctxt->wellFormed = 0;
3841 ctxt->disableSAX = 1;
3842 if (attrName != NULL)
3843 xmlFree(attrName);
3844 if (defaultValue != NULL)
3845 xmlFree(defaultValue);
3846 break;
3847 }
3848 SKIP_BLANKS;
3849
3850 type = xmlParseAttributeType(ctxt, &tree);
3851 if (type <= 0) {
3852 if (attrName != NULL)
3853 xmlFree(attrName);
3854 if (defaultValue != NULL)
3855 xmlFree(defaultValue);
3856 break;
3857 }
3858
3859 GROW;
3860 if (!IS_BLANK(CUR)) {
3861 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3863 ctxt->sax->error(ctxt->userData,
3864 "Space required after the attribute type\n");
3865 ctxt->wellFormed = 0;
3866 ctxt->disableSAX = 1;
3867 if (attrName != NULL)
3868 xmlFree(attrName);
3869 if (defaultValue != NULL)
3870 xmlFree(defaultValue);
3871 if (tree != NULL)
3872 xmlFreeEnumeration(tree);
3873 break;
3874 }
3875 SKIP_BLANKS;
3876
3877 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3878 if (def <= 0) {
3879 if (attrName != NULL)
3880 xmlFree(attrName);
3881 if (defaultValue != NULL)
3882 xmlFree(defaultValue);
3883 if (tree != NULL)
3884 xmlFreeEnumeration(tree);
3885 break;
3886 }
3887
3888 GROW;
3889 if (RAW != '>') {
3890 if (!IS_BLANK(CUR)) {
3891 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3893 ctxt->sax->error(ctxt->userData,
3894 "Space required after the attribute default value\n");
3895 ctxt->wellFormed = 0;
3896 ctxt->disableSAX = 1;
3897 if (attrName != NULL)
3898 xmlFree(attrName);
3899 if (defaultValue != NULL)
3900 xmlFree(defaultValue);
3901 if (tree != NULL)
3902 xmlFreeEnumeration(tree);
3903 break;
3904 }
3905 SKIP_BLANKS;
3906 }
3907 if (check == CUR_PTR) {
3908 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3910 ctxt->sax->error(ctxt->userData,
3911 "xmlParseAttributeListDecl: detected internal error\n");
3912 if (attrName != NULL)
3913 xmlFree(attrName);
3914 if (defaultValue != NULL)
3915 xmlFree(defaultValue);
3916 if (tree != NULL)
3917 xmlFreeEnumeration(tree);
3918 break;
3919 }
3920 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3921 (ctxt->sax->attributeDecl != NULL))
3922 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3923 type, def, defaultValue, tree);
3924 if (attrName != NULL)
3925 xmlFree(attrName);
3926 if (defaultValue != NULL)
3927 xmlFree(defaultValue);
3928 GROW;
3929 }
3930 if (RAW == '>') {
3931 if (input != ctxt->input) {
3932 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3934 ctxt->sax->error(ctxt->userData,
3935"Attribute list declaration doesn't start and stop in the same entity\n");
3936 ctxt->wellFormed = 0;
3937 ctxt->disableSAX = 1;
3938 }
3939 NEXT;
3940 }
3941
3942 xmlFree(elemName);
3943 }
3944}
3945
3946/**
3947 * xmlParseElementMixedContentDecl:
3948 * @ctxt: an XML parser context
3949 *
3950 * parse the declaration for a Mixed Element content
3951 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3952 *
3953 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3954 * '(' S? '#PCDATA' S? ')'
3955 *
3956 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3957 *
3958 * [ VC: No Duplicate Types ]
3959 * The same name must not appear more than once in a single
3960 * mixed-content declaration.
3961 *
3962 * returns: the list of the xmlElementContentPtr describing the element choices
3963 */
3964xmlElementContentPtr
3965xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3966 xmlElementContentPtr ret = NULL, cur = NULL, n;
3967 xmlChar *elem = NULL;
3968
3969 GROW;
3970 if ((RAW == '#') && (NXT(1) == 'P') &&
3971 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3972 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3973 (NXT(6) == 'A')) {
3974 SKIP(7);
3975 SKIP_BLANKS;
3976 SHRINK;
3977 if (RAW == ')') {
3978 ctxt->entity = ctxt->input;
3979 NEXT;
3980 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3981 if (RAW == '*') {
3982 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3983 NEXT;
3984 }
3985 return(ret);
3986 }
3987 if ((RAW == '(') || (RAW == '|')) {
3988 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3989 if (ret == NULL) return(NULL);
3990 }
3991 while (RAW == '|') {
3992 NEXT;
3993 if (elem == NULL) {
3994 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3995 if (ret == NULL) return(NULL);
3996 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003997 if (cur != NULL)
3998 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003999 cur = ret;
4000 } else {
4001 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4002 if (n == NULL) return(NULL);
4003 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004004 if (n->c1 != NULL)
4005 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004006 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004007 if (n != NULL)
4008 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004009 cur = n;
4010 xmlFree(elem);
4011 }
4012 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004013 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004014 if (elem == NULL) {
4015 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4017 ctxt->sax->error(ctxt->userData,
4018 "xmlParseElementMixedContentDecl : Name expected\n");
4019 ctxt->wellFormed = 0;
4020 ctxt->disableSAX = 1;
4021 xmlFreeElementContent(cur);
4022 return(NULL);
4023 }
4024 SKIP_BLANKS;
4025 GROW;
4026 }
4027 if ((RAW == ')') && (NXT(1) == '*')) {
4028 if (elem != NULL) {
4029 cur->c2 = xmlNewElementContent(elem,
4030 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004031 if (cur->c2 != NULL)
4032 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004033 xmlFree(elem);
4034 }
4035 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4036 ctxt->entity = ctxt->input;
4037 SKIP(2);
4038 } else {
4039 if (elem != NULL) xmlFree(elem);
4040 xmlFreeElementContent(ret);
4041 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4043 ctxt->sax->error(ctxt->userData,
4044 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4045 ctxt->wellFormed = 0;
4046 ctxt->disableSAX = 1;
4047 return(NULL);
4048 }
4049
4050 } else {
4051 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4053 ctxt->sax->error(ctxt->userData,
4054 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4055 ctxt->wellFormed = 0;
4056 ctxt->disableSAX = 1;
4057 }
4058 return(ret);
4059}
4060
4061/**
4062 * xmlParseElementChildrenContentDecl:
4063 * @ctxt: an XML parser context
4064 *
 * parse the declaration for an element with children (element-only) content
4066 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4067 *
4068 *
4069 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4070 *
4071 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4072 *
4073 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4074 *
4075 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4076 *
4077 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4078 * TODO Parameter-entity replacement text must be properly nested
 * with parenthesized groups. That is to say, if either of the
4080 * opening or closing parentheses in a choice, seq, or Mixed
4081 * construct is contained in the replacement text for a parameter
4082 * entity, both must be contained in the same replacement text. For
4083 * interoperability, if a parameter-entity reference appears in a
4084 * choice, seq, or Mixed construct, its replacement text should not
4085 * be empty, and neither the first nor last non-blank character of
4086 * the replacement text should be a connector (| or ,).
4087 *
4088 * returns: the tree of xmlElementContentPtr describing the element
4089 * hierarchy.
4090 */
4091xmlElementContentPtr
4092#ifdef VMS
4093xmlParseElementChildrenContentD
4094#else
4095xmlParseElementChildrenContentDecl
4096#endif
4097(xmlParserCtxtPtr ctxt) {
4098 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4099 xmlChar *elem;
4100 xmlChar type = 0;
4101
4102 SKIP_BLANKS;
4103 GROW;
4104 if (RAW == '(') {
4105 /* Recurse on first child */
4106 NEXT;
4107 SKIP_BLANKS;
4108 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4109 SKIP_BLANKS;
4110 GROW;
4111 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004112 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004113 if (elem == NULL) {
4114 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4116 ctxt->sax->error(ctxt->userData,
4117 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4118 ctxt->wellFormed = 0;
4119 ctxt->disableSAX = 1;
4120 return(NULL);
4121 }
4122 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4123 GROW;
4124 if (RAW == '?') {
4125 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4126 NEXT;
4127 } else if (RAW == '*') {
4128 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4129 NEXT;
4130 } else if (RAW == '+') {
4131 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4132 NEXT;
4133 } else {
4134 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4135 }
4136 xmlFree(elem);
4137 GROW;
4138 }
4139 SKIP_BLANKS;
4140 SHRINK;
4141 while (RAW != ')') {
4142 /*
4143 * Each loop we parse one separator and one element.
4144 */
4145 if (RAW == ',') {
4146 if (type == 0) type = CUR;
4147
4148 /*
4149 * Detect "Name | Name , Name" error
4150 */
4151 else if (type != CUR) {
4152 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4154 ctxt->sax->error(ctxt->userData,
4155 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4156 type);
4157 ctxt->wellFormed = 0;
4158 ctxt->disableSAX = 1;
4159 if ((op != NULL) && (op != ret))
4160 xmlFreeElementContent(op);
4161 if ((last != NULL) && (last != ret) &&
4162 (last != ret->c1) && (last != ret->c2))
4163 xmlFreeElementContent(last);
4164 if (ret != NULL)
4165 xmlFreeElementContent(ret);
4166 return(NULL);
4167 }
4168 NEXT;
4169
4170 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4171 if (op == NULL) {
4172 xmlFreeElementContent(ret);
4173 return(NULL);
4174 }
4175 if (last == NULL) {
4176 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004177 if (ret != NULL)
4178 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004179 ret = cur = op;
4180 } else {
4181 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004182 if (op != NULL)
4183 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004184 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004185 if (last != NULL)
4186 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004187 cur =op;
4188 last = NULL;
4189 }
4190 } else if (RAW == '|') {
4191 if (type == 0) type = CUR;
4192
4193 /*
4194 * Detect "Name , Name | Name" error
4195 */
4196 else if (type != CUR) {
4197 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4199 ctxt->sax->error(ctxt->userData,
4200 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4201 type);
4202 ctxt->wellFormed = 0;
4203 ctxt->disableSAX = 1;
4204 if ((op != NULL) && (op != ret) && (op != last))
4205 xmlFreeElementContent(op);
4206 if ((last != NULL) && (last != ret) &&
4207 (last != ret->c1) && (last != ret->c2))
4208 xmlFreeElementContent(last);
4209 if (ret != NULL)
4210 xmlFreeElementContent(ret);
4211 return(NULL);
4212 }
4213 NEXT;
4214
4215 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4216 if (op == NULL) {
4217 if ((op != NULL) && (op != ret))
4218 xmlFreeElementContent(op);
4219 if ((last != NULL) && (last != ret) &&
4220 (last != ret->c1) && (last != ret->c2))
4221 xmlFreeElementContent(last);
4222 if (ret != NULL)
4223 xmlFreeElementContent(ret);
4224 return(NULL);
4225 }
4226 if (last == NULL) {
4227 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004228 if (ret != NULL)
4229 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004230 ret = cur = op;
4231 } else {
4232 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004233 if (op != NULL)
4234 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004235 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004236 if (last != NULL)
4237 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004238 cur =op;
4239 last = NULL;
4240 }
4241 } else {
4242 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4244 ctxt->sax->error(ctxt->userData,
4245 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4246 ctxt->wellFormed = 0;
4247 ctxt->disableSAX = 1;
4248 if ((op != NULL) && (op != ret))
4249 xmlFreeElementContent(op);
4250 if ((last != NULL) && (last != ret) &&
4251 (last != ret->c1) && (last != ret->c2))
4252 xmlFreeElementContent(last);
4253 if (ret != NULL)
4254 xmlFreeElementContent(ret);
4255 return(NULL);
4256 }
4257 GROW;
4258 SKIP_BLANKS;
4259 GROW;
4260 if (RAW == '(') {
4261 /* Recurse on second child */
4262 NEXT;
4263 SKIP_BLANKS;
4264 last = xmlParseElementChildrenContentDecl(ctxt);
4265 SKIP_BLANKS;
4266 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004267 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004268 if (elem == NULL) {
4269 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4271 ctxt->sax->error(ctxt->userData,
4272 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4273 ctxt->wellFormed = 0;
4274 ctxt->disableSAX = 1;
4275 if ((op != NULL) && (op != ret))
4276 xmlFreeElementContent(op);
4277 if ((last != NULL) && (last != ret) &&
4278 (last != ret->c1) && (last != ret->c2))
4279 xmlFreeElementContent(last);
4280 if (ret != NULL)
4281 xmlFreeElementContent(ret);
4282 return(NULL);
4283 }
4284 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4285 xmlFree(elem);
4286 if (RAW == '?') {
4287 last->ocur = XML_ELEMENT_CONTENT_OPT;
4288 NEXT;
4289 } else if (RAW == '*') {
4290 last->ocur = XML_ELEMENT_CONTENT_MULT;
4291 NEXT;
4292 } else if (RAW == '+') {
4293 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4294 NEXT;
4295 } else {
4296 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4297 }
4298 }
4299 SKIP_BLANKS;
4300 GROW;
4301 }
4302 if ((cur != NULL) && (last != NULL)) {
4303 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004304 if (last != NULL)
4305 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004306 }
4307 ctxt->entity = ctxt->input;
4308 NEXT;
4309 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004310 if (ret != NULL)
4311 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004312 NEXT;
4313 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004314 if (ret != NULL)
4315 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004316 NEXT;
4317 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004318 if (ret != NULL)
4319 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004320 NEXT;
4321 }
4322 return(ret);
4323}
4324
4325/**
4326 * xmlParseElementContentDecl:
4327 * @ctxt: an XML parser context
4328 * @name: the name of the element being defined.
4329 * @result: the Element Content pointer will be stored here if any
4330 *
4331 * parse the declaration for an Element content either Mixed or Children,
4332 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4333 *
4334 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4335 *
4336 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4337 */
4338
4339int
4340xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4341 xmlElementContentPtr *result) {
4342
4343 xmlElementContentPtr tree = NULL;
4344 xmlParserInputPtr input = ctxt->input;
4345 int res;
4346
4347 *result = NULL;
4348
4349 if (RAW != '(') {
4350 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4352 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004353 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004354 ctxt->wellFormed = 0;
4355 ctxt->disableSAX = 1;
4356 return(-1);
4357 }
4358 NEXT;
4359 GROW;
4360 SKIP_BLANKS;
4361 if ((RAW == '#') && (NXT(1) == 'P') &&
4362 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4363 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4364 (NXT(6) == 'A')) {
4365 tree = xmlParseElementMixedContentDecl(ctxt);
4366 res = XML_ELEMENT_TYPE_MIXED;
4367 } else {
4368 tree = xmlParseElementChildrenContentDecl(ctxt);
4369 res = XML_ELEMENT_TYPE_ELEMENT;
4370 }
4371 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4372 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4374 ctxt->sax->error(ctxt->userData,
4375"Element content declaration doesn't start and stop in the same entity\n");
4376 ctxt->wellFormed = 0;
4377 ctxt->disableSAX = 1;
4378 }
4379 SKIP_BLANKS;
4380 *result = tree;
4381 return(res);
4382}
4383
4384/**
4385 * xmlParseElementDecl:
4386 * @ctxt: an XML parser context
4387 *
4388 * parse an Element declaration.
4389 *
4390 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4391 *
4392 * [ VC: Unique Element Type Declaration ]
4393 * No element type may be declared more than once
4394 *
4395 * Returns the type of the element, or -1 in case of error
4396 */
4397int
4398xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4399 xmlChar *name;
4400 int ret = -1;
4401 xmlElementContentPtr content = NULL;
4402
4403 GROW;
4404 if ((RAW == '<') && (NXT(1) == '!') &&
4405 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4406 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4407 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4408 (NXT(8) == 'T')) {
4409 xmlParserInputPtr input = ctxt->input;
4410
4411 SKIP(9);
4412 if (!IS_BLANK(CUR)) {
4413 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4415 ctxt->sax->error(ctxt->userData,
4416 "Space required after 'ELEMENT'\n");
4417 ctxt->wellFormed = 0;
4418 ctxt->disableSAX = 1;
4419 }
4420 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004421 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004422 if (name == NULL) {
4423 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4425 ctxt->sax->error(ctxt->userData,
4426 "xmlParseElementDecl: no name for Element\n");
4427 ctxt->wellFormed = 0;
4428 ctxt->disableSAX = 1;
4429 return(-1);
4430 }
4431 while ((RAW == 0) && (ctxt->inputNr > 1))
4432 xmlPopInput(ctxt);
4433 if (!IS_BLANK(CUR)) {
4434 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4436 ctxt->sax->error(ctxt->userData,
4437 "Space required after the element name\n");
4438 ctxt->wellFormed = 0;
4439 ctxt->disableSAX = 1;
4440 }
4441 SKIP_BLANKS;
4442 if ((RAW == 'E') && (NXT(1) == 'M') &&
4443 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4444 (NXT(4) == 'Y')) {
4445 SKIP(5);
4446 /*
4447 * Element must always be empty.
4448 */
4449 ret = XML_ELEMENT_TYPE_EMPTY;
4450 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4451 (NXT(2) == 'Y')) {
4452 SKIP(3);
4453 /*
4454 * Element is a generic container.
4455 */
4456 ret = XML_ELEMENT_TYPE_ANY;
4457 } else if (RAW == '(') {
4458 ret = xmlParseElementContentDecl(ctxt, name, &content);
4459 } else {
4460 /*
4461 * [ WFC: PEs in Internal Subset ] error handling.
4462 */
4463 if ((RAW == '%') && (ctxt->external == 0) &&
4464 (ctxt->inputNr == 1)) {
4465 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4467 ctxt->sax->error(ctxt->userData,
4468 "PEReference: forbidden within markup decl in internal subset\n");
4469 } else {
4470 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4472 ctxt->sax->error(ctxt->userData,
4473 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4474 }
4475 ctxt->wellFormed = 0;
4476 ctxt->disableSAX = 1;
4477 if (name != NULL) xmlFree(name);
4478 return(-1);
4479 }
4480
4481 SKIP_BLANKS;
4482 /*
4483 * Pop-up of finished entities.
4484 */
4485 while ((RAW == 0) && (ctxt->inputNr > 1))
4486 xmlPopInput(ctxt);
4487 SKIP_BLANKS;
4488
4489 if (RAW != '>') {
4490 ctxt->errNo = XML_ERR_GT_REQUIRED;
4491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4492 ctxt->sax->error(ctxt->userData,
4493 "xmlParseElementDecl: expected '>' at the end\n");
4494 ctxt->wellFormed = 0;
4495 ctxt->disableSAX = 1;
4496 } else {
4497 if (input != ctxt->input) {
4498 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4500 ctxt->sax->error(ctxt->userData,
4501"Element declaration doesn't start and stop in the same entity\n");
4502 ctxt->wellFormed = 0;
4503 ctxt->disableSAX = 1;
4504 }
4505
4506 NEXT;
4507 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4508 (ctxt->sax->elementDecl != NULL))
4509 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4510 content);
4511 }
4512 if (content != NULL) {
4513 xmlFreeElementContent(content);
4514 }
4515 if (name != NULL) {
4516 xmlFree(name);
4517 }
4518 }
4519 return(ret);
4520}
4521
4522/**
4523 * xmlParseMarkupDecl:
4524 * @ctxt: an XML parser context
4525 *
4526 * parse Markup declarations
4527 *
4528 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4529 * NotationDecl | PI | Comment
4530 *
4531 * [ VC: Proper Declaration/PE Nesting ]
4532 * Parameter-entity replacement text must be properly nested with
4533 * markup declarations. That is to say, if either the first character
4534 * or the last character of a markup declaration (markupdecl above) is
4535 * contained in the replacement text for a parameter-entity reference,
4536 * both must be contained in the same replacement text.
4537 *
4538 * [ WFC: PEs in Internal Subset ]
4539 * In the internal DTD subset, parameter-entity references can occur
4540 * only where markup declarations can occur, not within markup declarations.
4541 * (This does not apply to references that occur in external parameter
4542 * entities or to the external subset.)
4543 */
4544void
4545xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4546 GROW;
4547 xmlParseElementDecl(ctxt);
4548 xmlParseAttributeListDecl(ctxt);
4549 xmlParseEntityDecl(ctxt);
4550 xmlParseNotationDecl(ctxt);
4551 xmlParsePI(ctxt);
4552 xmlParseComment(ctxt);
4553 /*
4554 * This is only for internal subset. On external entities,
4555 * the replacement is done before parsing stage
4556 */
4557 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4558 xmlParsePEReference(ctxt);
4559 ctxt->instate = XML_PARSER_DTD;
4560}
4561
4562/**
4563 * xmlParseTextDecl:
4564 * @ctxt: an XML parser context
4565 *
4566 * parse an XML declaration header for external entities
4567 *
4568 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4569 *
4570 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4571 */
4572
4573void
4574xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4575 xmlChar *version;
4576
4577 /*
4578 * We know that '<?xml' is here.
4579 */
4580 if ((RAW == '<') && (NXT(1) == '?') &&
4581 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4582 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4583 SKIP(5);
4584 } else {
4585 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData,
4588 "Text declaration '<?xml' required\n");
4589 ctxt->wellFormed = 0;
4590 ctxt->disableSAX = 1;
4591
4592 return;
4593 }
4594
4595 if (!IS_BLANK(CUR)) {
4596 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4598 ctxt->sax->error(ctxt->userData,
4599 "Space needed after '<?xml'\n");
4600 ctxt->wellFormed = 0;
4601 ctxt->disableSAX = 1;
4602 }
4603 SKIP_BLANKS;
4604
4605 /*
4606 * We may have the VersionInfo here.
4607 */
4608 version = xmlParseVersionInfo(ctxt);
4609 if (version == NULL)
4610 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4611 ctxt->input->version = version;
4612
4613 /*
4614 * We must have the encoding declaration
4615 */
4616 if (!IS_BLANK(CUR)) {
4617 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4619 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4620 ctxt->wellFormed = 0;
4621 ctxt->disableSAX = 1;
4622 }
4623 xmlParseEncodingDecl(ctxt);
4624 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4625 /*
4626 * The XML REC instructs us to stop parsing right here
4627 */
4628 return;
4629 }
4630
4631 SKIP_BLANKS;
4632 if ((RAW == '?') && (NXT(1) == '>')) {
4633 SKIP(2);
4634 } else if (RAW == '>') {
4635 /* Deprecated old WD ... */
4636 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4638 ctxt->sax->error(ctxt->userData,
4639 "XML declaration must end-up with '?>'\n");
4640 ctxt->wellFormed = 0;
4641 ctxt->disableSAX = 1;
4642 NEXT;
4643 } else {
4644 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4646 ctxt->sax->error(ctxt->userData,
4647 "parsing XML declaration: '?>' expected\n");
4648 ctxt->wellFormed = 0;
4649 ctxt->disableSAX = 1;
4650 MOVETO_ENDTAG(CUR_PTR);
4651 NEXT;
4652 }
4653}
4654
4655/*
4656 * xmlParseConditionalSections
4657 * @ctxt: an XML parser context
4658 *
4659 * [61] conditionalSect ::= includeSect | ignoreSect
4660 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4661 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4662 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4663 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4664 */
4665
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004666static void
Owen Taylor3473f882001-02-23 17:55:21 +00004667xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4668 SKIP(3);
4669 SKIP_BLANKS;
4670 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4671 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4672 (NXT(6) == 'E')) {
4673 SKIP(7);
4674 SKIP_BLANKS;
4675 if (RAW != '[') {
4676 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4678 ctxt->sax->error(ctxt->userData,
4679 "XML conditional section '[' expected\n");
4680 ctxt->wellFormed = 0;
4681 ctxt->disableSAX = 1;
4682 } else {
4683 NEXT;
4684 }
4685 if (xmlParserDebugEntities) {
4686 if ((ctxt->input != NULL) && (ctxt->input->filename))
4687 xmlGenericError(xmlGenericErrorContext,
4688 "%s(%d): ", ctxt->input->filename,
4689 ctxt->input->line);
4690 xmlGenericError(xmlGenericErrorContext,
4691 "Entering INCLUDE Conditional Section\n");
4692 }
4693
4694 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4695 (NXT(2) != '>'))) {
4696 const xmlChar *check = CUR_PTR;
4697 int cons = ctxt->input->consumed;
4698 int tok = ctxt->token;
4699
4700 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4701 xmlParseConditionalSections(ctxt);
4702 } else if (IS_BLANK(CUR)) {
4703 NEXT;
4704 } else if (RAW == '%') {
4705 xmlParsePEReference(ctxt);
4706 } else
4707 xmlParseMarkupDecl(ctxt);
4708
4709 /*
4710 * Pop-up of finished entities.
4711 */
4712 while ((RAW == 0) && (ctxt->inputNr > 1))
4713 xmlPopInput(ctxt);
4714
4715 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4716 (tok == ctxt->token)) {
4717 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4719 ctxt->sax->error(ctxt->userData,
4720 "Content error in the external subset\n");
4721 ctxt->wellFormed = 0;
4722 ctxt->disableSAX = 1;
4723 break;
4724 }
4725 }
4726 if (xmlParserDebugEntities) {
4727 if ((ctxt->input != NULL) && (ctxt->input->filename))
4728 xmlGenericError(xmlGenericErrorContext,
4729 "%s(%d): ", ctxt->input->filename,
4730 ctxt->input->line);
4731 xmlGenericError(xmlGenericErrorContext,
4732 "Leaving INCLUDE Conditional Section\n");
4733 }
4734
4735 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4736 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4737 int state;
4738 int instate;
4739 int depth = 0;
4740
4741 SKIP(6);
4742 SKIP_BLANKS;
4743 if (RAW != '[') {
4744 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4746 ctxt->sax->error(ctxt->userData,
4747 "XML conditional section '[' expected\n");
4748 ctxt->wellFormed = 0;
4749 ctxt->disableSAX = 1;
4750 } else {
4751 NEXT;
4752 }
4753 if (xmlParserDebugEntities) {
4754 if ((ctxt->input != NULL) && (ctxt->input->filename))
4755 xmlGenericError(xmlGenericErrorContext,
4756 "%s(%d): ", ctxt->input->filename,
4757 ctxt->input->line);
4758 xmlGenericError(xmlGenericErrorContext,
4759 "Entering IGNORE Conditional Section\n");
4760 }
4761
4762 /*
4763 * Parse up to the end of the conditionnal section
4764 * But disable SAX event generating DTD building in the meantime
4765 */
4766 state = ctxt->disableSAX;
4767 instate = ctxt->instate;
4768 ctxt->disableSAX = 1;
4769 ctxt->instate = XML_PARSER_IGNORE;
4770
4771 while (depth >= 0) {
4772 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4773 depth++;
4774 SKIP(3);
4775 continue;
4776 }
4777 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4778 if (--depth >= 0) SKIP(3);
4779 continue;
4780 }
4781 NEXT;
4782 continue;
4783 }
4784
4785 ctxt->disableSAX = state;
4786 ctxt->instate = instate;
4787
4788 if (xmlParserDebugEntities) {
4789 if ((ctxt->input != NULL) && (ctxt->input->filename))
4790 xmlGenericError(xmlGenericErrorContext,
4791 "%s(%d): ", ctxt->input->filename,
4792 ctxt->input->line);
4793 xmlGenericError(xmlGenericErrorContext,
4794 "Leaving IGNORE Conditional Section\n");
4795 }
4796
4797 } else {
4798 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4800 ctxt->sax->error(ctxt->userData,
4801 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4802 ctxt->wellFormed = 0;
4803 ctxt->disableSAX = 1;
4804 }
4805
4806 if (RAW == 0)
4807 SHRINK;
4808
4809 if (RAW == 0) {
4810 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4812 ctxt->sax->error(ctxt->userData,
4813 "XML conditional section not closed\n");
4814 ctxt->wellFormed = 0;
4815 ctxt->disableSAX = 1;
4816 } else {
4817 SKIP(3);
4818 }
4819}
4820
4821/**
4822 * xmlParseExternalSubset:
4823 * @ctxt: an XML parser context
4824 * @ExternalID: the external identifier
4825 * @SystemID: the system identifier (or URL)
4826 *
4827 * parse Markup declarations from an external subset
4828 *
4829 * [30] extSubset ::= textDecl? extSubsetDecl
4830 *
4831 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4832 */
4833void
4834xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4835 const xmlChar *SystemID) {
4836 GROW;
4837 if ((RAW == '<') && (NXT(1) == '?') &&
4838 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4839 (NXT(4) == 'l')) {
4840 xmlParseTextDecl(ctxt);
4841 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4842 /*
4843 * The XML REC instructs us to stop parsing right here
4844 */
4845 ctxt->instate = XML_PARSER_EOF;
4846 return;
4847 }
4848 }
4849 if (ctxt->myDoc == NULL) {
4850 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4851 }
4852 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4853 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4854
4855 ctxt->instate = XML_PARSER_DTD;
4856 ctxt->external = 1;
4857 while (((RAW == '<') && (NXT(1) == '?')) ||
4858 ((RAW == '<') && (NXT(1) == '!')) ||
4859 IS_BLANK(CUR)) {
4860 const xmlChar *check = CUR_PTR;
4861 int cons = ctxt->input->consumed;
4862 int tok = ctxt->token;
4863
4864 GROW;
4865 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4866 xmlParseConditionalSections(ctxt);
4867 } else if (IS_BLANK(CUR)) {
4868 NEXT;
4869 } else if (RAW == '%') {
4870 xmlParsePEReference(ctxt);
4871 } else
4872 xmlParseMarkupDecl(ctxt);
4873
4874 /*
4875 * Pop-up of finished entities.
4876 */
4877 while ((RAW == 0) && (ctxt->inputNr > 1))
4878 xmlPopInput(ctxt);
4879
4880 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4881 (tok == ctxt->token)) {
4882 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4884 ctxt->sax->error(ctxt->userData,
4885 "Content error in the external subset\n");
4886 ctxt->wellFormed = 0;
4887 ctxt->disableSAX = 1;
4888 break;
4889 }
4890 }
4891
4892 if (RAW != 0) {
4893 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4895 ctxt->sax->error(ctxt->userData,
4896 "Extra content at the end of the document\n");
4897 ctxt->wellFormed = 0;
4898 ctxt->disableSAX = 1;
4899 }
4900
4901}
4902
4903/**
4904 * xmlParseReference:
4905 * @ctxt: an XML parser context
4906 *
4907 * parse and handle entity references in content, depending on the SAX
4908 * interface, this may end-up in a call to character() if this is a
4909 * CharRef, a predefined entity, if there is no reference() callback.
4910 * or if the parser was asked to switch to that mode.
4911 *
4912 * [67] Reference ::= EntityRef | CharRef
4913 */
4914void
4915xmlParseReference(xmlParserCtxtPtr ctxt) {
4916 xmlEntityPtr ent;
4917 xmlChar *val;
4918 if (RAW != '&') return;
4919
4920 if (NXT(1) == '#') {
4921 int i = 0;
4922 xmlChar out[10];
4923 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004924 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004925
4926 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4927 /*
4928 * So we are using non-UTF-8 buffers
4929 * Check that the char fit on 8bits, if not
4930 * generate a CharRef.
4931 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004932 if (value <= 0xFF) {
4933 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004934 out[1] = 0;
4935 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4936 (!ctxt->disableSAX))
4937 ctxt->sax->characters(ctxt->userData, out, 1);
4938 } else {
4939 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004940 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004941 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004942 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004943 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4944 (!ctxt->disableSAX))
4945 ctxt->sax->reference(ctxt->userData, out);
4946 }
4947 } else {
4948 /*
4949 * Just encode the value in UTF-8
4950 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004951 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004952 out[i] = 0;
4953 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4954 (!ctxt->disableSAX))
4955 ctxt->sax->characters(ctxt->userData, out, i);
4956 }
4957 } else {
4958 ent = xmlParseEntityRef(ctxt);
4959 if (ent == NULL) return;
4960 if ((ent->name != NULL) &&
4961 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4962 xmlNodePtr list = NULL;
4963 int ret;
4964
4965
4966 /*
4967 * The first reference to the entity trigger a parsing phase
4968 * where the ent->children is filled with the result from
4969 * the parsing.
4970 */
4971 if (ent->children == NULL) {
4972 xmlChar *value;
4973 value = ent->content;
4974
4975 /*
4976 * Check that this entity is well formed
4977 */
4978 if ((value != NULL) &&
4979 (value[1] == 0) && (value[0] == '<') &&
4980 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4981 /*
4982 * DONE: get definite answer on this !!!
4983 * Lots of entity decls are used to declare a single
4984 * char
4985 * <!ENTITY lt "<">
4986 * Which seems to be valid since
4987 * 2.4: The ampersand character (&) and the left angle
4988 * bracket (<) may appear in their literal form only
4989 * when used ... They are also legal within the literal
4990 * entity value of an internal entity declaration;i
4991 * see "4.3.2 Well-Formed Parsed Entities".
4992 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4993 * Looking at the OASIS test suite and James Clark
4994 * tests, this is broken. However the XML REC uses
4995 * it. Is the XML REC not well-formed ????
4996 * This is a hack to avoid this problem
4997 *
4998 * ANSWER: since lt gt amp .. are already defined,
4999 * this is a redefinition and hence the fact that the
5000 * contentis not well balanced is not a Wf error, this
5001 * is lousy but acceptable.
5002 */
5003 list = xmlNewDocText(ctxt->myDoc, value);
5004 if (list != NULL) {
5005 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5006 (ent->children == NULL)) {
5007 ent->children = list;
5008 ent->last = list;
5009 list->parent = (xmlNodePtr) ent;
5010 } else {
5011 xmlFreeNodeList(list);
5012 }
5013 } else if (list != NULL) {
5014 xmlFreeNodeList(list);
5015 }
5016 } else {
5017 /*
5018 * 4.3.2: An internal general parsed entity is well-formed
5019 * if its replacement text matches the production labeled
5020 * content.
5021 */
5022 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5023 ctxt->depth++;
5024 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5025 ctxt->sax, NULL, ctxt->depth,
5026 value, &list);
5027 ctxt->depth--;
5028 } else if (ent->etype ==
5029 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5030 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005031 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005032 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005033 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005034 ctxt->depth--;
5035 } else {
5036 ret = -1;
5037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5038 ctxt->sax->error(ctxt->userData,
5039 "Internal: invalid entity type\n");
5040 }
5041 if (ret == XML_ERR_ENTITY_LOOP) {
5042 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5044 ctxt->sax->error(ctxt->userData,
5045 "Detected entity reference loop\n");
5046 ctxt->wellFormed = 0;
5047 ctxt->disableSAX = 1;
5048 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005049 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5050 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005051 (ent->children == NULL)) {
5052 ent->children = list;
5053 while (list != NULL) {
5054 list->parent = (xmlNodePtr) ent;
5055 if (list->next == NULL)
5056 ent->last = list;
5057 list = list->next;
5058 }
5059 } else {
5060 xmlFreeNodeList(list);
5061 }
5062 } else if (ret > 0) {
5063 ctxt->errNo = ret;
5064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5065 ctxt->sax->error(ctxt->userData,
5066 "Entity value required\n");
5067 ctxt->wellFormed = 0;
5068 ctxt->disableSAX = 1;
5069 } else if (list != NULL) {
5070 xmlFreeNodeList(list);
5071 }
5072 }
5073 }
5074 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5075 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5076 /*
5077 * Create a node.
5078 */
5079 ctxt->sax->reference(ctxt->userData, ent->name);
5080 return;
5081 } else if (ctxt->replaceEntities) {
5082 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5083 /*
5084 * Seems we are generating the DOM content, do
5085 * a simple tree copy
5086 */
5087 xmlNodePtr new;
5088 new = xmlCopyNodeList(ent->children);
5089
5090 xmlAddChildList(ctxt->node, new);
5091 /*
5092 * This is to avoid a nasty side effect, see
5093 * characters() in SAX.c
5094 */
5095 ctxt->nodemem = 0;
5096 ctxt->nodelen = 0;
5097 return;
5098 } else {
5099 /*
5100 * Probably running in SAX mode
5101 */
5102 xmlParserInputPtr input;
5103
5104 input = xmlNewEntityInputStream(ctxt, ent);
5105 xmlPushInput(ctxt, input);
5106 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5107 (RAW == '<') && (NXT(1) == '?') &&
5108 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5109 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5110 xmlParseTextDecl(ctxt);
5111 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5112 /*
5113 * The XML REC instructs us to stop parsing right here
5114 */
5115 ctxt->instate = XML_PARSER_EOF;
5116 return;
5117 }
5118 if (input->standalone == 1) {
5119 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5121 ctxt->sax->error(ctxt->userData,
5122 "external parsed entities cannot be standalone\n");
5123 ctxt->wellFormed = 0;
5124 ctxt->disableSAX = 1;
5125 }
5126 }
5127 return;
5128 }
5129 }
5130 } else {
5131 val = ent->content;
5132 if (val == NULL) return;
5133 /*
5134 * inline the entity.
5135 */
5136 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5137 (!ctxt->disableSAX))
5138 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5139 }
5140 }
5141}
5142
5143/**
5144 * xmlParseEntityRef:
5145 * @ctxt: an XML parser context
5146 *
5147 * parse ENTITY references declarations
5148 *
5149 * [68] EntityRef ::= '&' Name ';'
5150 *
5151 * [ WFC: Entity Declared ]
5152 * In a document without any DTD, a document with only an internal DTD
5153 * subset which contains no parameter entity references, or a document
5154 * with "standalone='yes'", the Name given in the entity reference
5155 * must match that in an entity declaration, except that well-formed
5156 * documents need not declare any of the following entities: amp, lt,
5157 * gt, apos, quot. The declaration of a parameter entity must precede
5158 * any reference to it. Similarly, the declaration of a general entity
5159 * must precede any reference to it which appears in a default value in an
5160 * attribute-list declaration. Note that if entities are declared in the
5161 * external subset or in external parameter entities, a non-validating
5162 * processor is not obligated to read and process their declarations;
5163 * for such documents, the rule that an entity must be declared is a
5164 * well-formedness constraint only if standalone='yes'.
5165 *
5166 * [ WFC: Parsed Entity ]
5167 * An entity reference must not contain the name of an unparsed entity
5168 *
5169 * Returns the xmlEntityPtr if found, or NULL otherwise.
5170 */
5171xmlEntityPtr
5172xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5173 xmlChar *name;
5174 xmlEntityPtr ent = NULL;
5175
5176 GROW;
5177
5178 if (RAW == '&') {
5179 NEXT;
5180 name = xmlParseName(ctxt);
5181 if (name == NULL) {
5182 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5184 ctxt->sax->error(ctxt->userData,
5185 "xmlParseEntityRef: no name\n");
5186 ctxt->wellFormed = 0;
5187 ctxt->disableSAX = 1;
5188 } else {
5189 if (RAW == ';') {
5190 NEXT;
5191 /*
5192 * Ask first SAX for entity resolution, otherwise try the
5193 * predefined set.
5194 */
5195 if (ctxt->sax != NULL) {
5196 if (ctxt->sax->getEntity != NULL)
5197 ent = ctxt->sax->getEntity(ctxt->userData, name);
5198 if (ent == NULL)
5199 ent = xmlGetPredefinedEntity(name);
5200 }
5201 /*
5202 * [ WFC: Entity Declared ]
5203 * In a document without any DTD, a document with only an
5204 * internal DTD subset which contains no parameter entity
5205 * references, or a document with "standalone='yes'", the
5206 * Name given in the entity reference must match that in an
5207 * entity declaration, except that well-formed documents
5208 * need not declare any of the following entities: amp, lt,
5209 * gt, apos, quot.
5210 * The declaration of a parameter entity must precede any
5211 * reference to it.
5212 * Similarly, the declaration of a general entity must
5213 * precede any reference to it which appears in a default
5214 * value in an attribute-list declaration. Note that if
5215 * entities are declared in the external subset or in
5216 * external parameter entities, a non-validating processor
5217 * is not obligated to read and process their declarations;
5218 * for such documents, the rule that an entity must be
5219 * declared is a well-formedness constraint only if
5220 * standalone='yes'.
5221 */
5222 if (ent == NULL) {
5223 if ((ctxt->standalone == 1) ||
5224 ((ctxt->hasExternalSubset == 0) &&
5225 (ctxt->hasPErefs == 0))) {
5226 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5228 ctxt->sax->error(ctxt->userData,
5229 "Entity '%s' not defined\n", name);
5230 ctxt->wellFormed = 0;
5231 ctxt->disableSAX = 1;
5232 } else {
5233 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5234 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005235 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005236 "Entity '%s' not defined\n", name);
5237 }
5238 }
5239
5240 /*
5241 * [ WFC: Parsed Entity ]
5242 * An entity reference must not contain the name of an
5243 * unparsed entity
5244 */
5245 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5246 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5248 ctxt->sax->error(ctxt->userData,
5249 "Entity reference to unparsed entity %s\n", name);
5250 ctxt->wellFormed = 0;
5251 ctxt->disableSAX = 1;
5252 }
5253
5254 /*
5255 * [ WFC: No External Entity References ]
5256 * Attribute values cannot contain direct or indirect
5257 * entity references to external entities.
5258 */
5259 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5260 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5261 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5263 ctxt->sax->error(ctxt->userData,
5264 "Attribute references external entity '%s'\n", name);
5265 ctxt->wellFormed = 0;
5266 ctxt->disableSAX = 1;
5267 }
5268 /*
5269 * [ WFC: No < in Attribute Values ]
5270 * The replacement text of any entity referred to directly or
5271 * indirectly in an attribute value (other than "&lt;") must
5272 * not contain a <.
5273 */
5274 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5275 (ent != NULL) &&
5276 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5277 (ent->content != NULL) &&
5278 (xmlStrchr(ent->content, '<'))) {
5279 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5281 ctxt->sax->error(ctxt->userData,
5282 "'<' in entity '%s' is not allowed in attributes values\n", name);
5283 ctxt->wellFormed = 0;
5284 ctxt->disableSAX = 1;
5285 }
5286
5287 /*
5288 * Internal check, no parameter entities here ...
5289 */
5290 else {
5291 switch (ent->etype) {
5292 case XML_INTERNAL_PARAMETER_ENTITY:
5293 case XML_EXTERNAL_PARAMETER_ENTITY:
5294 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5296 ctxt->sax->error(ctxt->userData,
5297 "Attempt to reference the parameter entity '%s'\n", name);
5298 ctxt->wellFormed = 0;
5299 ctxt->disableSAX = 1;
5300 break;
5301 default:
5302 break;
5303 }
5304 }
5305
5306 /*
5307 * [ WFC: No Recursion ]
5308 * A parsed entity must not contain a recursive reference
5309 * to itself, either directly or indirectly.
5310 * Done somewhere else
5311 */
5312
5313 } else {
5314 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5316 ctxt->sax->error(ctxt->userData,
5317 "xmlParseEntityRef: expecting ';'\n");
5318 ctxt->wellFormed = 0;
5319 ctxt->disableSAX = 1;
5320 }
5321 xmlFree(name);
5322 }
5323 }
5324 return(ent);
5325}
5326
5327/**
5328 * xmlParseStringEntityRef:
5329 * @ctxt: an XML parser context
5330 * @str: a pointer to an index in the string
5331 *
5332 * parse ENTITY references declarations, but this version parses it from
5333 * a string value.
5334 *
5335 * [68] EntityRef ::= '&' Name ';'
5336 *
5337 * [ WFC: Entity Declared ]
5338 * In a document without any DTD, a document with only an internal DTD
5339 * subset which contains no parameter entity references, or a document
5340 * with "standalone='yes'", the Name given in the entity reference
5341 * must match that in an entity declaration, except that well-formed
5342 * documents need not declare any of the following entities: amp, lt,
5343 * gt, apos, quot. The declaration of a parameter entity must precede
5344 * any reference to it. Similarly, the declaration of a general entity
5345 * must precede any reference to it which appears in a default value in an
5346 * attribute-list declaration. Note that if entities are declared in the
5347 * external subset or in external parameter entities, a non-validating
5348 * processor is not obligated to read and process their declarations;
5349 * for such documents, the rule that an entity must be declared is a
5350 * well-formedness constraint only if standalone='yes'.
5351 *
5352 * [ WFC: Parsed Entity ]
5353 * An entity reference must not contain the name of an unparsed entity
5354 *
5355 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5356 * is updated to the current location in the string.
5357 */
5358xmlEntityPtr
5359xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5360 xmlChar *name;
5361 const xmlChar *ptr;
5362 xmlChar cur;
5363 xmlEntityPtr ent = NULL;
5364
5365 if ((str == NULL) || (*str == NULL))
5366 return(NULL);
5367 ptr = *str;
5368 cur = *ptr;
5369 if (cur == '&') {
5370 ptr++;
5371 cur = *ptr;
5372 name = xmlParseStringName(ctxt, &ptr);
5373 if (name == NULL) {
5374 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5376 ctxt->sax->error(ctxt->userData,
5377 "xmlParseEntityRef: no name\n");
5378 ctxt->wellFormed = 0;
5379 ctxt->disableSAX = 1;
5380 } else {
5381 if (*ptr == ';') {
5382 ptr++;
5383 /*
5384 * Ask first SAX for entity resolution, otherwise try the
5385 * predefined set.
5386 */
5387 if (ctxt->sax != NULL) {
5388 if (ctxt->sax->getEntity != NULL)
5389 ent = ctxt->sax->getEntity(ctxt->userData, name);
5390 if (ent == NULL)
5391 ent = xmlGetPredefinedEntity(name);
5392 }
5393 /*
5394 * [ WFC: Entity Declared ]
5395 * In a document without any DTD, a document with only an
5396 * internal DTD subset which contains no parameter entity
5397 * references, or a document with "standalone='yes'", the
5398 * Name given in the entity reference must match that in an
5399 * entity declaration, except that well-formed documents
5400 * need not declare any of the following entities: amp, lt,
5401 * gt, apos, quot.
5402 * The declaration of a parameter entity must precede any
5403 * reference to it.
5404 * Similarly, the declaration of a general entity must
5405 * precede any reference to it which appears in a default
5406 * value in an attribute-list declaration. Note that if
5407 * entities are declared in the external subset or in
5408 * external parameter entities, a non-validating processor
5409 * is not obligated to read and process their declarations;
5410 * for such documents, the rule that an entity must be
5411 * declared is a well-formedness constraint only if
5412 * standalone='yes'.
5413 */
5414 if (ent == NULL) {
5415 if ((ctxt->standalone == 1) ||
5416 ((ctxt->hasExternalSubset == 0) &&
5417 (ctxt->hasPErefs == 0))) {
5418 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5419 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5420 ctxt->sax->error(ctxt->userData,
5421 "Entity '%s' not defined\n", name);
5422 ctxt->wellFormed = 0;
5423 ctxt->disableSAX = 1;
5424 } else {
5425 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5426 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5427 ctxt->sax->warning(ctxt->userData,
5428 "Entity '%s' not defined\n", name);
5429 }
5430 }
5431
5432 /*
5433 * [ WFC: Parsed Entity ]
5434 * An entity reference must not contain the name of an
5435 * unparsed entity
5436 */
5437 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5438 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5440 ctxt->sax->error(ctxt->userData,
5441 "Entity reference to unparsed entity %s\n", name);
5442 ctxt->wellFormed = 0;
5443 ctxt->disableSAX = 1;
5444 }
5445
5446 /*
5447 * [ WFC: No External Entity References ]
5448 * Attribute values cannot contain direct or indirect
5449 * entity references to external entities.
5450 */
5451 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5452 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5453 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5455 ctxt->sax->error(ctxt->userData,
5456 "Attribute references external entity '%s'\n", name);
5457 ctxt->wellFormed = 0;
5458 ctxt->disableSAX = 1;
5459 }
5460 /*
5461 * [ WFC: No < in Attribute Values ]
5462 * The replacement text of any entity referred to directly or
5463 * indirectly in an attribute value (other than "&lt;") must
5464 * not contain a <.
5465 */
5466 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5467 (ent != NULL) &&
5468 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5469 (ent->content != NULL) &&
5470 (xmlStrchr(ent->content, '<'))) {
5471 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5473 ctxt->sax->error(ctxt->userData,
5474 "'<' in entity '%s' is not allowed in attributes values\n", name);
5475 ctxt->wellFormed = 0;
5476 ctxt->disableSAX = 1;
5477 }
5478
5479 /*
5480 * Internal check, no parameter entities here ...
5481 */
5482 else {
5483 switch (ent->etype) {
5484 case XML_INTERNAL_PARAMETER_ENTITY:
5485 case XML_EXTERNAL_PARAMETER_ENTITY:
5486 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5488 ctxt->sax->error(ctxt->userData,
5489 "Attempt to reference the parameter entity '%s'\n", name);
5490 ctxt->wellFormed = 0;
5491 ctxt->disableSAX = 1;
5492 break;
5493 default:
5494 break;
5495 }
5496 }
5497
5498 /*
5499 * [ WFC: No Recursion ]
5500 * A parsed entity must not contain a recursive reference
5501 * to itself, either directly or indirectly.
5502 * Done somewhwere else
5503 */
5504
5505 } else {
5506 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5508 ctxt->sax->error(ctxt->userData,
5509 "xmlParseEntityRef: expecting ';'\n");
5510 ctxt->wellFormed = 0;
5511 ctxt->disableSAX = 1;
5512 }
5513 xmlFree(name);
5514 }
5515 }
5516 *str = ptr;
5517 return(ent);
5518}
5519
5520/**
5521 * xmlParsePEReference:
5522 * @ctxt: an XML parser context
5523 *
5524 * parse PEReference declarations
5525 * The entity content is handled directly by pushing it's content as
5526 * a new input stream.
5527 *
5528 * [69] PEReference ::= '%' Name ';'
5529 *
5530 * [ WFC: No Recursion ]
5531 * A parsed entity must not contain a recursive
5532 * reference to itself, either directly or indirectly.
5533 *
5534 * [ WFC: Entity Declared ]
5535 * In a document without any DTD, a document with only an internal DTD
5536 * subset which contains no parameter entity references, or a document
5537 * with "standalone='yes'", ... ... The declaration of a parameter
5538 * entity must precede any reference to it...
5539 *
5540 * [ VC: Entity Declared ]
5541 * In a document with an external subset or external parameter entities
5542 * with "standalone='no'", ... ... The declaration of a parameter entity
5543 * must precede any reference to it...
5544 *
5545 * [ WFC: In DTD ]
5546 * Parameter-entity references may only appear in the DTD.
5547 * NOTE: misleading but this is handled.
5548 */
5549void
5550xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5551 xmlChar *name;
5552 xmlEntityPtr entity = NULL;
5553 xmlParserInputPtr input;
5554
5555 if (RAW == '%') {
5556 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005557 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005558 if (name == NULL) {
5559 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5561 ctxt->sax->error(ctxt->userData,
5562 "xmlParsePEReference: no name\n");
5563 ctxt->wellFormed = 0;
5564 ctxt->disableSAX = 1;
5565 } else {
5566 if (RAW == ';') {
5567 NEXT;
5568 if ((ctxt->sax != NULL) &&
5569 (ctxt->sax->getParameterEntity != NULL))
5570 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5571 name);
5572 if (entity == NULL) {
5573 /*
5574 * [ WFC: Entity Declared ]
5575 * In a document without any DTD, a document with only an
5576 * internal DTD subset which contains no parameter entity
5577 * references, or a document with "standalone='yes'", ...
5578 * ... The declaration of a parameter entity must precede
5579 * any reference to it...
5580 */
5581 if ((ctxt->standalone == 1) ||
5582 ((ctxt->hasExternalSubset == 0) &&
5583 (ctxt->hasPErefs == 0))) {
5584 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5585 if ((!ctxt->disableSAX) &&
5586 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5587 ctxt->sax->error(ctxt->userData,
5588 "PEReference: %%%s; not found\n", name);
5589 ctxt->wellFormed = 0;
5590 ctxt->disableSAX = 1;
5591 } else {
5592 /*
5593 * [ VC: Entity Declared ]
5594 * In a document with an external subset or external
5595 * parameter entities with "standalone='no'", ...
5596 * ... The declaration of a parameter entity must precede
5597 * any reference to it...
5598 */
5599 if ((!ctxt->disableSAX) &&
5600 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5601 ctxt->sax->warning(ctxt->userData,
5602 "PEReference: %%%s; not found\n", name);
5603 ctxt->valid = 0;
5604 }
5605 } else {
5606 /*
5607 * Internal checking in case the entity quest barfed
5608 */
5609 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5610 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5611 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5612 ctxt->sax->warning(ctxt->userData,
5613 "Internal: %%%s; is not a parameter entity\n", name);
5614 } else {
5615 /*
5616 * TODO !!!
5617 * handle the extra spaces added before and after
5618 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5619 */
5620 input = xmlNewEntityInputStream(ctxt, entity);
5621 xmlPushInput(ctxt, input);
5622 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5623 (RAW == '<') && (NXT(1) == '?') &&
5624 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5625 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5626 xmlParseTextDecl(ctxt);
5627 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5628 /*
5629 * The XML REC instructs us to stop parsing
5630 * right here
5631 */
5632 ctxt->instate = XML_PARSER_EOF;
5633 xmlFree(name);
5634 return;
5635 }
5636 }
5637 if (ctxt->token == 0)
5638 ctxt->token = ' ';
5639 }
5640 }
5641 ctxt->hasPErefs = 1;
5642 } else {
5643 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5645 ctxt->sax->error(ctxt->userData,
5646 "xmlParsePEReference: expecting ';'\n");
5647 ctxt->wellFormed = 0;
5648 ctxt->disableSAX = 1;
5649 }
5650 xmlFree(name);
5651 }
5652 }
5653}
5654
5655/**
5656 * xmlParseStringPEReference:
5657 * @ctxt: an XML parser context
5658 * @str: a pointer to an index in the string
5659 *
5660 * parse PEReference declarations
5661 *
5662 * [69] PEReference ::= '%' Name ';'
5663 *
5664 * [ WFC: No Recursion ]
5665 * A parsed entity must not contain a recursive
5666 * reference to itself, either directly or indirectly.
5667 *
5668 * [ WFC: Entity Declared ]
5669 * In a document without any DTD, a document with only an internal DTD
5670 * subset which contains no parameter entity references, or a document
5671 * with "standalone='yes'", ... ... The declaration of a parameter
5672 * entity must precede any reference to it...
5673 *
5674 * [ VC: Entity Declared ]
5675 * In a document with an external subset or external parameter entities
5676 * with "standalone='no'", ... ... The declaration of a parameter entity
5677 * must precede any reference to it...
5678 *
5679 * [ WFC: In DTD ]
5680 * Parameter-entity references may only appear in the DTD.
5681 * NOTE: misleading but this is handled.
5682 *
5683 * Returns the string of the entity content.
5684 * str is updated to the current value of the index
5685 */
5686xmlEntityPtr
5687xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5688 const xmlChar *ptr;
5689 xmlChar cur;
5690 xmlChar *name;
5691 xmlEntityPtr entity = NULL;
5692
5693 if ((str == NULL) || (*str == NULL)) return(NULL);
5694 ptr = *str;
5695 cur = *ptr;
5696 if (cur == '%') {
5697 ptr++;
5698 cur = *ptr;
5699 name = xmlParseStringName(ctxt, &ptr);
5700 if (name == NULL) {
5701 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5703 ctxt->sax->error(ctxt->userData,
5704 "xmlParseStringPEReference: no name\n");
5705 ctxt->wellFormed = 0;
5706 ctxt->disableSAX = 1;
5707 } else {
5708 cur = *ptr;
5709 if (cur == ';') {
5710 ptr++;
5711 cur = *ptr;
5712 if ((ctxt->sax != NULL) &&
5713 (ctxt->sax->getParameterEntity != NULL))
5714 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5715 name);
5716 if (entity == NULL) {
5717 /*
5718 * [ WFC: Entity Declared ]
5719 * In a document without any DTD, a document with only an
5720 * internal DTD subset which contains no parameter entity
5721 * references, or a document with "standalone='yes'", ...
5722 * ... The declaration of a parameter entity must precede
5723 * any reference to it...
5724 */
5725 if ((ctxt->standalone == 1) ||
5726 ((ctxt->hasExternalSubset == 0) &&
5727 (ctxt->hasPErefs == 0))) {
5728 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5730 ctxt->sax->error(ctxt->userData,
5731 "PEReference: %%%s; not found\n", name);
5732 ctxt->wellFormed = 0;
5733 ctxt->disableSAX = 1;
5734 } else {
5735 /*
5736 * [ VC: Entity Declared ]
5737 * In a document with an external subset or external
5738 * parameter entities with "standalone='no'", ...
5739 * ... The declaration of a parameter entity must
5740 * precede any reference to it...
5741 */
5742 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5743 ctxt->sax->warning(ctxt->userData,
5744 "PEReference: %%%s; not found\n", name);
5745 ctxt->valid = 0;
5746 }
5747 } else {
5748 /*
5749 * Internal checking in case the entity quest barfed
5750 */
5751 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5752 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5753 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5754 ctxt->sax->warning(ctxt->userData,
5755 "Internal: %%%s; is not a parameter entity\n", name);
5756 }
5757 }
5758 ctxt->hasPErefs = 1;
5759 } else {
5760 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "xmlParseStringPEReference: expecting ';'\n");
5764 ctxt->wellFormed = 0;
5765 ctxt->disableSAX = 1;
5766 }
5767 xmlFree(name);
5768 }
5769 }
5770 *str = ptr;
5771 return(entity);
5772}
5773
5774/**
5775 * xmlParseDocTypeDecl:
5776 * @ctxt: an XML parser context
5777 *
5778 * parse a DOCTYPE declaration
5779 *
5780 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5781 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5782 *
5783 * [ VC: Root Element Type ]
5784 * The Name in the document type declaration must match the element
5785 * type of the root element.
5786 */
5787
5788void
5789xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5790 xmlChar *name = NULL;
5791 xmlChar *ExternalID = NULL;
5792 xmlChar *URI = NULL;
5793
5794 /*
5795 * We know that '<!DOCTYPE' has been detected.
5796 */
5797 SKIP(9);
5798
5799 SKIP_BLANKS;
5800
5801 /*
5802 * Parse the DOCTYPE name.
5803 */
5804 name = xmlParseName(ctxt);
5805 if (name == NULL) {
5806 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5808 ctxt->sax->error(ctxt->userData,
5809 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5810 ctxt->wellFormed = 0;
5811 ctxt->disableSAX = 1;
5812 }
5813 ctxt->intSubName = name;
5814
5815 SKIP_BLANKS;
5816
5817 /*
5818 * Check for SystemID and ExternalID
5819 */
5820 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5821
5822 if ((URI != NULL) || (ExternalID != NULL)) {
5823 ctxt->hasExternalSubset = 1;
5824 }
5825 ctxt->extSubURI = URI;
5826 ctxt->extSubSystem = ExternalID;
5827
5828 SKIP_BLANKS;
5829
5830 /*
5831 * Create and update the internal subset.
5832 */
5833 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5834 (!ctxt->disableSAX))
5835 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5836
5837 /*
5838 * Is there any internal subset declarations ?
5839 * they are handled separately in xmlParseInternalSubset()
5840 */
5841 if (RAW == '[')
5842 return;
5843
5844 /*
5845 * We should be at the end of the DOCTYPE declaration.
5846 */
5847 if (RAW != '>') {
5848 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5850 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5851 ctxt->wellFormed = 0;
5852 ctxt->disableSAX = 1;
5853 }
5854 NEXT;
5855}
5856
5857/**
5858 * xmlParseInternalsubset:
5859 * @ctxt: an XML parser context
5860 *
5861 * parse the internal subset declaration
5862 *
5863 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5864 */
5865
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005866static void
Owen Taylor3473f882001-02-23 17:55:21 +00005867xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5868 /*
5869 * Is there any DTD definition ?
5870 */
5871 if (RAW == '[') {
5872 ctxt->instate = XML_PARSER_DTD;
5873 NEXT;
5874 /*
5875 * Parse the succession of Markup declarations and
5876 * PEReferences.
5877 * Subsequence (markupdecl | PEReference | S)*
5878 */
5879 while (RAW != ']') {
5880 const xmlChar *check = CUR_PTR;
5881 int cons = ctxt->input->consumed;
5882
5883 SKIP_BLANKS;
5884 xmlParseMarkupDecl(ctxt);
5885 xmlParsePEReference(ctxt);
5886
5887 /*
5888 * Pop-up of finished entities.
5889 */
5890 while ((RAW == 0) && (ctxt->inputNr > 1))
5891 xmlPopInput(ctxt);
5892
5893 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5894 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5896 ctxt->sax->error(ctxt->userData,
5897 "xmlParseInternalSubset: error detected in Markup declaration\n");
5898 ctxt->wellFormed = 0;
5899 ctxt->disableSAX = 1;
5900 break;
5901 }
5902 }
5903 if (RAW == ']') {
5904 NEXT;
5905 SKIP_BLANKS;
5906 }
5907 }
5908
5909 /*
5910 * We should be at the end of the DOCTYPE declaration.
5911 */
5912 if (RAW != '>') {
5913 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5915 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5916 ctxt->wellFormed = 0;
5917 ctxt->disableSAX = 1;
5918 }
5919 NEXT;
5920}
5921
5922/**
5923 * xmlParseAttribute:
5924 * @ctxt: an XML parser context
5925 * @value: a xmlChar ** used to store the value of the attribute
5926 *
5927 * parse an attribute
5928 *
5929 * [41] Attribute ::= Name Eq AttValue
5930 *
5931 * [ WFC: No External Entity References ]
5932 * Attribute values cannot contain direct or indirect entity references
5933 * to external entities.
5934 *
5935 * [ WFC: No < in Attribute Values ]
5936 * The replacement text of any entity referred to directly or indirectly in
5937 * an attribute value (other than "&lt;") must not contain a <.
5938 *
5939 * [ VC: Attribute Value Type ]
5940 * The attribute must have been declared; the value must be of the type
5941 * declared for it.
5942 *
5943 * [25] Eq ::= S? '=' S?
5944 *
5945 * With namespace:
5946 *
5947 * [NS 11] Attribute ::= QName Eq AttValue
5948 *
5949 * Also the case QName == xmlns:??? is handled independently as a namespace
5950 * definition.
5951 *
5952 * Returns the attribute name, and the value in *value.
5953 */
5954
5955xmlChar *
5956xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5957 xmlChar *name, *val;
5958
5959 *value = NULL;
5960 name = xmlParseName(ctxt);
5961 if (name == NULL) {
5962 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5964 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5965 ctxt->wellFormed = 0;
5966 ctxt->disableSAX = 1;
5967 return(NULL);
5968 }
5969
5970 /*
5971 * read the value
5972 */
5973 SKIP_BLANKS;
5974 if (RAW == '=') {
5975 NEXT;
5976 SKIP_BLANKS;
5977 val = xmlParseAttValue(ctxt);
5978 ctxt->instate = XML_PARSER_CONTENT;
5979 } else {
5980 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5982 ctxt->sax->error(ctxt->userData,
5983 "Specification mandate value for attribute %s\n", name);
5984 ctxt->wellFormed = 0;
5985 ctxt->disableSAX = 1;
5986 xmlFree(name);
5987 return(NULL);
5988 }
5989
5990 /*
5991 * Check that xml:lang conforms to the specification
5992 * No more registered as an error, just generate a warning now
5993 * since this was deprecated in XML second edition
5994 */
5995 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5996 if (!xmlCheckLanguageID(val)) {
5997 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5998 ctxt->sax->warning(ctxt->userData,
5999 "Malformed value for xml:lang : %s\n", val);
6000 }
6001 }
6002
6003 /*
6004 * Check that xml:space conforms to the specification
6005 */
6006 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6007 if (xmlStrEqual(val, BAD_CAST "default"))
6008 *(ctxt->space) = 0;
6009 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6010 *(ctxt->space) = 1;
6011 else {
6012 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6014 ctxt->sax->error(ctxt->userData,
6015"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6016 val);
6017 ctxt->wellFormed = 0;
6018 ctxt->disableSAX = 1;
6019 }
6020 }
6021
6022 *value = val;
6023 return(name);
6024}
6025
6026/**
6027 * xmlParseStartTag:
6028 * @ctxt: an XML parser context
6029 *
6030 * parse a start of tag either for rule element or
6031 * EmptyElement. In both case we don't parse the tag closing chars.
6032 *
6033 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6034 *
6035 * [ WFC: Unique Att Spec ]
6036 * No attribute name may appear more than once in the same start-tag or
6037 * empty-element tag.
6038 *
6039 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6040 *
6041 * [ WFC: Unique Att Spec ]
6042 * No attribute name may appear more than once in the same start-tag or
6043 * empty-element tag.
6044 *
6045 * With namespace:
6046 *
6047 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6048 *
6049 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6050 *
6051 * Returns the element name parsed
6052 */
6053
6054xmlChar *
6055xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6056 xmlChar *name;
6057 xmlChar *attname;
6058 xmlChar *attvalue;
6059 const xmlChar **atts = NULL;
6060 int nbatts = 0;
6061 int maxatts = 0;
6062 int i;
6063
6064 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006065 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006066
6067 name = xmlParseName(ctxt);
6068 if (name == NULL) {
6069 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6071 ctxt->sax->error(ctxt->userData,
6072 "xmlParseStartTag: invalid element name\n");
6073 ctxt->wellFormed = 0;
6074 ctxt->disableSAX = 1;
6075 return(NULL);
6076 }
6077
6078 /*
6079 * Now parse the attributes, it ends up with the ending
6080 *
6081 * (S Attribute)* S?
6082 */
6083 SKIP_BLANKS;
6084 GROW;
6085
Daniel Veillard21a0f912001-02-25 19:54:14 +00006086 while ((RAW != '>') &&
6087 ((RAW != '/') || (NXT(1) != '>')) &&
6088 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006089 const xmlChar *q = CUR_PTR;
6090 int cons = ctxt->input->consumed;
6091
6092 attname = xmlParseAttribute(ctxt, &attvalue);
6093 if ((attname != NULL) && (attvalue != NULL)) {
6094 /*
6095 * [ WFC: Unique Att Spec ]
6096 * No attribute name may appear more than once in the same
6097 * start-tag or empty-element tag.
6098 */
6099 for (i = 0; i < nbatts;i += 2) {
6100 if (xmlStrEqual(atts[i], attname)) {
6101 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6103 ctxt->sax->error(ctxt->userData,
6104 "Attribute %s redefined\n",
6105 attname);
6106 ctxt->wellFormed = 0;
6107 ctxt->disableSAX = 1;
6108 xmlFree(attname);
6109 xmlFree(attvalue);
6110 goto failed;
6111 }
6112 }
6113
6114 /*
6115 * Add the pair to atts
6116 */
6117 if (atts == NULL) {
6118 maxatts = 10;
6119 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6120 if (atts == NULL) {
6121 xmlGenericError(xmlGenericErrorContext,
6122 "malloc of %ld byte failed\n",
6123 maxatts * (long)sizeof(xmlChar *));
6124 return(NULL);
6125 }
6126 } else if (nbatts + 4 > maxatts) {
6127 maxatts *= 2;
6128 atts = (const xmlChar **) xmlRealloc((void *) atts,
6129 maxatts * sizeof(xmlChar *));
6130 if (atts == NULL) {
6131 xmlGenericError(xmlGenericErrorContext,
6132 "realloc of %ld byte failed\n",
6133 maxatts * (long)sizeof(xmlChar *));
6134 return(NULL);
6135 }
6136 }
6137 atts[nbatts++] = attname;
6138 atts[nbatts++] = attvalue;
6139 atts[nbatts] = NULL;
6140 atts[nbatts + 1] = NULL;
6141 } else {
6142 if (attname != NULL)
6143 xmlFree(attname);
6144 if (attvalue != NULL)
6145 xmlFree(attvalue);
6146 }
6147
6148failed:
6149
6150 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6151 break;
6152 if (!IS_BLANK(RAW)) {
6153 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6155 ctxt->sax->error(ctxt->userData,
6156 "attributes construct error\n");
6157 ctxt->wellFormed = 0;
6158 ctxt->disableSAX = 1;
6159 }
6160 SKIP_BLANKS;
6161 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6162 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6164 ctxt->sax->error(ctxt->userData,
6165 "xmlParseStartTag: problem parsing attributes\n");
6166 ctxt->wellFormed = 0;
6167 ctxt->disableSAX = 1;
6168 break;
6169 }
6170 GROW;
6171 }
6172
6173 /*
6174 * SAX: Start of Element !
6175 */
6176 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6177 (!ctxt->disableSAX))
6178 ctxt->sax->startElement(ctxt->userData, name, atts);
6179
6180 if (atts != NULL) {
6181 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6182 xmlFree((void *) atts);
6183 }
6184 return(name);
6185}
6186
6187/**
6188 * xmlParseEndTag:
6189 * @ctxt: an XML parser context
6190 *
6191 * parse an end of tag
6192 *
6193 * [42] ETag ::= '</' Name S? '>'
6194 *
6195 * With namespace
6196 *
6197 * [NS 9] ETag ::= '</' QName S? '>'
6198 */
6199
6200void
6201xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6202 xmlChar *name;
6203 xmlChar *oldname;
6204
6205 GROW;
6206 if ((RAW != '<') || (NXT(1) != '/')) {
6207 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6209 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6210 ctxt->wellFormed = 0;
6211 ctxt->disableSAX = 1;
6212 return;
6213 }
6214 SKIP(2);
6215
6216 name = xmlParseName(ctxt);
6217
6218 /*
6219 * We should definitely be at the ending "S? '>'" part
6220 */
6221 GROW;
6222 SKIP_BLANKS;
6223 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6224 ctxt->errNo = XML_ERR_GT_REQUIRED;
6225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6226 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6227 ctxt->wellFormed = 0;
6228 ctxt->disableSAX = 1;
6229 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006230 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006231
6232 /*
6233 * [ WFC: Element Type Match ]
6234 * The Name in an element's end-tag must match the element type in the
6235 * start-tag.
6236 *
6237 */
6238 if ((name == NULL) || (ctxt->name == NULL) ||
6239 (!xmlStrEqual(name, ctxt->name))) {
6240 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6242 if ((name != NULL) && (ctxt->name != NULL)) {
6243 ctxt->sax->error(ctxt->userData,
6244 "Opening and ending tag mismatch: %s and %s\n",
6245 ctxt->name, name);
6246 } else if (ctxt->name != NULL) {
6247 ctxt->sax->error(ctxt->userData,
6248 "Ending tag eror for: %s\n", ctxt->name);
6249 } else {
6250 ctxt->sax->error(ctxt->userData,
6251 "Ending tag error: internal error ???\n");
6252 }
6253
6254 }
6255 ctxt->wellFormed = 0;
6256 ctxt->disableSAX = 1;
6257 }
6258
6259 /*
6260 * SAX: End of Tag
6261 */
6262 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6263 (!ctxt->disableSAX))
6264 ctxt->sax->endElement(ctxt->userData, name);
6265
6266 if (name != NULL)
6267 xmlFree(name);
6268 oldname = namePop(ctxt);
6269 spacePop(ctxt);
6270 if (oldname != NULL) {
6271#ifdef DEBUG_STACK
6272 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6273#endif
6274 xmlFree(oldname);
6275 }
6276 return;
6277}
6278
6279/**
6280 * xmlParseCDSect:
6281 * @ctxt: an XML parser context
6282 *
6283 * Parse escaped pure raw content.
6284 *
6285 * [18] CDSect ::= CDStart CData CDEnd
6286 *
6287 * [19] CDStart ::= '<![CDATA['
6288 *
6289 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6290 *
6291 * [21] CDEnd ::= ']]>'
6292 */
6293void
6294xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6295 xmlChar *buf = NULL;
6296 int len = 0;
6297 int size = XML_PARSER_BUFFER_SIZE;
6298 int r, rl;
6299 int s, sl;
6300 int cur, l;
6301 int count = 0;
6302
6303 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6304 (NXT(2) == '[') && (NXT(3) == 'C') &&
6305 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6306 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6307 (NXT(8) == '[')) {
6308 SKIP(9);
6309 } else
6310 return;
6311
6312 ctxt->instate = XML_PARSER_CDATA_SECTION;
6313 r = CUR_CHAR(rl);
6314 if (!IS_CHAR(r)) {
6315 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6316 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6317 ctxt->sax->error(ctxt->userData,
6318 "CData section not finished\n");
6319 ctxt->wellFormed = 0;
6320 ctxt->disableSAX = 1;
6321 ctxt->instate = XML_PARSER_CONTENT;
6322 return;
6323 }
6324 NEXTL(rl);
6325 s = CUR_CHAR(sl);
6326 if (!IS_CHAR(s)) {
6327 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6329 ctxt->sax->error(ctxt->userData,
6330 "CData section not finished\n");
6331 ctxt->wellFormed = 0;
6332 ctxt->disableSAX = 1;
6333 ctxt->instate = XML_PARSER_CONTENT;
6334 return;
6335 }
6336 NEXTL(sl);
6337 cur = CUR_CHAR(l);
6338 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6339 if (buf == NULL) {
6340 xmlGenericError(xmlGenericErrorContext,
6341 "malloc of %d byte failed\n", size);
6342 return;
6343 }
6344 while (IS_CHAR(cur) &&
6345 ((r != ']') || (s != ']') || (cur != '>'))) {
6346 if (len + 5 >= size) {
6347 size *= 2;
6348 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6349 if (buf == NULL) {
6350 xmlGenericError(xmlGenericErrorContext,
6351 "realloc of %d byte failed\n", size);
6352 return;
6353 }
6354 }
6355 COPY_BUF(rl,buf,len,r);
6356 r = s;
6357 rl = sl;
6358 s = cur;
6359 sl = l;
6360 count++;
6361 if (count > 50) {
6362 GROW;
6363 count = 0;
6364 }
6365 NEXTL(l);
6366 cur = CUR_CHAR(l);
6367 }
6368 buf[len] = 0;
6369 ctxt->instate = XML_PARSER_CONTENT;
6370 if (cur != '>') {
6371 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6373 ctxt->sax->error(ctxt->userData,
6374 "CData section not finished\n%.50s\n", buf);
6375 ctxt->wellFormed = 0;
6376 ctxt->disableSAX = 1;
6377 xmlFree(buf);
6378 return;
6379 }
6380 NEXTL(l);
6381
6382 /*
6383 * Ok the buffer is to be consumed as cdata.
6384 */
6385 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6386 if (ctxt->sax->cdataBlock != NULL)
6387 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6388 }
6389 xmlFree(buf);
6390}
6391
6392/**
6393 * xmlParseContent:
6394 * @ctxt: an XML parser context
6395 *
6396 * Parse a content:
6397 *
6398 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6399 */
6400
6401void
6402xmlParseContent(xmlParserCtxtPtr ctxt) {
6403 GROW;
6404 while (((RAW != 0) || (ctxt->token != 0)) &&
6405 ((RAW != '<') || (NXT(1) != '/'))) {
6406 const xmlChar *test = CUR_PTR;
6407 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006408 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006409 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006410
6411 /*
6412 * Handle possible processed charrefs.
6413 */
6414 if (ctxt->token != 0) {
6415 xmlParseCharData(ctxt, 0);
6416 }
6417 /*
6418 * First case : a Processing Instruction.
6419 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006420 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006421 xmlParsePI(ctxt);
6422 }
6423
6424 /*
6425 * Second case : a CDSection
6426 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006427 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006428 (NXT(2) == '[') && (NXT(3) == 'C') &&
6429 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6430 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6431 (NXT(8) == '[')) {
6432 xmlParseCDSect(ctxt);
6433 }
6434
6435 /*
6436 * Third case : a comment
6437 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006438 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006439 (NXT(2) == '-') && (NXT(3) == '-')) {
6440 xmlParseComment(ctxt);
6441 ctxt->instate = XML_PARSER_CONTENT;
6442 }
6443
6444 /*
6445 * Fourth case : a sub-element.
6446 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006447 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006448 xmlParseElement(ctxt);
6449 }
6450
6451 /*
6452 * Fifth case : a reference. If if has not been resolved,
6453 * parsing returns it's Name, create the node
6454 */
6455
Daniel Veillard21a0f912001-02-25 19:54:14 +00006456 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006457 xmlParseReference(ctxt);
6458 }
6459
6460 /*
6461 * Last case, text. Note that References are handled directly.
6462 */
6463 else {
6464 xmlParseCharData(ctxt, 0);
6465 }
6466
6467 GROW;
6468 /*
6469 * Pop-up of finished entities.
6470 */
6471 while ((RAW == 0) && (ctxt->inputNr > 1))
6472 xmlPopInput(ctxt);
6473 SHRINK;
6474
6475 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6476 (tok == ctxt->token)) {
6477 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6479 ctxt->sax->error(ctxt->userData,
6480 "detected an error in element content\n");
6481 ctxt->wellFormed = 0;
6482 ctxt->disableSAX = 1;
6483 ctxt->instate = XML_PARSER_EOF;
6484 break;
6485 }
6486 }
6487}
6488
6489/**
6490 * xmlParseElement:
6491 * @ctxt: an XML parser context
6492 *
6493 * parse an XML element, this is highly recursive
6494 *
6495 * [39] element ::= EmptyElemTag | STag content ETag
6496 *
6497 * [ WFC: Element Type Match ]
6498 * The Name in an element's end-tag must match the element type in the
6499 * start-tag.
6500 *
6501 * [ VC: Element Valid ]
6502 * An element is valid if there is a declaration matching elementdecl
6503 * where the Name matches the element type and one of the following holds:
6504 * - The declaration matches EMPTY and the element has no content.
6505 * - The declaration matches children and the sequence of child elements
6506 * belongs to the language generated by the regular expression in the
6507 * content model, with optional white space (characters matching the
6508 * nonterminal S) between each pair of child elements.
6509 * - The declaration matches Mixed and the content consists of character
6510 * data and child elements whose types match names in the content model.
6511 * - The declaration matches ANY, and the types of any child elements have
6512 * been declared.
6513 */
6514
6515void
6516xmlParseElement(xmlParserCtxtPtr ctxt) {
6517 const xmlChar *openTag = CUR_PTR;
6518 xmlChar *name;
6519 xmlChar *oldname;
6520 xmlParserNodeInfo node_info;
6521 xmlNodePtr ret;
6522
6523 /* Capture start position */
6524 if (ctxt->record_info) {
6525 node_info.begin_pos = ctxt->input->consumed +
6526 (CUR_PTR - ctxt->input->base);
6527 node_info.begin_line = ctxt->input->line;
6528 }
6529
6530 if (ctxt->spaceNr == 0)
6531 spacePush(ctxt, -1);
6532 else
6533 spacePush(ctxt, *ctxt->space);
6534
6535 name = xmlParseStartTag(ctxt);
6536 if (name == NULL) {
6537 spacePop(ctxt);
6538 return;
6539 }
6540 namePush(ctxt, name);
6541 ret = ctxt->node;
6542
6543 /*
6544 * [ VC: Root Element Type ]
6545 * The Name in the document type declaration must match the element
6546 * type of the root element.
6547 */
6548 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6549 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6550 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6551
6552 /*
6553 * Check for an Empty Element.
6554 */
6555 if ((RAW == '/') && (NXT(1) == '>')) {
6556 SKIP(2);
6557 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6558 (!ctxt->disableSAX))
6559 ctxt->sax->endElement(ctxt->userData, name);
6560 oldname = namePop(ctxt);
6561 spacePop(ctxt);
6562 if (oldname != NULL) {
6563#ifdef DEBUG_STACK
6564 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6565#endif
6566 xmlFree(oldname);
6567 }
6568 if ( ret != NULL && ctxt->record_info ) {
6569 node_info.end_pos = ctxt->input->consumed +
6570 (CUR_PTR - ctxt->input->base);
6571 node_info.end_line = ctxt->input->line;
6572 node_info.node = ret;
6573 xmlParserAddNodeInfo(ctxt, &node_info);
6574 }
6575 return;
6576 }
6577 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006578 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006579 } else {
6580 ctxt->errNo = XML_ERR_GT_REQUIRED;
6581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6582 ctxt->sax->error(ctxt->userData,
6583 "Couldn't find end of Start Tag\n%.30s\n",
6584 openTag);
6585 ctxt->wellFormed = 0;
6586 ctxt->disableSAX = 1;
6587
6588 /*
6589 * end of parsing of this node.
6590 */
6591 nodePop(ctxt);
6592 oldname = namePop(ctxt);
6593 spacePop(ctxt);
6594 if (oldname != NULL) {
6595#ifdef DEBUG_STACK
6596 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6597#endif
6598 xmlFree(oldname);
6599 }
6600
6601 /*
6602 * Capture end position and add node
6603 */
6604 if ( ret != NULL && ctxt->record_info ) {
6605 node_info.end_pos = ctxt->input->consumed +
6606 (CUR_PTR - ctxt->input->base);
6607 node_info.end_line = ctxt->input->line;
6608 node_info.node = ret;
6609 xmlParserAddNodeInfo(ctxt, &node_info);
6610 }
6611 return;
6612 }
6613
6614 /*
6615 * Parse the content of the element:
6616 */
6617 xmlParseContent(ctxt);
6618 if (!IS_CHAR(RAW)) {
6619 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6621 ctxt->sax->error(ctxt->userData,
6622 "Premature end of data in tag %.30s\n", openTag);
6623 ctxt->wellFormed = 0;
6624 ctxt->disableSAX = 1;
6625
6626 /*
6627 * end of parsing of this node.
6628 */
6629 nodePop(ctxt);
6630 oldname = namePop(ctxt);
6631 spacePop(ctxt);
6632 if (oldname != NULL) {
6633#ifdef DEBUG_STACK
6634 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6635#endif
6636 xmlFree(oldname);
6637 }
6638 return;
6639 }
6640
6641 /*
6642 * parse the end of tag: '</' should be here.
6643 */
6644 xmlParseEndTag(ctxt);
6645
6646 /*
6647 * Capture end position and add node
6648 */
6649 if ( ret != NULL && ctxt->record_info ) {
6650 node_info.end_pos = ctxt->input->consumed +
6651 (CUR_PTR - ctxt->input->base);
6652 node_info.end_line = ctxt->input->line;
6653 node_info.node = ret;
6654 xmlParserAddNodeInfo(ctxt, &node_info);
6655 }
6656}
6657
6658/**
6659 * xmlParseVersionNum:
6660 * @ctxt: an XML parser context
6661 *
6662 * parse the XML version value.
6663 *
6664 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6665 *
6666 * Returns the string giving the XML version number, or NULL
6667 */
6668xmlChar *
6669xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6670 xmlChar *buf = NULL;
6671 int len = 0;
6672 int size = 10;
6673 xmlChar cur;
6674
6675 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6676 if (buf == NULL) {
6677 xmlGenericError(xmlGenericErrorContext,
6678 "malloc of %d byte failed\n", size);
6679 return(NULL);
6680 }
6681 cur = CUR;
6682 while (((cur >= 'a') && (cur <= 'z')) ||
6683 ((cur >= 'A') && (cur <= 'Z')) ||
6684 ((cur >= '0') && (cur <= '9')) ||
6685 (cur == '_') || (cur == '.') ||
6686 (cur == ':') || (cur == '-')) {
6687 if (len + 1 >= size) {
6688 size *= 2;
6689 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6690 if (buf == NULL) {
6691 xmlGenericError(xmlGenericErrorContext,
6692 "realloc of %d byte failed\n", size);
6693 return(NULL);
6694 }
6695 }
6696 buf[len++] = cur;
6697 NEXT;
6698 cur=CUR;
6699 }
6700 buf[len] = 0;
6701 return(buf);
6702}
6703
6704/**
6705 * xmlParseVersionInfo:
6706 * @ctxt: an XML parser context
6707 *
6708 * parse the XML version.
6709 *
6710 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6711 *
6712 * [25] Eq ::= S? '=' S?
6713 *
6714 * Returns the version string, e.g. "1.0"
6715 */
6716
6717xmlChar *
6718xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6719 xmlChar *version = NULL;
6720 const xmlChar *q;
6721
6722 if ((RAW == 'v') && (NXT(1) == 'e') &&
6723 (NXT(2) == 'r') && (NXT(3) == 's') &&
6724 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6725 (NXT(6) == 'n')) {
6726 SKIP(7);
6727 SKIP_BLANKS;
6728 if (RAW != '=') {
6729 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6731 ctxt->sax->error(ctxt->userData,
6732 "xmlParseVersionInfo : expected '='\n");
6733 ctxt->wellFormed = 0;
6734 ctxt->disableSAX = 1;
6735 return(NULL);
6736 }
6737 NEXT;
6738 SKIP_BLANKS;
6739 if (RAW == '"') {
6740 NEXT;
6741 q = CUR_PTR;
6742 version = xmlParseVersionNum(ctxt);
6743 if (RAW != '"') {
6744 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6746 ctxt->sax->error(ctxt->userData,
6747 "String not closed\n%.50s\n", q);
6748 ctxt->wellFormed = 0;
6749 ctxt->disableSAX = 1;
6750 } else
6751 NEXT;
6752 } else if (RAW == '\''){
6753 NEXT;
6754 q = CUR_PTR;
6755 version = xmlParseVersionNum(ctxt);
6756 if (RAW != '\'') {
6757 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6759 ctxt->sax->error(ctxt->userData,
6760 "String not closed\n%.50s\n", q);
6761 ctxt->wellFormed = 0;
6762 ctxt->disableSAX = 1;
6763 } else
6764 NEXT;
6765 } else {
6766 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6768 ctxt->sax->error(ctxt->userData,
6769 "xmlParseVersionInfo : expected ' or \"\n");
6770 ctxt->wellFormed = 0;
6771 ctxt->disableSAX = 1;
6772 }
6773 }
6774 return(version);
6775}
6776
6777/**
6778 * xmlParseEncName:
6779 * @ctxt: an XML parser context
6780 *
6781 * parse the XML encoding name
6782 *
6783 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6784 *
6785 * Returns the encoding name value or NULL
6786 */
6787xmlChar *
6788xmlParseEncName(xmlParserCtxtPtr ctxt) {
6789 xmlChar *buf = NULL;
6790 int len = 0;
6791 int size = 10;
6792 xmlChar cur;
6793
6794 cur = CUR;
6795 if (((cur >= 'a') && (cur <= 'z')) ||
6796 ((cur >= 'A') && (cur <= 'Z'))) {
6797 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6798 if (buf == NULL) {
6799 xmlGenericError(xmlGenericErrorContext,
6800 "malloc of %d byte failed\n", size);
6801 return(NULL);
6802 }
6803
6804 buf[len++] = cur;
6805 NEXT;
6806 cur = CUR;
6807 while (((cur >= 'a') && (cur <= 'z')) ||
6808 ((cur >= 'A') && (cur <= 'Z')) ||
6809 ((cur >= '0') && (cur <= '9')) ||
6810 (cur == '.') || (cur == '_') ||
6811 (cur == '-')) {
6812 if (len + 1 >= size) {
6813 size *= 2;
6814 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6815 if (buf == NULL) {
6816 xmlGenericError(xmlGenericErrorContext,
6817 "realloc of %d byte failed\n", size);
6818 return(NULL);
6819 }
6820 }
6821 buf[len++] = cur;
6822 NEXT;
6823 cur = CUR;
6824 if (cur == 0) {
6825 SHRINK;
6826 GROW;
6827 cur = CUR;
6828 }
6829 }
6830 buf[len] = 0;
6831 } else {
6832 ctxt->errNo = XML_ERR_ENCODING_NAME;
6833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6834 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6835 ctxt->wellFormed = 0;
6836 ctxt->disableSAX = 1;
6837 }
6838 return(buf);
6839}
6840
6841/**
6842 * xmlParseEncodingDecl:
6843 * @ctxt: an XML parser context
6844 *
6845 * parse the XML encoding declaration
6846 *
6847 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6848 *
6849 * this setups the conversion filters.
6850 *
6851 * Returns the encoding value or NULL
6852 */
6853
6854xmlChar *
6855xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6856 xmlChar *encoding = NULL;
6857 const xmlChar *q;
6858
6859 SKIP_BLANKS;
6860 if ((RAW == 'e') && (NXT(1) == 'n') &&
6861 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6862 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6863 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6864 SKIP(8);
6865 SKIP_BLANKS;
6866 if (RAW != '=') {
6867 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6869 ctxt->sax->error(ctxt->userData,
6870 "xmlParseEncodingDecl : expected '='\n");
6871 ctxt->wellFormed = 0;
6872 ctxt->disableSAX = 1;
6873 return(NULL);
6874 }
6875 NEXT;
6876 SKIP_BLANKS;
6877 if (RAW == '"') {
6878 NEXT;
6879 q = CUR_PTR;
6880 encoding = xmlParseEncName(ctxt);
6881 if (RAW != '"') {
6882 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6884 ctxt->sax->error(ctxt->userData,
6885 "String not closed\n%.50s\n", q);
6886 ctxt->wellFormed = 0;
6887 ctxt->disableSAX = 1;
6888 } else
6889 NEXT;
6890 } else if (RAW == '\''){
6891 NEXT;
6892 q = CUR_PTR;
6893 encoding = xmlParseEncName(ctxt);
6894 if (RAW != '\'') {
6895 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6897 ctxt->sax->error(ctxt->userData,
6898 "String not closed\n%.50s\n", q);
6899 ctxt->wellFormed = 0;
6900 ctxt->disableSAX = 1;
6901 } else
6902 NEXT;
6903 } else if (RAW == '"'){
6904 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6906 ctxt->sax->error(ctxt->userData,
6907 "xmlParseEncodingDecl : expected ' or \"\n");
6908 ctxt->wellFormed = 0;
6909 ctxt->disableSAX = 1;
6910 }
6911 if (encoding != NULL) {
6912 xmlCharEncoding enc;
6913 xmlCharEncodingHandlerPtr handler;
6914
6915 if (ctxt->input->encoding != NULL)
6916 xmlFree((xmlChar *) ctxt->input->encoding);
6917 ctxt->input->encoding = encoding;
6918
6919 enc = xmlParseCharEncoding((const char *) encoding);
6920 /*
6921 * registered set of known encodings
6922 */
6923 if (enc != XML_CHAR_ENCODING_ERROR) {
6924 xmlSwitchEncoding(ctxt, enc);
6925 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6926 xmlFree(encoding);
6927 return(NULL);
6928 }
6929 } else {
6930 /*
6931 * fallback for unknown encodings
6932 */
6933 handler = xmlFindCharEncodingHandler((const char *) encoding);
6934 if (handler != NULL) {
6935 xmlSwitchToEncoding(ctxt, handler);
6936 } else {
6937 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6939 ctxt->sax->error(ctxt->userData,
6940 "Unsupported encoding %s\n", encoding);
6941 return(NULL);
6942 }
6943 }
6944 }
6945 }
6946 return(encoding);
6947}
6948
6949/**
6950 * xmlParseSDDecl:
6951 * @ctxt: an XML parser context
6952 *
6953 * parse the XML standalone declaration
6954 *
6955 * [32] SDDecl ::= S 'standalone' Eq
6956 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6957 *
6958 * [ VC: Standalone Document Declaration ]
6959 * TODO The standalone document declaration must have the value "no"
6960 * if any external markup declarations contain declarations of:
6961 * - attributes with default values, if elements to which these
6962 * attributes apply appear in the document without specifications
6963 * of values for these attributes, or
6964 * - entities (other than amp, lt, gt, apos, quot), if references
6965 * to those entities appear in the document, or
6966 * - attributes with values subject to normalization, where the
6967 * attribute appears in the document with a value which will change
6968 * as a result of normalization, or
6969 * - element types with element content, if white space occurs directly
6970 * within any instance of those types.
6971 *
6972 * Returns 1 if standalone, 0 otherwise
6973 */
6974
6975int
6976xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6977 int standalone = -1;
6978
6979 SKIP_BLANKS;
6980 if ((RAW == 's') && (NXT(1) == 't') &&
6981 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6982 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6983 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6984 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6985 SKIP(10);
6986 SKIP_BLANKS;
6987 if (RAW != '=') {
6988 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6990 ctxt->sax->error(ctxt->userData,
6991 "XML standalone declaration : expected '='\n");
6992 ctxt->wellFormed = 0;
6993 ctxt->disableSAX = 1;
6994 return(standalone);
6995 }
6996 NEXT;
6997 SKIP_BLANKS;
6998 if (RAW == '\''){
6999 NEXT;
7000 if ((RAW == 'n') && (NXT(1) == 'o')) {
7001 standalone = 0;
7002 SKIP(2);
7003 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7004 (NXT(2) == 's')) {
7005 standalone = 1;
7006 SKIP(3);
7007 } else {
7008 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7010 ctxt->sax->error(ctxt->userData,
7011 "standalone accepts only 'yes' or 'no'\n");
7012 ctxt->wellFormed = 0;
7013 ctxt->disableSAX = 1;
7014 }
7015 if (RAW != '\'') {
7016 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7018 ctxt->sax->error(ctxt->userData, "String not closed\n");
7019 ctxt->wellFormed = 0;
7020 ctxt->disableSAX = 1;
7021 } else
7022 NEXT;
7023 } else if (RAW == '"'){
7024 NEXT;
7025 if ((RAW == 'n') && (NXT(1) == 'o')) {
7026 standalone = 0;
7027 SKIP(2);
7028 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7029 (NXT(2) == 's')) {
7030 standalone = 1;
7031 SKIP(3);
7032 } else {
7033 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7035 ctxt->sax->error(ctxt->userData,
7036 "standalone accepts only 'yes' or 'no'\n");
7037 ctxt->wellFormed = 0;
7038 ctxt->disableSAX = 1;
7039 }
7040 if (RAW != '"') {
7041 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7043 ctxt->sax->error(ctxt->userData, "String not closed\n");
7044 ctxt->wellFormed = 0;
7045 ctxt->disableSAX = 1;
7046 } else
7047 NEXT;
7048 } else {
7049 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7051 ctxt->sax->error(ctxt->userData,
7052 "Standalone value not found\n");
7053 ctxt->wellFormed = 0;
7054 ctxt->disableSAX = 1;
7055 }
7056 }
7057 return(standalone);
7058}
7059
7060/**
7061 * xmlParseXMLDecl:
7062 * @ctxt: an XML parser context
7063 *
7064 * parse an XML declaration header
7065 *
7066 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7067 */
7068
7069void
7070xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7071 xmlChar *version;
7072
7073 /*
7074 * We know that '<?xml' is here.
7075 */
7076 SKIP(5);
7077
7078 if (!IS_BLANK(RAW)) {
7079 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7081 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7082 ctxt->wellFormed = 0;
7083 ctxt->disableSAX = 1;
7084 }
7085 SKIP_BLANKS;
7086
7087 /*
7088 * We should have the VersionInfo here.
7089 */
7090 version = xmlParseVersionInfo(ctxt);
7091 if (version == NULL)
7092 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7093 ctxt->version = xmlStrdup(version);
7094 xmlFree(version);
7095
7096 /*
7097 * We may have the encoding declaration
7098 */
7099 if (!IS_BLANK(RAW)) {
7100 if ((RAW == '?') && (NXT(1) == '>')) {
7101 SKIP(2);
7102 return;
7103 }
7104 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7105 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7106 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7107 ctxt->wellFormed = 0;
7108 ctxt->disableSAX = 1;
7109 }
7110 xmlParseEncodingDecl(ctxt);
7111 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7112 /*
7113 * The XML REC instructs us to stop parsing right here
7114 */
7115 return;
7116 }
7117
7118 /*
7119 * We may have the standalone status.
7120 */
7121 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7122 if ((RAW == '?') && (NXT(1) == '>')) {
7123 SKIP(2);
7124 return;
7125 }
7126 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7128 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7129 ctxt->wellFormed = 0;
7130 ctxt->disableSAX = 1;
7131 }
7132 SKIP_BLANKS;
7133 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7134
7135 SKIP_BLANKS;
7136 if ((RAW == '?') && (NXT(1) == '>')) {
7137 SKIP(2);
7138 } else if (RAW == '>') {
7139 /* Deprecated old WD ... */
7140 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7142 ctxt->sax->error(ctxt->userData,
7143 "XML declaration must end-up with '?>'\n");
7144 ctxt->wellFormed = 0;
7145 ctxt->disableSAX = 1;
7146 NEXT;
7147 } else {
7148 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7150 ctxt->sax->error(ctxt->userData,
7151 "parsing XML declaration: '?>' expected\n");
7152 ctxt->wellFormed = 0;
7153 ctxt->disableSAX = 1;
7154 MOVETO_ENDTAG(CUR_PTR);
7155 NEXT;
7156 }
7157}
7158
7159/**
7160 * xmlParseMisc:
7161 * @ctxt: an XML parser context
7162 *
7163 * parse an XML Misc* optionnal field.
7164 *
7165 * [27] Misc ::= Comment | PI | S
7166 */
7167
7168void
7169xmlParseMisc(xmlParserCtxtPtr ctxt) {
7170 while (((RAW == '<') && (NXT(1) == '?')) ||
7171 ((RAW == '<') && (NXT(1) == '!') &&
7172 (NXT(2) == '-') && (NXT(3) == '-')) ||
7173 IS_BLANK(CUR)) {
7174 if ((RAW == '<') && (NXT(1) == '?')) {
7175 xmlParsePI(ctxt);
7176 } else if (IS_BLANK(CUR)) {
7177 NEXT;
7178 } else
7179 xmlParseComment(ctxt);
7180 }
7181}
7182
7183/**
7184 * xmlParseDocument:
7185 * @ctxt: an XML parser context
7186 *
7187 * parse an XML document (and build a tree if using the standard SAX
7188 * interface).
7189 *
7190 * [1] document ::= prolog element Misc*
7191 *
7192 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7193 *
7194 * Returns 0, -1 in case of error. the parser context is augmented
7195 * as a result of the parsing.
7196 */
7197
7198int
7199xmlParseDocument(xmlParserCtxtPtr ctxt) {
7200 xmlChar start[4];
7201 xmlCharEncoding enc;
7202
7203 xmlInitParser();
7204
7205 GROW;
7206
7207 /*
7208 * SAX: beginning of the document processing.
7209 */
7210 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7211 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7212
7213 /*
7214 * Get the 4 first bytes and decode the charset
7215 * if enc != XML_CHAR_ENCODING_NONE
7216 * plug some encoding conversion routines.
7217 */
7218 start[0] = RAW;
7219 start[1] = NXT(1);
7220 start[2] = NXT(2);
7221 start[3] = NXT(3);
7222 enc = xmlDetectCharEncoding(start, 4);
7223 if (enc != XML_CHAR_ENCODING_NONE) {
7224 xmlSwitchEncoding(ctxt, enc);
7225 }
7226
7227
7228 if (CUR == 0) {
7229 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7231 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7232 ctxt->wellFormed = 0;
7233 ctxt->disableSAX = 1;
7234 }
7235
7236 /*
7237 * Check for the XMLDecl in the Prolog.
7238 */
7239 GROW;
7240 if ((RAW == '<') && (NXT(1) == '?') &&
7241 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7242 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7243
7244 /*
7245 * Note that we will switch encoding on the fly.
7246 */
7247 xmlParseXMLDecl(ctxt);
7248 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7249 /*
7250 * The XML REC instructs us to stop parsing right here
7251 */
7252 return(-1);
7253 }
7254 ctxt->standalone = ctxt->input->standalone;
7255 SKIP_BLANKS;
7256 } else {
7257 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7258 }
7259 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7260 ctxt->sax->startDocument(ctxt->userData);
7261
7262 /*
7263 * The Misc part of the Prolog
7264 */
7265 GROW;
7266 xmlParseMisc(ctxt);
7267
7268 /*
7269 * Then possibly doc type declaration(s) and more Misc
7270 * (doctypedecl Misc*)?
7271 */
7272 GROW;
7273 if ((RAW == '<') && (NXT(1) == '!') &&
7274 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7275 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7276 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7277 (NXT(8) == 'E')) {
7278
7279 ctxt->inSubset = 1;
7280 xmlParseDocTypeDecl(ctxt);
7281 if (RAW == '[') {
7282 ctxt->instate = XML_PARSER_DTD;
7283 xmlParseInternalSubset(ctxt);
7284 }
7285
7286 /*
7287 * Create and update the external subset.
7288 */
7289 ctxt->inSubset = 2;
7290 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7291 (!ctxt->disableSAX))
7292 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7293 ctxt->extSubSystem, ctxt->extSubURI);
7294 ctxt->inSubset = 0;
7295
7296
7297 ctxt->instate = XML_PARSER_PROLOG;
7298 xmlParseMisc(ctxt);
7299 }
7300
7301 /*
7302 * Time to start parsing the tree itself
7303 */
7304 GROW;
7305 if (RAW != '<') {
7306 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7308 ctxt->sax->error(ctxt->userData,
7309 "Start tag expected, '<' not found\n");
7310 ctxt->wellFormed = 0;
7311 ctxt->disableSAX = 1;
7312 ctxt->instate = XML_PARSER_EOF;
7313 } else {
7314 ctxt->instate = XML_PARSER_CONTENT;
7315 xmlParseElement(ctxt);
7316 ctxt->instate = XML_PARSER_EPILOG;
7317
7318
7319 /*
7320 * The Misc part at the end
7321 */
7322 xmlParseMisc(ctxt);
7323
7324 if (RAW != 0) {
7325 ctxt->errNo = XML_ERR_DOCUMENT_END;
7326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7327 ctxt->sax->error(ctxt->userData,
7328 "Extra content at the end of the document\n");
7329 ctxt->wellFormed = 0;
7330 ctxt->disableSAX = 1;
7331 }
7332 ctxt->instate = XML_PARSER_EOF;
7333 }
7334
7335 /*
7336 * SAX: end of the document processing.
7337 */
7338 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7339 (!ctxt->disableSAX))
7340 ctxt->sax->endDocument(ctxt->userData);
7341
7342 if (! ctxt->wellFormed) return(-1);
7343 return(0);
7344}
7345
7346/**
7347 * xmlParseExtParsedEnt:
7348 * @ctxt: an XML parser context
7349 *
7350 * parse a genreral parsed entity
7351 * An external general parsed entity is well-formed if it matches the
7352 * production labeled extParsedEnt.
7353 *
7354 * [78] extParsedEnt ::= TextDecl? content
7355 *
7356 * Returns 0, -1 in case of error. the parser context is augmented
7357 * as a result of the parsing.
7358 */
7359
7360int
7361xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7362 xmlChar start[4];
7363 xmlCharEncoding enc;
7364
7365 xmlDefaultSAXHandlerInit();
7366
7367 GROW;
7368
7369 /*
7370 * SAX: beginning of the document processing.
7371 */
7372 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7373 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7374
7375 /*
7376 * Get the 4 first bytes and decode the charset
7377 * if enc != XML_CHAR_ENCODING_NONE
7378 * plug some encoding conversion routines.
7379 */
7380 start[0] = RAW;
7381 start[1] = NXT(1);
7382 start[2] = NXT(2);
7383 start[3] = NXT(3);
7384 enc = xmlDetectCharEncoding(start, 4);
7385 if (enc != XML_CHAR_ENCODING_NONE) {
7386 xmlSwitchEncoding(ctxt, enc);
7387 }
7388
7389
7390 if (CUR == 0) {
7391 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7393 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7394 ctxt->wellFormed = 0;
7395 ctxt->disableSAX = 1;
7396 }
7397
7398 /*
7399 * Check for the XMLDecl in the Prolog.
7400 */
7401 GROW;
7402 if ((RAW == '<') && (NXT(1) == '?') &&
7403 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7404 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7405
7406 /*
7407 * Note that we will switch encoding on the fly.
7408 */
7409 xmlParseXMLDecl(ctxt);
7410 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7411 /*
7412 * The XML REC instructs us to stop parsing right here
7413 */
7414 return(-1);
7415 }
7416 SKIP_BLANKS;
7417 } else {
7418 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7419 }
7420 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7421 ctxt->sax->startDocument(ctxt->userData);
7422
7423 /*
7424 * Doing validity checking on chunk doesn't make sense
7425 */
7426 ctxt->instate = XML_PARSER_CONTENT;
7427 ctxt->validate = 0;
7428 ctxt->loadsubset = 0;
7429 ctxt->depth = 0;
7430
7431 xmlParseContent(ctxt);
7432
7433 if ((RAW == '<') && (NXT(1) == '/')) {
7434 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7436 ctxt->sax->error(ctxt->userData,
7437 "chunk is not well balanced\n");
7438 ctxt->wellFormed = 0;
7439 ctxt->disableSAX = 1;
7440 } else if (RAW != 0) {
7441 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7443 ctxt->sax->error(ctxt->userData,
7444 "extra content at the end of well balanced chunk\n");
7445 ctxt->wellFormed = 0;
7446 ctxt->disableSAX = 1;
7447 }
7448
7449 /*
7450 * SAX: end of the document processing.
7451 */
7452 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7453 (!ctxt->disableSAX))
7454 ctxt->sax->endDocument(ctxt->userData);
7455
7456 if (! ctxt->wellFormed) return(-1);
7457 return(0);
7458}
7459
7460/************************************************************************
7461 * *
7462 * Progressive parsing interfaces *
7463 * *
7464 ************************************************************************/
7465
7466/**
7467 * xmlParseLookupSequence:
7468 * @ctxt: an XML parser context
7469 * @first: the first char to lookup
7470 * @next: the next char to lookup or zero
7471 * @third: the next char to lookup or zero
7472 *
7473 * Try to find if a sequence (first, next, third) or just (first next) or
7474 * (first) is available in the input stream.
7475 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7476 * to avoid rescanning sequences of bytes, it DOES change the state of the
7477 * parser, do not use liberally.
7478 *
7479 * Returns the index to the current parsing point if the full sequence
7480 * is available, -1 otherwise.
7481 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007482static int
Owen Taylor3473f882001-02-23 17:55:21 +00007483xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7484 xmlChar next, xmlChar third) {
7485 int base, len;
7486 xmlParserInputPtr in;
7487 const xmlChar *buf;
7488
7489 in = ctxt->input;
7490 if (in == NULL) return(-1);
7491 base = in->cur - in->base;
7492 if (base < 0) return(-1);
7493 if (ctxt->checkIndex > base)
7494 base = ctxt->checkIndex;
7495 if (in->buf == NULL) {
7496 buf = in->base;
7497 len = in->length;
7498 } else {
7499 buf = in->buf->buffer->content;
7500 len = in->buf->buffer->use;
7501 }
7502 /* take into account the sequence length */
7503 if (third) len -= 2;
7504 else if (next) len --;
7505 for (;base < len;base++) {
7506 if (buf[base] == first) {
7507 if (third != 0) {
7508 if ((buf[base + 1] != next) ||
7509 (buf[base + 2] != third)) continue;
7510 } else if (next != 0) {
7511 if (buf[base + 1] != next) continue;
7512 }
7513 ctxt->checkIndex = 0;
7514#ifdef DEBUG_PUSH
7515 if (next == 0)
7516 xmlGenericError(xmlGenericErrorContext,
7517 "PP: lookup '%c' found at %d\n",
7518 first, base);
7519 else if (third == 0)
7520 xmlGenericError(xmlGenericErrorContext,
7521 "PP: lookup '%c%c' found at %d\n",
7522 first, next, base);
7523 else
7524 xmlGenericError(xmlGenericErrorContext,
7525 "PP: lookup '%c%c%c' found at %d\n",
7526 first, next, third, base);
7527#endif
7528 return(base - (in->cur - in->base));
7529 }
7530 }
7531 ctxt->checkIndex = base;
7532#ifdef DEBUG_PUSH
7533 if (next == 0)
7534 xmlGenericError(xmlGenericErrorContext,
7535 "PP: lookup '%c' failed\n", first);
7536 else if (third == 0)
7537 xmlGenericError(xmlGenericErrorContext,
7538 "PP: lookup '%c%c' failed\n", first, next);
7539 else
7540 xmlGenericError(xmlGenericErrorContext,
7541 "PP: lookup '%c%c%c' failed\n", first, next, third);
7542#endif
7543 return(-1);
7544}
7545
7546/**
7547 * xmlParseTryOrFinish:
7548 * @ctxt: an XML parser context
7549 * @terminate: last chunk indicator
7550 *
7551 * Try to progress on parsing
7552 *
7553 * Returns zero if no parsing was possible
7554 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007555static int
Owen Taylor3473f882001-02-23 17:55:21 +00007556xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7557 int ret = 0;
7558 int avail;
7559 xmlChar cur, next;
7560
7561#ifdef DEBUG_PUSH
7562 switch (ctxt->instate) {
7563 case XML_PARSER_EOF:
7564 xmlGenericError(xmlGenericErrorContext,
7565 "PP: try EOF\n"); break;
7566 case XML_PARSER_START:
7567 xmlGenericError(xmlGenericErrorContext,
7568 "PP: try START\n"); break;
7569 case XML_PARSER_MISC:
7570 xmlGenericError(xmlGenericErrorContext,
7571 "PP: try MISC\n");break;
7572 case XML_PARSER_COMMENT:
7573 xmlGenericError(xmlGenericErrorContext,
7574 "PP: try COMMENT\n");break;
7575 case XML_PARSER_PROLOG:
7576 xmlGenericError(xmlGenericErrorContext,
7577 "PP: try PROLOG\n");break;
7578 case XML_PARSER_START_TAG:
7579 xmlGenericError(xmlGenericErrorContext,
7580 "PP: try START_TAG\n");break;
7581 case XML_PARSER_CONTENT:
7582 xmlGenericError(xmlGenericErrorContext,
7583 "PP: try CONTENT\n");break;
7584 case XML_PARSER_CDATA_SECTION:
7585 xmlGenericError(xmlGenericErrorContext,
7586 "PP: try CDATA_SECTION\n");break;
7587 case XML_PARSER_END_TAG:
7588 xmlGenericError(xmlGenericErrorContext,
7589 "PP: try END_TAG\n");break;
7590 case XML_PARSER_ENTITY_DECL:
7591 xmlGenericError(xmlGenericErrorContext,
7592 "PP: try ENTITY_DECL\n");break;
7593 case XML_PARSER_ENTITY_VALUE:
7594 xmlGenericError(xmlGenericErrorContext,
7595 "PP: try ENTITY_VALUE\n");break;
7596 case XML_PARSER_ATTRIBUTE_VALUE:
7597 xmlGenericError(xmlGenericErrorContext,
7598 "PP: try ATTRIBUTE_VALUE\n");break;
7599 case XML_PARSER_DTD:
7600 xmlGenericError(xmlGenericErrorContext,
7601 "PP: try DTD\n");break;
7602 case XML_PARSER_EPILOG:
7603 xmlGenericError(xmlGenericErrorContext,
7604 "PP: try EPILOG\n");break;
7605 case XML_PARSER_PI:
7606 xmlGenericError(xmlGenericErrorContext,
7607 "PP: try PI\n");break;
7608 case XML_PARSER_IGNORE:
7609 xmlGenericError(xmlGenericErrorContext,
7610 "PP: try IGNORE\n");break;
7611 }
7612#endif
7613
7614 while (1) {
7615 /*
7616 * Pop-up of finished entities.
7617 */
7618 while ((RAW == 0) && (ctxt->inputNr > 1))
7619 xmlPopInput(ctxt);
7620
7621 if (ctxt->input ==NULL) break;
7622 if (ctxt->input->buf == NULL)
7623 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7624 else
7625 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7626 if (avail < 1)
7627 goto done;
7628 switch (ctxt->instate) {
7629 case XML_PARSER_EOF:
7630 /*
7631 * Document parsing is done !
7632 */
7633 goto done;
7634 case XML_PARSER_START:
7635 /*
7636 * Very first chars read from the document flow.
7637 */
Owen Taylor3473f882001-02-23 17:55:21 +00007638 if (avail < 2)
7639 goto done;
7640
7641 cur = ctxt->input->cur[0];
7642 next = ctxt->input->cur[1];
7643 if (cur == 0) {
7644 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7645 ctxt->sax->setDocumentLocator(ctxt->userData,
7646 &xmlDefaultSAXLocator);
7647 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7649 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7650 ctxt->wellFormed = 0;
7651 ctxt->disableSAX = 1;
7652 ctxt->instate = XML_PARSER_EOF;
7653#ifdef DEBUG_PUSH
7654 xmlGenericError(xmlGenericErrorContext,
7655 "PP: entering EOF\n");
7656#endif
7657 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7658 ctxt->sax->endDocument(ctxt->userData);
7659 goto done;
7660 }
7661 if ((cur == '<') && (next == '?')) {
7662 /* PI or XML decl */
7663 if (avail < 5) return(ret);
7664 if ((!terminate) &&
7665 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7666 return(ret);
7667 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7668 ctxt->sax->setDocumentLocator(ctxt->userData,
7669 &xmlDefaultSAXLocator);
7670 if ((ctxt->input->cur[2] == 'x') &&
7671 (ctxt->input->cur[3] == 'm') &&
7672 (ctxt->input->cur[4] == 'l') &&
7673 (IS_BLANK(ctxt->input->cur[5]))) {
7674 ret += 5;
7675#ifdef DEBUG_PUSH
7676 xmlGenericError(xmlGenericErrorContext,
7677 "PP: Parsing XML Decl\n");
7678#endif
7679 xmlParseXMLDecl(ctxt);
7680 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7681 /*
7682 * The XML REC instructs us to stop parsing right
7683 * here
7684 */
7685 ctxt->instate = XML_PARSER_EOF;
7686 return(0);
7687 }
7688 ctxt->standalone = ctxt->input->standalone;
7689 if ((ctxt->encoding == NULL) &&
7690 (ctxt->input->encoding != NULL))
7691 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7692 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7693 (!ctxt->disableSAX))
7694 ctxt->sax->startDocument(ctxt->userData);
7695 ctxt->instate = XML_PARSER_MISC;
7696#ifdef DEBUG_PUSH
7697 xmlGenericError(xmlGenericErrorContext,
7698 "PP: entering MISC\n");
7699#endif
7700 } else {
7701 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7702 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7703 (!ctxt->disableSAX))
7704 ctxt->sax->startDocument(ctxt->userData);
7705 ctxt->instate = XML_PARSER_MISC;
7706#ifdef DEBUG_PUSH
7707 xmlGenericError(xmlGenericErrorContext,
7708 "PP: entering MISC\n");
7709#endif
7710 }
7711 } else {
7712 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7713 ctxt->sax->setDocumentLocator(ctxt->userData,
7714 &xmlDefaultSAXLocator);
7715 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7716 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7717 (!ctxt->disableSAX))
7718 ctxt->sax->startDocument(ctxt->userData);
7719 ctxt->instate = XML_PARSER_MISC;
7720#ifdef DEBUG_PUSH
7721 xmlGenericError(xmlGenericErrorContext,
7722 "PP: entering MISC\n");
7723#endif
7724 }
7725 break;
7726 case XML_PARSER_MISC:
7727 SKIP_BLANKS;
7728 if (ctxt->input->buf == NULL)
7729 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7730 else
7731 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7732 if (avail < 2)
7733 goto done;
7734 cur = ctxt->input->cur[0];
7735 next = ctxt->input->cur[1];
7736 if ((cur == '<') && (next == '?')) {
7737 if ((!terminate) &&
7738 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7739 goto done;
7740#ifdef DEBUG_PUSH
7741 xmlGenericError(xmlGenericErrorContext,
7742 "PP: Parsing PI\n");
7743#endif
7744 xmlParsePI(ctxt);
7745 } else if ((cur == '<') && (next == '!') &&
7746 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7747 if ((!terminate) &&
7748 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7749 goto done;
7750#ifdef DEBUG_PUSH
7751 xmlGenericError(xmlGenericErrorContext,
7752 "PP: Parsing Comment\n");
7753#endif
7754 xmlParseComment(ctxt);
7755 ctxt->instate = XML_PARSER_MISC;
7756 } else if ((cur == '<') && (next == '!') &&
7757 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7758 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7759 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7760 (ctxt->input->cur[8] == 'E')) {
7761 if ((!terminate) &&
7762 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7763 goto done;
7764#ifdef DEBUG_PUSH
7765 xmlGenericError(xmlGenericErrorContext,
7766 "PP: Parsing internal subset\n");
7767#endif
7768 ctxt->inSubset = 1;
7769 xmlParseDocTypeDecl(ctxt);
7770 if (RAW == '[') {
7771 ctxt->instate = XML_PARSER_DTD;
7772#ifdef DEBUG_PUSH
7773 xmlGenericError(xmlGenericErrorContext,
7774 "PP: entering DTD\n");
7775#endif
7776 } else {
7777 /*
7778 * Create and update the external subset.
7779 */
7780 ctxt->inSubset = 2;
7781 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7782 (ctxt->sax->externalSubset != NULL))
7783 ctxt->sax->externalSubset(ctxt->userData,
7784 ctxt->intSubName, ctxt->extSubSystem,
7785 ctxt->extSubURI);
7786 ctxt->inSubset = 0;
7787 ctxt->instate = XML_PARSER_PROLOG;
7788#ifdef DEBUG_PUSH
7789 xmlGenericError(xmlGenericErrorContext,
7790 "PP: entering PROLOG\n");
7791#endif
7792 }
7793 } else if ((cur == '<') && (next == '!') &&
7794 (avail < 9)) {
7795 goto done;
7796 } else {
7797 ctxt->instate = XML_PARSER_START_TAG;
7798#ifdef DEBUG_PUSH
7799 xmlGenericError(xmlGenericErrorContext,
7800 "PP: entering START_TAG\n");
7801#endif
7802 }
7803 break;
7804 case XML_PARSER_IGNORE:
7805 xmlGenericError(xmlGenericErrorContext,
7806 "PP: internal error, state == IGNORE");
7807 ctxt->instate = XML_PARSER_DTD;
7808#ifdef DEBUG_PUSH
7809 xmlGenericError(xmlGenericErrorContext,
7810 "PP: entering DTD\n");
7811#endif
7812 break;
7813 case XML_PARSER_PROLOG:
7814 SKIP_BLANKS;
7815 if (ctxt->input->buf == NULL)
7816 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7817 else
7818 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7819 if (avail < 2)
7820 goto done;
7821 cur = ctxt->input->cur[0];
7822 next = ctxt->input->cur[1];
7823 if ((cur == '<') && (next == '?')) {
7824 if ((!terminate) &&
7825 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7826 goto done;
7827#ifdef DEBUG_PUSH
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: Parsing PI\n");
7830#endif
7831 xmlParsePI(ctxt);
7832 } else if ((cur == '<') && (next == '!') &&
7833 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7834 if ((!terminate) &&
7835 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7836 goto done;
7837#ifdef DEBUG_PUSH
7838 xmlGenericError(xmlGenericErrorContext,
7839 "PP: Parsing Comment\n");
7840#endif
7841 xmlParseComment(ctxt);
7842 ctxt->instate = XML_PARSER_PROLOG;
7843 } else if ((cur == '<') && (next == '!') &&
7844 (avail < 4)) {
7845 goto done;
7846 } else {
7847 ctxt->instate = XML_PARSER_START_TAG;
7848#ifdef DEBUG_PUSH
7849 xmlGenericError(xmlGenericErrorContext,
7850 "PP: entering START_TAG\n");
7851#endif
7852 }
7853 break;
7854 case XML_PARSER_EPILOG:
7855 SKIP_BLANKS;
7856 if (ctxt->input->buf == NULL)
7857 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7858 else
7859 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7860 if (avail < 2)
7861 goto done;
7862 cur = ctxt->input->cur[0];
7863 next = ctxt->input->cur[1];
7864 if ((cur == '<') && (next == '?')) {
7865 if ((!terminate) &&
7866 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7867 goto done;
7868#ifdef DEBUG_PUSH
7869 xmlGenericError(xmlGenericErrorContext,
7870 "PP: Parsing PI\n");
7871#endif
7872 xmlParsePI(ctxt);
7873 ctxt->instate = XML_PARSER_EPILOG;
7874 } else if ((cur == '<') && (next == '!') &&
7875 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7876 if ((!terminate) &&
7877 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7878 goto done;
7879#ifdef DEBUG_PUSH
7880 xmlGenericError(xmlGenericErrorContext,
7881 "PP: Parsing Comment\n");
7882#endif
7883 xmlParseComment(ctxt);
7884 ctxt->instate = XML_PARSER_EPILOG;
7885 } else if ((cur == '<') && (next == '!') &&
7886 (avail < 4)) {
7887 goto done;
7888 } else {
7889 ctxt->errNo = XML_ERR_DOCUMENT_END;
7890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7891 ctxt->sax->error(ctxt->userData,
7892 "Extra content at the end of the document\n");
7893 ctxt->wellFormed = 0;
7894 ctxt->disableSAX = 1;
7895 ctxt->instate = XML_PARSER_EOF;
7896#ifdef DEBUG_PUSH
7897 xmlGenericError(xmlGenericErrorContext,
7898 "PP: entering EOF\n");
7899#endif
7900 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7901 (!ctxt->disableSAX))
7902 ctxt->sax->endDocument(ctxt->userData);
7903 goto done;
7904 }
7905 break;
7906 case XML_PARSER_START_TAG: {
7907 xmlChar *name, *oldname;
7908
7909 if ((avail < 2) && (ctxt->inputNr == 1))
7910 goto done;
7911 cur = ctxt->input->cur[0];
7912 if (cur != '<') {
7913 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7915 ctxt->sax->error(ctxt->userData,
7916 "Start tag expect, '<' not found\n");
7917 ctxt->wellFormed = 0;
7918 ctxt->disableSAX = 1;
7919 ctxt->instate = XML_PARSER_EOF;
7920#ifdef DEBUG_PUSH
7921 xmlGenericError(xmlGenericErrorContext,
7922 "PP: entering EOF\n");
7923#endif
7924 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7925 (!ctxt->disableSAX))
7926 ctxt->sax->endDocument(ctxt->userData);
7927 goto done;
7928 }
7929 if ((!terminate) &&
7930 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7931 goto done;
7932 if (ctxt->spaceNr == 0)
7933 spacePush(ctxt, -1);
7934 else
7935 spacePush(ctxt, *ctxt->space);
7936 name = xmlParseStartTag(ctxt);
7937 if (name == NULL) {
7938 spacePop(ctxt);
7939 ctxt->instate = XML_PARSER_EOF;
7940#ifdef DEBUG_PUSH
7941 xmlGenericError(xmlGenericErrorContext,
7942 "PP: entering EOF\n");
7943#endif
7944 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7945 (!ctxt->disableSAX))
7946 ctxt->sax->endDocument(ctxt->userData);
7947 goto done;
7948 }
7949 namePush(ctxt, xmlStrdup(name));
7950
7951 /*
7952 * [ VC: Root Element Type ]
7953 * The Name in the document type declaration must match
7954 * the element type of the root element.
7955 */
7956 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7957 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7958 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7959
7960 /*
7961 * Check for an Empty Element.
7962 */
7963 if ((RAW == '/') && (NXT(1) == '>')) {
7964 SKIP(2);
7965 if ((ctxt->sax != NULL) &&
7966 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7967 ctxt->sax->endElement(ctxt->userData, name);
7968 xmlFree(name);
7969 oldname = namePop(ctxt);
7970 spacePop(ctxt);
7971 if (oldname != NULL) {
7972#ifdef DEBUG_STACK
7973 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7974#endif
7975 xmlFree(oldname);
7976 }
7977 if (ctxt->name == NULL) {
7978 ctxt->instate = XML_PARSER_EPILOG;
7979#ifdef DEBUG_PUSH
7980 xmlGenericError(xmlGenericErrorContext,
7981 "PP: entering EPILOG\n");
7982#endif
7983 } else {
7984 ctxt->instate = XML_PARSER_CONTENT;
7985#ifdef DEBUG_PUSH
7986 xmlGenericError(xmlGenericErrorContext,
7987 "PP: entering CONTENT\n");
7988#endif
7989 }
7990 break;
7991 }
7992 if (RAW == '>') {
7993 NEXT;
7994 } else {
7995 ctxt->errNo = XML_ERR_GT_REQUIRED;
7996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7997 ctxt->sax->error(ctxt->userData,
7998 "Couldn't find end of Start Tag %s\n",
7999 name);
8000 ctxt->wellFormed = 0;
8001 ctxt->disableSAX = 1;
8002
8003 /*
8004 * end of parsing of this node.
8005 */
8006 nodePop(ctxt);
8007 oldname = namePop(ctxt);
8008 spacePop(ctxt);
8009 if (oldname != NULL) {
8010#ifdef DEBUG_STACK
8011 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8012#endif
8013 xmlFree(oldname);
8014 }
8015 }
8016 xmlFree(name);
8017 ctxt->instate = XML_PARSER_CONTENT;
8018#ifdef DEBUG_PUSH
8019 xmlGenericError(xmlGenericErrorContext,
8020 "PP: entering CONTENT\n");
8021#endif
8022 break;
8023 }
8024 case XML_PARSER_CONTENT: {
8025 const xmlChar *test;
8026 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008027 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008028
8029 /*
8030 * Handle preparsed entities and charRef
8031 */
8032 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008033 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008034
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008035 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008036 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8037 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008038 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008039 ctxt->token = 0;
8040 }
8041 if ((avail < 2) && (ctxt->inputNr == 1))
8042 goto done;
8043 cur = ctxt->input->cur[0];
8044 next = ctxt->input->cur[1];
8045
8046 test = CUR_PTR;
8047 cons = ctxt->input->consumed;
8048 tok = ctxt->token;
8049 if ((cur == '<') && (next == '?')) {
8050 if ((!terminate) &&
8051 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8052 goto done;
8053#ifdef DEBUG_PUSH
8054 xmlGenericError(xmlGenericErrorContext,
8055 "PP: Parsing PI\n");
8056#endif
8057 xmlParsePI(ctxt);
8058 } else if ((cur == '<') && (next == '!') &&
8059 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8060 if ((!terminate) &&
8061 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8062 goto done;
8063#ifdef DEBUG_PUSH
8064 xmlGenericError(xmlGenericErrorContext,
8065 "PP: Parsing Comment\n");
8066#endif
8067 xmlParseComment(ctxt);
8068 ctxt->instate = XML_PARSER_CONTENT;
8069 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8070 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8071 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8072 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8073 (ctxt->input->cur[8] == '[')) {
8074 SKIP(9);
8075 ctxt->instate = XML_PARSER_CDATA_SECTION;
8076#ifdef DEBUG_PUSH
8077 xmlGenericError(xmlGenericErrorContext,
8078 "PP: entering CDATA_SECTION\n");
8079#endif
8080 break;
8081 } else if ((cur == '<') && (next == '!') &&
8082 (avail < 9)) {
8083 goto done;
8084 } else if ((cur == '<') && (next == '/')) {
8085 ctxt->instate = XML_PARSER_END_TAG;
8086#ifdef DEBUG_PUSH
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: entering END_TAG\n");
8089#endif
8090 break;
8091 } else if (cur == '<') {
8092 ctxt->instate = XML_PARSER_START_TAG;
8093#ifdef DEBUG_PUSH
8094 xmlGenericError(xmlGenericErrorContext,
8095 "PP: entering START_TAG\n");
8096#endif
8097 break;
8098 } else if (cur == '&') {
8099 if ((!terminate) &&
8100 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8101 goto done;
8102#ifdef DEBUG_PUSH
8103 xmlGenericError(xmlGenericErrorContext,
8104 "PP: Parsing Reference\n");
8105#endif
8106 xmlParseReference(ctxt);
8107 } else {
8108 /* TODO Avoid the extra copy, handle directly !!! */
8109 /*
8110 * Goal of the following test is:
8111 * - minimize calls to the SAX 'character' callback
8112 * when they are mergeable
8113 * - handle an problem for isBlank when we only parse
8114 * a sequence of blank chars and the next one is
8115 * not available to check against '<' presence.
8116 * - tries to homogenize the differences in SAX
8117 * callbacks beween the push and pull versions
8118 * of the parser.
8119 */
8120 if ((ctxt->inputNr == 1) &&
8121 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8122 if ((!terminate) &&
8123 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8124 goto done;
8125 }
8126 ctxt->checkIndex = 0;
8127#ifdef DEBUG_PUSH
8128 xmlGenericError(xmlGenericErrorContext,
8129 "PP: Parsing char data\n");
8130#endif
8131 xmlParseCharData(ctxt, 0);
8132 }
8133 /*
8134 * Pop-up of finished entities.
8135 */
8136 while ((RAW == 0) && (ctxt->inputNr > 1))
8137 xmlPopInput(ctxt);
8138 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8139 (tok == ctxt->token)) {
8140 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8142 ctxt->sax->error(ctxt->userData,
8143 "detected an error in element content\n");
8144 ctxt->wellFormed = 0;
8145 ctxt->disableSAX = 1;
8146 ctxt->instate = XML_PARSER_EOF;
8147 break;
8148 }
8149 break;
8150 }
8151 case XML_PARSER_CDATA_SECTION: {
8152 /*
8153 * The Push mode need to have the SAX callback for
8154 * cdataBlock merge back contiguous callbacks.
8155 */
8156 int base;
8157
8158 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8159 if (base < 0) {
8160 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8161 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8162 if (ctxt->sax->cdataBlock != NULL)
8163 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8164 XML_PARSER_BIG_BUFFER_SIZE);
8165 }
8166 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8167 ctxt->checkIndex = 0;
8168 }
8169 goto done;
8170 } else {
8171 if ((ctxt->sax != NULL) && (base > 0) &&
8172 (!ctxt->disableSAX)) {
8173 if (ctxt->sax->cdataBlock != NULL)
8174 ctxt->sax->cdataBlock(ctxt->userData,
8175 ctxt->input->cur, base);
8176 }
8177 SKIP(base + 3);
8178 ctxt->checkIndex = 0;
8179 ctxt->instate = XML_PARSER_CONTENT;
8180#ifdef DEBUG_PUSH
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: entering CONTENT\n");
8183#endif
8184 }
8185 break;
8186 }
8187 case XML_PARSER_END_TAG:
8188 if (avail < 2)
8189 goto done;
8190 if ((!terminate) &&
8191 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8192 goto done;
8193 xmlParseEndTag(ctxt);
8194 if (ctxt->name == NULL) {
8195 ctxt->instate = XML_PARSER_EPILOG;
8196#ifdef DEBUG_PUSH
8197 xmlGenericError(xmlGenericErrorContext,
8198 "PP: entering EPILOG\n");
8199#endif
8200 } else {
8201 ctxt->instate = XML_PARSER_CONTENT;
8202#ifdef DEBUG_PUSH
8203 xmlGenericError(xmlGenericErrorContext,
8204 "PP: entering CONTENT\n");
8205#endif
8206 }
8207 break;
8208 case XML_PARSER_DTD: {
8209 /*
8210 * Sorry but progressive parsing of the internal subset
8211 * is not expected to be supported. We first check that
8212 * the full content of the internal subset is available and
8213 * the parsing is launched only at that point.
8214 * Internal subset ends up with "']' S? '>'" in an unescaped
8215 * section and not in a ']]>' sequence which are conditional
8216 * sections (whoever argued to keep that crap in XML deserve
8217 * a place in hell !).
8218 */
8219 int base, i;
8220 xmlChar *buf;
8221 xmlChar quote = 0;
8222
8223 base = ctxt->input->cur - ctxt->input->base;
8224 if (base < 0) return(0);
8225 if (ctxt->checkIndex > base)
8226 base = ctxt->checkIndex;
8227 buf = ctxt->input->buf->buffer->content;
8228 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8229 base++) {
8230 if (quote != 0) {
8231 if (buf[base] == quote)
8232 quote = 0;
8233 continue;
8234 }
8235 if (buf[base] == '"') {
8236 quote = '"';
8237 continue;
8238 }
8239 if (buf[base] == '\'') {
8240 quote = '\'';
8241 continue;
8242 }
8243 if (buf[base] == ']') {
8244 if ((unsigned int) base +1 >=
8245 ctxt->input->buf->buffer->use)
8246 break;
8247 if (buf[base + 1] == ']') {
8248 /* conditional crap, skip both ']' ! */
8249 base++;
8250 continue;
8251 }
8252 for (i = 0;
8253 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8254 i++) {
8255 if (buf[base + i] == '>')
8256 goto found_end_int_subset;
8257 }
8258 break;
8259 }
8260 }
8261 /*
8262 * We didn't found the end of the Internal subset
8263 */
8264 if (quote == 0)
8265 ctxt->checkIndex = base;
8266#ifdef DEBUG_PUSH
8267 if (next == 0)
8268 xmlGenericError(xmlGenericErrorContext,
8269 "PP: lookup of int subset end filed\n");
8270#endif
8271 goto done;
8272
8273found_end_int_subset:
8274 xmlParseInternalSubset(ctxt);
8275 ctxt->inSubset = 2;
8276 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8277 (ctxt->sax->externalSubset != NULL))
8278 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8279 ctxt->extSubSystem, ctxt->extSubURI);
8280 ctxt->inSubset = 0;
8281 ctxt->instate = XML_PARSER_PROLOG;
8282 ctxt->checkIndex = 0;
8283#ifdef DEBUG_PUSH
8284 xmlGenericError(xmlGenericErrorContext,
8285 "PP: entering PROLOG\n");
8286#endif
8287 break;
8288 }
8289 case XML_PARSER_COMMENT:
8290 xmlGenericError(xmlGenericErrorContext,
8291 "PP: internal error, state == COMMENT\n");
8292 ctxt->instate = XML_PARSER_CONTENT;
8293#ifdef DEBUG_PUSH
8294 xmlGenericError(xmlGenericErrorContext,
8295 "PP: entering CONTENT\n");
8296#endif
8297 break;
8298 case XML_PARSER_PI:
8299 xmlGenericError(xmlGenericErrorContext,
8300 "PP: internal error, state == PI\n");
8301 ctxt->instate = XML_PARSER_CONTENT;
8302#ifdef DEBUG_PUSH
8303 xmlGenericError(xmlGenericErrorContext,
8304 "PP: entering CONTENT\n");
8305#endif
8306 break;
8307 case XML_PARSER_ENTITY_DECL:
8308 xmlGenericError(xmlGenericErrorContext,
8309 "PP: internal error, state == ENTITY_DECL\n");
8310 ctxt->instate = XML_PARSER_DTD;
8311#ifdef DEBUG_PUSH
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: entering DTD\n");
8314#endif
8315 break;
8316 case XML_PARSER_ENTITY_VALUE:
8317 xmlGenericError(xmlGenericErrorContext,
8318 "PP: internal error, state == ENTITY_VALUE\n");
8319 ctxt->instate = XML_PARSER_CONTENT;
8320#ifdef DEBUG_PUSH
8321 xmlGenericError(xmlGenericErrorContext,
8322 "PP: entering DTD\n");
8323#endif
8324 break;
8325 case XML_PARSER_ATTRIBUTE_VALUE:
8326 xmlGenericError(xmlGenericErrorContext,
8327 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8328 ctxt->instate = XML_PARSER_START_TAG;
8329#ifdef DEBUG_PUSH
8330 xmlGenericError(xmlGenericErrorContext,
8331 "PP: entering START_TAG\n");
8332#endif
8333 break;
8334 case XML_PARSER_SYSTEM_LITERAL:
8335 xmlGenericError(xmlGenericErrorContext,
8336 "PP: internal error, state == SYSTEM_LITERAL\n");
8337 ctxt->instate = XML_PARSER_START_TAG;
8338#ifdef DEBUG_PUSH
8339 xmlGenericError(xmlGenericErrorContext,
8340 "PP: entering START_TAG\n");
8341#endif
8342 break;
8343 }
8344 }
8345done:
8346#ifdef DEBUG_PUSH
8347 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8348#endif
8349 return(ret);
8350}
8351
8352/**
Owen Taylor3473f882001-02-23 17:55:21 +00008353 * xmlParseChunk:
8354 * @ctxt: an XML parser context
8355 * @chunk: an char array
8356 * @size: the size in byte of the chunk
8357 * @terminate: last chunk indicator
8358 *
8359 * Parse a Chunk of memory
8360 *
8361 * Returns zero if no error, the xmlParserErrors otherwise.
8362 */
8363int
8364xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8365 int terminate) {
8366 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8367 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8368 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8369 int cur = ctxt->input->cur - ctxt->input->base;
8370
8371 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8372 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8373 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008374 ctxt->input->end =
8375 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008376#ifdef DEBUG_PUSH
8377 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8378#endif
8379
8380 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8381 xmlParseTryOrFinish(ctxt, terminate);
8382 } else if (ctxt->instate != XML_PARSER_EOF) {
8383 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8384 xmlParserInputBufferPtr in = ctxt->input->buf;
8385 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8386 (in->raw != NULL)) {
8387 int nbchars;
8388
8389 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8390 if (nbchars < 0) {
8391 xmlGenericError(xmlGenericErrorContext,
8392 "xmlParseChunk: encoder error\n");
8393 return(XML_ERR_INVALID_ENCODING);
8394 }
8395 }
8396 }
8397 }
8398 xmlParseTryOrFinish(ctxt, terminate);
8399 if (terminate) {
8400 /*
8401 * Check for termination
8402 */
8403 if ((ctxt->instate != XML_PARSER_EOF) &&
8404 (ctxt->instate != XML_PARSER_EPILOG)) {
8405 ctxt->errNo = XML_ERR_DOCUMENT_END;
8406 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8407 ctxt->sax->error(ctxt->userData,
8408 "Extra content at the end of the document\n");
8409 ctxt->wellFormed = 0;
8410 ctxt->disableSAX = 1;
8411 }
8412 if (ctxt->instate != XML_PARSER_EOF) {
8413 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8414 (!ctxt->disableSAX))
8415 ctxt->sax->endDocument(ctxt->userData);
8416 }
8417 ctxt->instate = XML_PARSER_EOF;
8418 }
8419 return((xmlParserErrors) ctxt->errNo);
8420}
8421
8422/************************************************************************
8423 * *
8424 * I/O front end functions to the parser *
8425 * *
8426 ************************************************************************/
8427
8428/**
8429 * xmlStopParser:
8430 * @ctxt: an XML parser context
8431 *
8432 * Blocks further parser processing
8433 */
8434void
8435xmlStopParser(xmlParserCtxtPtr ctxt) {
8436 ctxt->instate = XML_PARSER_EOF;
8437 if (ctxt->input != NULL)
8438 ctxt->input->cur = BAD_CAST"";
8439}
8440
8441/**
8442 * xmlCreatePushParserCtxt:
8443 * @sax: a SAX handler
8444 * @user_data: The user data returned on SAX callbacks
8445 * @chunk: a pointer to an array of chars
8446 * @size: number of chars in the array
8447 * @filename: an optional file name or URI
8448 *
8449 * Create a parser context for using the XML parser in push mode
8450 * To allow content encoding detection, @size should be >= 4
8451 * The value of @filename is used for fetching external entities
8452 * and error/warning reports.
8453 *
8454 * Returns the new parser context or NULL
8455 */
8456xmlParserCtxtPtr
8457xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8458 const char *chunk, int size, const char *filename) {
8459 xmlParserCtxtPtr ctxt;
8460 xmlParserInputPtr inputStream;
8461 xmlParserInputBufferPtr buf;
8462 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8463
8464 /*
8465 * plug some encoding conversion routines
8466 */
8467 if ((chunk != NULL) && (size >= 4))
8468 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8469
8470 buf = xmlAllocParserInputBuffer(enc);
8471 if (buf == NULL) return(NULL);
8472
8473 ctxt = xmlNewParserCtxt();
8474 if (ctxt == NULL) {
8475 xmlFree(buf);
8476 return(NULL);
8477 }
8478 if (sax != NULL) {
8479 if (ctxt->sax != &xmlDefaultSAXHandler)
8480 xmlFree(ctxt->sax);
8481 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8482 if (ctxt->sax == NULL) {
8483 xmlFree(buf);
8484 xmlFree(ctxt);
8485 return(NULL);
8486 }
8487 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8488 if (user_data != NULL)
8489 ctxt->userData = user_data;
8490 }
8491 if (filename == NULL) {
8492 ctxt->directory = NULL;
8493 } else {
8494 ctxt->directory = xmlParserGetDirectory(filename);
8495 }
8496
8497 inputStream = xmlNewInputStream(ctxt);
8498 if (inputStream == NULL) {
8499 xmlFreeParserCtxt(ctxt);
8500 return(NULL);
8501 }
8502
8503 if (filename == NULL)
8504 inputStream->filename = NULL;
8505 else
8506 inputStream->filename = xmlMemStrdup(filename);
8507 inputStream->buf = buf;
8508 inputStream->base = inputStream->buf->buffer->content;
8509 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008510 inputStream->end =
8511 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008512 if (enc != XML_CHAR_ENCODING_NONE) {
8513 xmlSwitchEncoding(ctxt, enc);
8514 }
8515
8516 inputPush(ctxt, inputStream);
8517
8518 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8519 (ctxt->input->buf != NULL)) {
8520 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8521#ifdef DEBUG_PUSH
8522 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8523#endif
8524 }
8525
8526 return(ctxt);
8527}
8528
8529/**
8530 * xmlCreateIOParserCtxt:
8531 * @sax: a SAX handler
8532 * @user_data: The user data returned on SAX callbacks
8533 * @ioread: an I/O read function
8534 * @ioclose: an I/O close function
8535 * @ioctx: an I/O handler
8536 * @enc: the charset encoding if known
8537 *
8538 * Create a parser context for using the XML parser with an existing
8539 * I/O stream
8540 *
8541 * Returns the new parser context or NULL
8542 */
8543xmlParserCtxtPtr
8544xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8545 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8546 void *ioctx, xmlCharEncoding enc) {
8547 xmlParserCtxtPtr ctxt;
8548 xmlParserInputPtr inputStream;
8549 xmlParserInputBufferPtr buf;
8550
8551 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8552 if (buf == NULL) return(NULL);
8553
8554 ctxt = xmlNewParserCtxt();
8555 if (ctxt == NULL) {
8556 xmlFree(buf);
8557 return(NULL);
8558 }
8559 if (sax != NULL) {
8560 if (ctxt->sax != &xmlDefaultSAXHandler)
8561 xmlFree(ctxt->sax);
8562 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8563 if (ctxt->sax == NULL) {
8564 xmlFree(buf);
8565 xmlFree(ctxt);
8566 return(NULL);
8567 }
8568 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8569 if (user_data != NULL)
8570 ctxt->userData = user_data;
8571 }
8572
8573 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8574 if (inputStream == NULL) {
8575 xmlFreeParserCtxt(ctxt);
8576 return(NULL);
8577 }
8578 inputPush(ctxt, inputStream);
8579
8580 return(ctxt);
8581}
8582
8583/************************************************************************
8584 * *
8585 * Front ends when parsing a Dtd *
8586 * *
8587 ************************************************************************/
8588
8589/**
8590 * xmlIOParseDTD:
8591 * @sax: the SAX handler block or NULL
8592 * @input: an Input Buffer
8593 * @enc: the charset encoding if known
8594 *
8595 * Load and parse a DTD
8596 *
8597 * Returns the resulting xmlDtdPtr or NULL in case of error.
8598 * @input will be freed at parsing end.
8599 */
8600
8601xmlDtdPtr
8602xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8603 xmlCharEncoding enc) {
8604 xmlDtdPtr ret = NULL;
8605 xmlParserCtxtPtr ctxt;
8606 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008607 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008608
8609 if (input == NULL)
8610 return(NULL);
8611
8612 ctxt = xmlNewParserCtxt();
8613 if (ctxt == NULL) {
8614 return(NULL);
8615 }
8616
8617 /*
8618 * Set-up the SAX context
8619 */
8620 if (sax != NULL) {
8621 if (ctxt->sax != NULL)
8622 xmlFree(ctxt->sax);
8623 ctxt->sax = sax;
8624 ctxt->userData = NULL;
8625 }
8626
8627 /*
8628 * generate a parser input from the I/O handler
8629 */
8630
8631 pinput = xmlNewIOInputStream(ctxt, input, enc);
8632 if (pinput == NULL) {
8633 if (sax != NULL) ctxt->sax = NULL;
8634 xmlFreeParserCtxt(ctxt);
8635 return(NULL);
8636 }
8637
8638 /*
8639 * plug some encoding conversion routines here.
8640 */
8641 xmlPushInput(ctxt, pinput);
8642
8643 pinput->filename = NULL;
8644 pinput->line = 1;
8645 pinput->col = 1;
8646 pinput->base = ctxt->input->cur;
8647 pinput->cur = ctxt->input->cur;
8648 pinput->free = NULL;
8649
8650 /*
8651 * let's parse that entity knowing it's an external subset.
8652 */
8653 ctxt->inSubset = 2;
8654 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8655 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8656 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008657
8658 if (enc == XML_CHAR_ENCODING_NONE) {
8659 /*
8660 * Get the 4 first bytes and decode the charset
8661 * if enc != XML_CHAR_ENCODING_NONE
8662 * plug some encoding conversion routines.
8663 */
8664 start[0] = RAW;
8665 start[1] = NXT(1);
8666 start[2] = NXT(2);
8667 start[3] = NXT(3);
8668 enc = xmlDetectCharEncoding(start, 4);
8669 if (enc != XML_CHAR_ENCODING_NONE) {
8670 xmlSwitchEncoding(ctxt, enc);
8671 }
8672 }
8673
Owen Taylor3473f882001-02-23 17:55:21 +00008674 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8675
8676 if (ctxt->myDoc != NULL) {
8677 if (ctxt->wellFormed) {
8678 ret = ctxt->myDoc->extSubset;
8679 ctxt->myDoc->extSubset = NULL;
8680 } else {
8681 ret = NULL;
8682 }
8683 xmlFreeDoc(ctxt->myDoc);
8684 ctxt->myDoc = NULL;
8685 }
8686 if (sax != NULL) ctxt->sax = NULL;
8687 xmlFreeParserCtxt(ctxt);
8688
8689 return(ret);
8690}
8691
8692/**
8693 * xmlSAXParseDTD:
8694 * @sax: the SAX handler block
8695 * @ExternalID: a NAME* containing the External ID of the DTD
8696 * @SystemID: a NAME* containing the URL to the DTD
8697 *
8698 * Load and parse an external subset.
8699 *
8700 * Returns the resulting xmlDtdPtr or NULL in case of error.
8701 */
8702
8703xmlDtdPtr
8704xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8705 const xmlChar *SystemID) {
8706 xmlDtdPtr ret = NULL;
8707 xmlParserCtxtPtr ctxt;
8708 xmlParserInputPtr input = NULL;
8709 xmlCharEncoding enc;
8710
8711 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8712
8713 ctxt = xmlNewParserCtxt();
8714 if (ctxt == NULL) {
8715 return(NULL);
8716 }
8717
8718 /*
8719 * Set-up the SAX context
8720 */
8721 if (sax != NULL) {
8722 if (ctxt->sax != NULL)
8723 xmlFree(ctxt->sax);
8724 ctxt->sax = sax;
8725 ctxt->userData = NULL;
8726 }
8727
8728 /*
8729 * Ask the Entity resolver to load the damn thing
8730 */
8731
8732 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8733 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8734 if (input == NULL) {
8735 if (sax != NULL) ctxt->sax = NULL;
8736 xmlFreeParserCtxt(ctxt);
8737 return(NULL);
8738 }
8739
8740 /*
8741 * plug some encoding conversion routines here.
8742 */
8743 xmlPushInput(ctxt, input);
8744 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8745 xmlSwitchEncoding(ctxt, enc);
8746
8747 if (input->filename == NULL)
8748 input->filename = (char *) xmlStrdup(SystemID);
8749 input->line = 1;
8750 input->col = 1;
8751 input->base = ctxt->input->cur;
8752 input->cur = ctxt->input->cur;
8753 input->free = NULL;
8754
8755 /*
8756 * let's parse that entity knowing it's an external subset.
8757 */
8758 ctxt->inSubset = 2;
8759 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8760 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8761 ExternalID, SystemID);
8762 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8763
8764 if (ctxt->myDoc != NULL) {
8765 if (ctxt->wellFormed) {
8766 ret = ctxt->myDoc->extSubset;
8767 ctxt->myDoc->extSubset = NULL;
8768 } else {
8769 ret = NULL;
8770 }
8771 xmlFreeDoc(ctxt->myDoc);
8772 ctxt->myDoc = NULL;
8773 }
8774 if (sax != NULL) ctxt->sax = NULL;
8775 xmlFreeParserCtxt(ctxt);
8776
8777 return(ret);
8778}
8779
8780/**
8781 * xmlParseDTD:
8782 * @ExternalID: a NAME* containing the External ID of the DTD
8783 * @SystemID: a NAME* containing the URL to the DTD
8784 *
8785 * Load and parse an external subset.
8786 *
8787 * Returns the resulting xmlDtdPtr or NULL in case of error.
8788 */
8789
8790xmlDtdPtr
8791xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8792 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8793}
8794
8795/************************************************************************
8796 * *
8797 * Front ends when parsing an Entity *
8798 * *
8799 ************************************************************************/
8800
8801/**
Owen Taylor3473f882001-02-23 17:55:21 +00008802 * xmlParseCtxtExternalEntity:
8803 * @ctx: the existing parsing context
8804 * @URL: the URL for the entity to load
8805 * @ID: the System ID for the entity to load
8806 * @list: the return value for the set of parsed nodes
8807 *
8808 * Parse an external general entity within an existing parsing context
8809 * An external general parsed entity is well-formed if it matches the
8810 * production labeled extParsedEnt.
8811 *
8812 * [78] extParsedEnt ::= TextDecl? content
8813 *
8814 * Returns 0 if the entity is well formed, -1 in case of args problem and
8815 * the parser error code otherwise
8816 */
8817
8818int
8819xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8820 const xmlChar *ID, xmlNodePtr *list) {
8821 xmlParserCtxtPtr ctxt;
8822 xmlDocPtr newDoc;
8823 xmlSAXHandlerPtr oldsax = NULL;
8824 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008825 xmlChar start[4];
8826 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00008827
8828 if (ctx->depth > 40) {
8829 return(XML_ERR_ENTITY_LOOP);
8830 }
8831
8832 if (list != NULL)
8833 *list = NULL;
8834 if ((URL == NULL) && (ID == NULL))
8835 return(-1);
8836 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8837 return(-1);
8838
8839
8840 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8841 if (ctxt == NULL) return(-1);
8842 ctxt->userData = ctxt;
8843 oldsax = ctxt->sax;
8844 ctxt->sax = ctx->sax;
8845 newDoc = xmlNewDoc(BAD_CAST "1.0");
8846 if (newDoc == NULL) {
8847 xmlFreeParserCtxt(ctxt);
8848 return(-1);
8849 }
8850 if (ctx->myDoc != NULL) {
8851 newDoc->intSubset = ctx->myDoc->intSubset;
8852 newDoc->extSubset = ctx->myDoc->extSubset;
8853 }
8854 if (ctx->myDoc->URL != NULL) {
8855 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8856 }
8857 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8858 if (newDoc->children == NULL) {
8859 ctxt->sax = oldsax;
8860 xmlFreeParserCtxt(ctxt);
8861 newDoc->intSubset = NULL;
8862 newDoc->extSubset = NULL;
8863 xmlFreeDoc(newDoc);
8864 return(-1);
8865 }
8866 nodePush(ctxt, newDoc->children);
8867 if (ctx->myDoc == NULL) {
8868 ctxt->myDoc = newDoc;
8869 } else {
8870 ctxt->myDoc = ctx->myDoc;
8871 newDoc->children->doc = ctx->myDoc;
8872 }
8873
Daniel Veillard87a764e2001-06-20 17:41:10 +00008874 /*
8875 * Get the 4 first bytes and decode the charset
8876 * if enc != XML_CHAR_ENCODING_NONE
8877 * plug some encoding conversion routines.
8878 */
8879 GROW
8880 start[0] = RAW;
8881 start[1] = NXT(1);
8882 start[2] = NXT(2);
8883 start[3] = NXT(3);
8884 enc = xmlDetectCharEncoding(start, 4);
8885 if (enc != XML_CHAR_ENCODING_NONE) {
8886 xmlSwitchEncoding(ctxt, enc);
8887 }
8888
Owen Taylor3473f882001-02-23 17:55:21 +00008889 /*
8890 * Parse a possible text declaration first
8891 */
Owen Taylor3473f882001-02-23 17:55:21 +00008892 if ((RAW == '<') && (NXT(1) == '?') &&
8893 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8894 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8895 xmlParseTextDecl(ctxt);
8896 }
8897
8898 /*
8899 * Doing validity checking on chunk doesn't make sense
8900 */
8901 ctxt->instate = XML_PARSER_CONTENT;
8902 ctxt->validate = ctx->validate;
8903 ctxt->loadsubset = ctx->loadsubset;
8904 ctxt->depth = ctx->depth + 1;
8905 ctxt->replaceEntities = ctx->replaceEntities;
8906 if (ctxt->validate) {
8907 ctxt->vctxt.error = ctx->vctxt.error;
8908 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00008909 } else {
8910 ctxt->vctxt.error = NULL;
8911 ctxt->vctxt.warning = NULL;
8912 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00008913 ctxt->vctxt.nodeTab = NULL;
8914 ctxt->vctxt.nodeNr = 0;
8915 ctxt->vctxt.nodeMax = 0;
8916 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008917
8918 xmlParseContent(ctxt);
8919
8920 if ((RAW == '<') && (NXT(1) == '/')) {
8921 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8923 ctxt->sax->error(ctxt->userData,
8924 "chunk is not well balanced\n");
8925 ctxt->wellFormed = 0;
8926 ctxt->disableSAX = 1;
8927 } else if (RAW != 0) {
8928 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8930 ctxt->sax->error(ctxt->userData,
8931 "extra content at the end of well balanced chunk\n");
8932 ctxt->wellFormed = 0;
8933 ctxt->disableSAX = 1;
8934 }
8935 if (ctxt->node != newDoc->children) {
8936 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8938 ctxt->sax->error(ctxt->userData,
8939 "chunk is not well balanced\n");
8940 ctxt->wellFormed = 0;
8941 ctxt->disableSAX = 1;
8942 }
8943
8944 if (!ctxt->wellFormed) {
8945 if (ctxt->errNo == 0)
8946 ret = 1;
8947 else
8948 ret = ctxt->errNo;
8949 } else {
8950 if (list != NULL) {
8951 xmlNodePtr cur;
8952
8953 /*
8954 * Return the newly created nodeset after unlinking it from
8955 * they pseudo parent.
8956 */
8957 cur = newDoc->children->children;
8958 *list = cur;
8959 while (cur != NULL) {
8960 cur->parent = NULL;
8961 cur = cur->next;
8962 }
8963 newDoc->children->children = NULL;
8964 }
8965 ret = 0;
8966 }
8967 ctxt->sax = oldsax;
8968 xmlFreeParserCtxt(ctxt);
8969 newDoc->intSubset = NULL;
8970 newDoc->extSubset = NULL;
8971 xmlFreeDoc(newDoc);
8972
8973 return(ret);
8974}
8975
8976/**
Daniel Veillard257d9102001-05-08 10:41:44 +00008977 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00008978 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008979 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00008980 * @sax: the SAX handler bloc (possibly NULL)
8981 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8982 * @depth: Used for loop detection, use 0
8983 * @URL: the URL for the entity to load
8984 * @ID: the System ID for the entity to load
8985 * @list: the return value for the set of parsed nodes
8986 *
Daniel Veillard257d9102001-05-08 10:41:44 +00008987 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00008988 *
8989 * Returns 0 if the entity is well formed, -1 in case of args problem and
8990 * the parser error code otherwise
8991 */
8992
Daniel Veillard257d9102001-05-08 10:41:44 +00008993static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008994xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
8995 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00008996 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00008997 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00008998 xmlParserCtxtPtr ctxt;
8999 xmlDocPtr newDoc;
9000 xmlSAXHandlerPtr oldsax = NULL;
9001 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009002 xmlChar start[4];
9003 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009004
9005 if (depth > 40) {
9006 return(XML_ERR_ENTITY_LOOP);
9007 }
9008
9009
9010
9011 if (list != NULL)
9012 *list = NULL;
9013 if ((URL == NULL) && (ID == NULL))
9014 return(-1);
9015 if (doc == NULL) /* @@ relax but check for dereferences */
9016 return(-1);
9017
9018
9019 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9020 if (ctxt == NULL) return(-1);
9021 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009022 if (oldctxt != NULL) {
9023 ctxt->_private = oldctxt->_private;
9024 ctxt->loadsubset = oldctxt->loadsubset;
9025 ctxt->validate = oldctxt->validate;
9026 ctxt->external = oldctxt->external;
9027 } else {
9028 /*
9029 * Doing validity checking on chunk without context
9030 * doesn't make sense
9031 */
9032 ctxt->_private = NULL;
9033 ctxt->validate = 0;
9034 ctxt->external = 2;
9035 ctxt->loadsubset = 0;
9036 }
Owen Taylor3473f882001-02-23 17:55:21 +00009037 if (sax != NULL) {
9038 oldsax = ctxt->sax;
9039 ctxt->sax = sax;
9040 if (user_data != NULL)
9041 ctxt->userData = user_data;
9042 }
9043 newDoc = xmlNewDoc(BAD_CAST "1.0");
9044 if (newDoc == NULL) {
9045 xmlFreeParserCtxt(ctxt);
9046 return(-1);
9047 }
9048 if (doc != NULL) {
9049 newDoc->intSubset = doc->intSubset;
9050 newDoc->extSubset = doc->extSubset;
9051 }
9052 if (doc->URL != NULL) {
9053 newDoc->URL = xmlStrdup(doc->URL);
9054 }
9055 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9056 if (newDoc->children == NULL) {
9057 if (sax != NULL)
9058 ctxt->sax = oldsax;
9059 xmlFreeParserCtxt(ctxt);
9060 newDoc->intSubset = NULL;
9061 newDoc->extSubset = NULL;
9062 xmlFreeDoc(newDoc);
9063 return(-1);
9064 }
9065 nodePush(ctxt, newDoc->children);
9066 if (doc == NULL) {
9067 ctxt->myDoc = newDoc;
9068 } else {
9069 ctxt->myDoc = doc;
9070 newDoc->children->doc = doc;
9071 }
9072
Daniel Veillard87a764e2001-06-20 17:41:10 +00009073 /*
9074 * Get the 4 first bytes and decode the charset
9075 * if enc != XML_CHAR_ENCODING_NONE
9076 * plug some encoding conversion routines.
9077 */
9078 GROW;
9079 start[0] = RAW;
9080 start[1] = NXT(1);
9081 start[2] = NXT(2);
9082 start[3] = NXT(3);
9083 enc = xmlDetectCharEncoding(start, 4);
9084 if (enc != XML_CHAR_ENCODING_NONE) {
9085 xmlSwitchEncoding(ctxt, enc);
9086 }
9087
Owen Taylor3473f882001-02-23 17:55:21 +00009088 /*
9089 * Parse a possible text declaration first
9090 */
Owen Taylor3473f882001-02-23 17:55:21 +00009091 if ((RAW == '<') && (NXT(1) == '?') &&
9092 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9093 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9094 xmlParseTextDecl(ctxt);
9095 }
9096
Owen Taylor3473f882001-02-23 17:55:21 +00009097 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009098 ctxt->depth = depth;
9099
9100 xmlParseContent(ctxt);
9101
9102 if ((RAW == '<') && (NXT(1) == '/')) {
9103 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9105 ctxt->sax->error(ctxt->userData,
9106 "chunk is not well balanced\n");
9107 ctxt->wellFormed = 0;
9108 ctxt->disableSAX = 1;
9109 } else if (RAW != 0) {
9110 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9112 ctxt->sax->error(ctxt->userData,
9113 "extra content at the end of well balanced chunk\n");
9114 ctxt->wellFormed = 0;
9115 ctxt->disableSAX = 1;
9116 }
9117 if (ctxt->node != newDoc->children) {
9118 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9120 ctxt->sax->error(ctxt->userData,
9121 "chunk is not well balanced\n");
9122 ctxt->wellFormed = 0;
9123 ctxt->disableSAX = 1;
9124 }
9125
9126 if (!ctxt->wellFormed) {
9127 if (ctxt->errNo == 0)
9128 ret = 1;
9129 else
9130 ret = ctxt->errNo;
9131 } else {
9132 if (list != NULL) {
9133 xmlNodePtr cur;
9134
9135 /*
9136 * Return the newly created nodeset after unlinking it from
9137 * they pseudo parent.
9138 */
9139 cur = newDoc->children->children;
9140 *list = cur;
9141 while (cur != NULL) {
9142 cur->parent = NULL;
9143 cur = cur->next;
9144 }
9145 newDoc->children->children = NULL;
9146 }
9147 ret = 0;
9148 }
9149 if (sax != NULL)
9150 ctxt->sax = oldsax;
9151 xmlFreeParserCtxt(ctxt);
9152 newDoc->intSubset = NULL;
9153 newDoc->extSubset = NULL;
9154 xmlFreeDoc(newDoc);
9155
9156 return(ret);
9157}
9158
9159/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009160 * xmlParseExternalEntity:
9161 * @doc: the document the chunk pertains to
9162 * @sax: the SAX handler bloc (possibly NULL)
9163 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9164 * @depth: Used for loop detection, use 0
9165 * @URL: the URL for the entity to load
9166 * @ID: the System ID for the entity to load
9167 * @list: the return value for the set of parsed nodes
9168 *
9169 * Parse an external general entity
9170 * An external general parsed entity is well-formed if it matches the
9171 * production labeled extParsedEnt.
9172 *
9173 * [78] extParsedEnt ::= TextDecl? content
9174 *
9175 * Returns 0 if the entity is well formed, -1 in case of args problem and
9176 * the parser error code otherwise
9177 */
9178
9179int
9180xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9181 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009182 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9183 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009184}
9185
9186/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009187 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009188 * @doc: the document the chunk pertains to
9189 * @sax: the SAX handler bloc (possibly NULL)
9190 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9191 * @depth: Used for loop detection, use 0
9192 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9193 * @list: the return value for the set of parsed nodes
9194 *
9195 * Parse a well-balanced chunk of an XML document
9196 * called by the parser
9197 * The allowed sequence for the Well Balanced Chunk is the one defined by
9198 * the content production in the XML grammar:
9199 *
9200 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9201 *
9202 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9203 * the parser error code otherwise
9204 */
9205
9206int
9207xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9208 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9209 xmlParserCtxtPtr ctxt;
9210 xmlDocPtr newDoc;
9211 xmlSAXHandlerPtr oldsax = NULL;
9212 int size;
9213 int ret = 0;
9214
9215 if (depth > 40) {
9216 return(XML_ERR_ENTITY_LOOP);
9217 }
9218
9219
9220 if (list != NULL)
9221 *list = NULL;
9222 if (string == NULL)
9223 return(-1);
9224
9225 size = xmlStrlen(string);
9226
9227 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9228 if (ctxt == NULL) return(-1);
9229 ctxt->userData = ctxt;
9230 if (sax != NULL) {
9231 oldsax = ctxt->sax;
9232 ctxt->sax = sax;
9233 if (user_data != NULL)
9234 ctxt->userData = user_data;
9235 }
9236 newDoc = xmlNewDoc(BAD_CAST "1.0");
9237 if (newDoc == NULL) {
9238 xmlFreeParserCtxt(ctxt);
9239 return(-1);
9240 }
9241 if (doc != NULL) {
9242 newDoc->intSubset = doc->intSubset;
9243 newDoc->extSubset = doc->extSubset;
9244 }
9245 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9246 if (newDoc->children == NULL) {
9247 if (sax != NULL)
9248 ctxt->sax = oldsax;
9249 xmlFreeParserCtxt(ctxt);
9250 newDoc->intSubset = NULL;
9251 newDoc->extSubset = NULL;
9252 xmlFreeDoc(newDoc);
9253 return(-1);
9254 }
9255 nodePush(ctxt, newDoc->children);
9256 if (doc == NULL) {
9257 ctxt->myDoc = newDoc;
9258 } else {
9259 ctxt->myDoc = doc;
9260 newDoc->children->doc = doc;
9261 }
9262 ctxt->instate = XML_PARSER_CONTENT;
9263 ctxt->depth = depth;
9264
9265 /*
9266 * Doing validity checking on chunk doesn't make sense
9267 */
9268 ctxt->validate = 0;
9269 ctxt->loadsubset = 0;
9270
9271 xmlParseContent(ctxt);
9272
9273 if ((RAW == '<') && (NXT(1) == '/')) {
9274 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9276 ctxt->sax->error(ctxt->userData,
9277 "chunk is not well balanced\n");
9278 ctxt->wellFormed = 0;
9279 ctxt->disableSAX = 1;
9280 } else if (RAW != 0) {
9281 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9283 ctxt->sax->error(ctxt->userData,
9284 "extra content at the end of well balanced chunk\n");
9285 ctxt->wellFormed = 0;
9286 ctxt->disableSAX = 1;
9287 }
9288 if (ctxt->node != newDoc->children) {
9289 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9291 ctxt->sax->error(ctxt->userData,
9292 "chunk is not well balanced\n");
9293 ctxt->wellFormed = 0;
9294 ctxt->disableSAX = 1;
9295 }
9296
9297 if (!ctxt->wellFormed) {
9298 if (ctxt->errNo == 0)
9299 ret = 1;
9300 else
9301 ret = ctxt->errNo;
9302 } else {
9303 if (list != NULL) {
9304 xmlNodePtr cur;
9305
9306 /*
9307 * Return the newly created nodeset after unlinking it from
9308 * they pseudo parent.
9309 */
9310 cur = newDoc->children->children;
9311 *list = cur;
9312 while (cur != NULL) {
9313 cur->parent = NULL;
9314 cur = cur->next;
9315 }
9316 newDoc->children->children = NULL;
9317 }
9318 ret = 0;
9319 }
9320 if (sax != NULL)
9321 ctxt->sax = oldsax;
9322 xmlFreeParserCtxt(ctxt);
9323 newDoc->intSubset = NULL;
9324 newDoc->extSubset = NULL;
9325 xmlFreeDoc(newDoc);
9326
9327 return(ret);
9328}
9329
9330/**
9331 * xmlSAXParseEntity:
9332 * @sax: the SAX handler block
9333 * @filename: the filename
9334 *
9335 * parse an XML external entity out of context and build a tree.
9336 * It use the given SAX function block to handle the parsing callback.
9337 * If sax is NULL, fallback to the default DOM tree building routines.
9338 *
9339 * [78] extParsedEnt ::= TextDecl? content
9340 *
9341 * This correspond to a "Well Balanced" chunk
9342 *
9343 * Returns the resulting document tree
9344 */
9345
9346xmlDocPtr
9347xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9348 xmlDocPtr ret;
9349 xmlParserCtxtPtr ctxt;
9350 char *directory = NULL;
9351
9352 ctxt = xmlCreateFileParserCtxt(filename);
9353 if (ctxt == NULL) {
9354 return(NULL);
9355 }
9356 if (sax != NULL) {
9357 if (ctxt->sax != NULL)
9358 xmlFree(ctxt->sax);
9359 ctxt->sax = sax;
9360 ctxt->userData = NULL;
9361 }
9362
9363 if ((ctxt->directory == NULL) && (directory == NULL))
9364 directory = xmlParserGetDirectory(filename);
9365
9366 xmlParseExtParsedEnt(ctxt);
9367
9368 if (ctxt->wellFormed)
9369 ret = ctxt->myDoc;
9370 else {
9371 ret = NULL;
9372 xmlFreeDoc(ctxt->myDoc);
9373 ctxt->myDoc = NULL;
9374 }
9375 if (sax != NULL)
9376 ctxt->sax = NULL;
9377 xmlFreeParserCtxt(ctxt);
9378
9379 return(ret);
9380}
9381
9382/**
9383 * xmlParseEntity:
9384 * @filename: the filename
9385 *
9386 * parse an XML external entity out of context and build a tree.
9387 *
9388 * [78] extParsedEnt ::= TextDecl? content
9389 *
9390 * This correspond to a "Well Balanced" chunk
9391 *
9392 * Returns the resulting document tree
9393 */
9394
9395xmlDocPtr
9396xmlParseEntity(const char *filename) {
9397 return(xmlSAXParseEntity(NULL, filename));
9398}
9399
9400/**
9401 * xmlCreateEntityParserCtxt:
9402 * @URL: the entity URL
9403 * @ID: the entity PUBLIC ID
9404 * @base: a posible base for the target URI
9405 *
9406 * Create a parser context for an external entity
9407 * Automatic support for ZLIB/Compress compressed document is provided
9408 * by default if found at compile-time.
9409 *
9410 * Returns the new parser context or NULL
9411 */
9412xmlParserCtxtPtr
9413xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9414 const xmlChar *base) {
9415 xmlParserCtxtPtr ctxt;
9416 xmlParserInputPtr inputStream;
9417 char *directory = NULL;
9418 xmlChar *uri;
9419
9420 ctxt = xmlNewParserCtxt();
9421 if (ctxt == NULL) {
9422 return(NULL);
9423 }
9424
9425 uri = xmlBuildURI(URL, base);
9426
9427 if (uri == NULL) {
9428 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9429 if (inputStream == NULL) {
9430 xmlFreeParserCtxt(ctxt);
9431 return(NULL);
9432 }
9433
9434 inputPush(ctxt, inputStream);
9435
9436 if ((ctxt->directory == NULL) && (directory == NULL))
9437 directory = xmlParserGetDirectory((char *)URL);
9438 if ((ctxt->directory == NULL) && (directory != NULL))
9439 ctxt->directory = directory;
9440 } else {
9441 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9442 if (inputStream == NULL) {
9443 xmlFree(uri);
9444 xmlFreeParserCtxt(ctxt);
9445 return(NULL);
9446 }
9447
9448 inputPush(ctxt, inputStream);
9449
9450 if ((ctxt->directory == NULL) && (directory == NULL))
9451 directory = xmlParserGetDirectory((char *)uri);
9452 if ((ctxt->directory == NULL) && (directory != NULL))
9453 ctxt->directory = directory;
9454 xmlFree(uri);
9455 }
9456
9457 return(ctxt);
9458}
9459
9460/************************************************************************
9461 * *
9462 * Front ends when parsing from a file *
9463 * *
9464 ************************************************************************/
9465
9466/**
9467 * xmlCreateFileParserCtxt:
9468 * @filename: the filename
9469 *
9470 * Create a parser context for a file content.
9471 * Automatic support for ZLIB/Compress compressed document is provided
9472 * by default if found at compile-time.
9473 *
9474 * Returns the new parser context or NULL
9475 */
9476xmlParserCtxtPtr
9477xmlCreateFileParserCtxt(const char *filename)
9478{
9479 xmlParserCtxtPtr ctxt;
9480 xmlParserInputPtr inputStream;
9481 xmlParserInputBufferPtr buf;
9482 char *directory = NULL;
9483
9484 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9485 if (buf == NULL) {
9486 return(NULL);
9487 }
9488
9489 ctxt = xmlNewParserCtxt();
9490 if (ctxt == NULL) {
9491 if (xmlDefaultSAXHandler.error != NULL) {
9492 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9493 }
9494 return(NULL);
9495 }
9496
9497 inputStream = xmlNewInputStream(ctxt);
9498 if (inputStream == NULL) {
9499 xmlFreeParserCtxt(ctxt);
9500 return(NULL);
9501 }
9502
9503 inputStream->filename = xmlMemStrdup(filename);
9504 inputStream->buf = buf;
9505 inputStream->base = inputStream->buf->buffer->content;
9506 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009507 inputStream->end =
9508 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009509
9510 inputPush(ctxt, inputStream);
9511 if ((ctxt->directory == NULL) && (directory == NULL))
9512 directory = xmlParserGetDirectory(filename);
9513 if ((ctxt->directory == NULL) && (directory != NULL))
9514 ctxt->directory = directory;
9515
9516 return(ctxt);
9517}
9518
9519/**
9520 * xmlSAXParseFile:
9521 * @sax: the SAX handler block
9522 * @filename: the filename
9523 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9524 * documents
9525 *
9526 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9527 * compressed document is provided by default if found at compile-time.
9528 * It use the given SAX function block to handle the parsing callback.
9529 * If sax is NULL, fallback to the default DOM tree building routines.
9530 *
9531 * Returns the resulting document tree
9532 */
9533
9534xmlDocPtr
9535xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9536 int recovery) {
9537 xmlDocPtr ret;
9538 xmlParserCtxtPtr ctxt;
9539 char *directory = NULL;
9540
9541 ctxt = xmlCreateFileParserCtxt(filename);
9542 if (ctxt == NULL) {
9543 return(NULL);
9544 }
9545 if (sax != NULL) {
9546 if (ctxt->sax != NULL)
9547 xmlFree(ctxt->sax);
9548 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009549 }
9550
9551 if ((ctxt->directory == NULL) && (directory == NULL))
9552 directory = xmlParserGetDirectory(filename);
9553 if ((ctxt->directory == NULL) && (directory != NULL))
9554 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9555
9556 xmlParseDocument(ctxt);
9557
9558 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9559 else {
9560 ret = NULL;
9561 xmlFreeDoc(ctxt->myDoc);
9562 ctxt->myDoc = NULL;
9563 }
9564 if (sax != NULL)
9565 ctxt->sax = NULL;
9566 xmlFreeParserCtxt(ctxt);
9567
9568 return(ret);
9569}
9570
9571/**
9572 * xmlRecoverDoc:
9573 * @cur: a pointer to an array of xmlChar
9574 *
9575 * parse an XML in-memory document and build a tree.
9576 * In the case the document is not Well Formed, a tree is built anyway
9577 *
9578 * Returns the resulting document tree
9579 */
9580
9581xmlDocPtr
9582xmlRecoverDoc(xmlChar *cur) {
9583 return(xmlSAXParseDoc(NULL, cur, 1));
9584}
9585
9586/**
9587 * xmlParseFile:
9588 * @filename: the filename
9589 *
9590 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9591 * compressed document is provided by default if found at compile-time.
9592 *
9593 * Returns the resulting document tree
9594 */
9595
9596xmlDocPtr
9597xmlParseFile(const char *filename) {
9598 return(xmlSAXParseFile(NULL, filename, 0));
9599}
9600
9601/**
9602 * xmlRecoverFile:
9603 * @filename: the filename
9604 *
9605 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9606 * compressed document is provided by default if found at compile-time.
9607 * In the case the document is not Well Formed, a tree is built anyway
9608 *
9609 * Returns the resulting document tree
9610 */
9611
9612xmlDocPtr
9613xmlRecoverFile(const char *filename) {
9614 return(xmlSAXParseFile(NULL, filename, 1));
9615}
9616
9617
9618/**
9619 * xmlSetupParserForBuffer:
9620 * @ctxt: an XML parser context
9621 * @buffer: a xmlChar * buffer
9622 * @filename: a file name
9623 *
9624 * Setup the parser context to parse a new buffer; Clears any prior
9625 * contents from the parser context. The buffer parameter must not be
9626 * NULL, but the filename parameter can be
9627 */
9628void
9629xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9630 const char* filename)
9631{
9632 xmlParserInputPtr input;
9633
9634 input = xmlNewInputStream(ctxt);
9635 if (input == NULL) {
9636 perror("malloc");
9637 xmlFree(ctxt);
9638 return;
9639 }
9640
9641 xmlClearParserCtxt(ctxt);
9642 if (filename != NULL)
9643 input->filename = xmlMemStrdup(filename);
9644 input->base = buffer;
9645 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009646 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009647 inputPush(ctxt, input);
9648}
9649
9650/**
9651 * xmlSAXUserParseFile:
9652 * @sax: a SAX handler
9653 * @user_data: The user data returned on SAX callbacks
9654 * @filename: a file name
9655 *
9656 * parse an XML file and call the given SAX handler routines.
9657 * Automatic support for ZLIB/Compress compressed document is provided
9658 *
9659 * Returns 0 in case of success or a error number otherwise
9660 */
9661int
9662xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9663 const char *filename) {
9664 int ret = 0;
9665 xmlParserCtxtPtr ctxt;
9666
9667 ctxt = xmlCreateFileParserCtxt(filename);
9668 if (ctxt == NULL) return -1;
9669 if (ctxt->sax != &xmlDefaultSAXHandler)
9670 xmlFree(ctxt->sax);
9671 ctxt->sax = sax;
9672 if (user_data != NULL)
9673 ctxt->userData = user_data;
9674
9675 xmlParseDocument(ctxt);
9676
9677 if (ctxt->wellFormed)
9678 ret = 0;
9679 else {
9680 if (ctxt->errNo != 0)
9681 ret = ctxt->errNo;
9682 else
9683 ret = -1;
9684 }
9685 if (sax != NULL)
9686 ctxt->sax = NULL;
9687 xmlFreeParserCtxt(ctxt);
9688
9689 return ret;
9690}
9691
9692/************************************************************************
9693 * *
9694 * Front ends when parsing from memory *
9695 * *
9696 ************************************************************************/
9697
9698/**
9699 * xmlCreateMemoryParserCtxt:
9700 * @buffer: a pointer to a char array
9701 * @size: the size of the array
9702 *
9703 * Create a parser context for an XML in-memory document.
9704 *
9705 * Returns the new parser context or NULL
9706 */
9707xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009708xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009709 xmlParserCtxtPtr ctxt;
9710 xmlParserInputPtr input;
9711 xmlParserInputBufferPtr buf;
9712
9713 if (buffer == NULL)
9714 return(NULL);
9715 if (size <= 0)
9716 return(NULL);
9717
9718 ctxt = xmlNewParserCtxt();
9719 if (ctxt == NULL)
9720 return(NULL);
9721
9722 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9723 if (buf == NULL) return(NULL);
9724
9725 input = xmlNewInputStream(ctxt);
9726 if (input == NULL) {
9727 xmlFreeParserCtxt(ctxt);
9728 return(NULL);
9729 }
9730
9731 input->filename = NULL;
9732 input->buf = buf;
9733 input->base = input->buf->buffer->content;
9734 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009735 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009736
9737 inputPush(ctxt, input);
9738 return(ctxt);
9739}
9740
9741/**
9742 * xmlSAXParseMemory:
9743 * @sax: the SAX handler block
9744 * @buffer: an pointer to a char array
9745 * @size: the size of the array
9746 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9747 * documents
9748 *
9749 * parse an XML in-memory block and use the given SAX function block
9750 * to handle the parsing callback. If sax is NULL, fallback to the default
9751 * DOM tree building routines.
9752 *
9753 * Returns the resulting document tree
9754 */
9755xmlDocPtr
9756xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9757 xmlDocPtr ret;
9758 xmlParserCtxtPtr ctxt;
9759
9760 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9761 if (ctxt == NULL) return(NULL);
9762 if (sax != NULL) {
9763 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009764 }
9765
9766 xmlParseDocument(ctxt);
9767
9768 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9769 else {
9770 ret = NULL;
9771 xmlFreeDoc(ctxt->myDoc);
9772 ctxt->myDoc = NULL;
9773 }
9774 if (sax != NULL)
9775 ctxt->sax = NULL;
9776 xmlFreeParserCtxt(ctxt);
9777
9778 return(ret);
9779}
9780
9781/**
9782 * xmlParseMemory:
9783 * @buffer: an pointer to a char array
9784 * @size: the size of the array
9785 *
9786 * parse an XML in-memory block and build a tree.
9787 *
9788 * Returns the resulting document tree
9789 */
9790
9791xmlDocPtr xmlParseMemory(char *buffer, int size) {
9792 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9793}
9794
9795/**
9796 * xmlRecoverMemory:
9797 * @buffer: an pointer to a char array
9798 * @size: the size of the array
9799 *
9800 * parse an XML in-memory block and build a tree.
9801 * In the case the document is not Well Formed, a tree is built anyway
9802 *
9803 * Returns the resulting document tree
9804 */
9805
9806xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9807 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9808}
9809
9810/**
9811 * xmlSAXUserParseMemory:
9812 * @sax: a SAX handler
9813 * @user_data: The user data returned on SAX callbacks
9814 * @buffer: an in-memory XML document input
9815 * @size: the length of the XML document in bytes
9816 *
9817 * A better SAX parsing routine.
9818 * parse an XML in-memory buffer and call the given SAX handler routines.
9819 *
9820 * Returns 0 in case of success or a error number otherwise
9821 */
9822int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009823 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009824 int ret = 0;
9825 xmlParserCtxtPtr ctxt;
9826 xmlSAXHandlerPtr oldsax = NULL;
9827
9828 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9829 if (ctxt == NULL) return -1;
9830 if (sax != NULL) {
9831 oldsax = ctxt->sax;
9832 ctxt->sax = sax;
9833 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009834 if (user_data != NULL)
9835 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009836
9837 xmlParseDocument(ctxt);
9838
9839 if (ctxt->wellFormed)
9840 ret = 0;
9841 else {
9842 if (ctxt->errNo != 0)
9843 ret = ctxt->errNo;
9844 else
9845 ret = -1;
9846 }
9847 if (sax != NULL) {
9848 ctxt->sax = oldsax;
9849 }
9850 xmlFreeParserCtxt(ctxt);
9851
9852 return ret;
9853}
9854
9855/**
9856 * xmlCreateDocParserCtxt:
9857 * @cur: a pointer to an array of xmlChar
9858 *
9859 * Creates a parser context for an XML in-memory document.
9860 *
9861 * Returns the new parser context or NULL
9862 */
9863xmlParserCtxtPtr
9864xmlCreateDocParserCtxt(xmlChar *cur) {
9865 int len;
9866
9867 if (cur == NULL)
9868 return(NULL);
9869 len = xmlStrlen(cur);
9870 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9871}
9872
9873/**
9874 * xmlSAXParseDoc:
9875 * @sax: the SAX handler block
9876 * @cur: a pointer to an array of xmlChar
9877 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9878 * documents
9879 *
9880 * parse an XML in-memory document and build a tree.
9881 * It use the given SAX function block to handle the parsing callback.
9882 * If sax is NULL, fallback to the default DOM tree building routines.
9883 *
9884 * Returns the resulting document tree
9885 */
9886
9887xmlDocPtr
9888xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9889 xmlDocPtr ret;
9890 xmlParserCtxtPtr ctxt;
9891
9892 if (cur == NULL) return(NULL);
9893
9894
9895 ctxt = xmlCreateDocParserCtxt(cur);
9896 if (ctxt == NULL) return(NULL);
9897 if (sax != NULL) {
9898 ctxt->sax = sax;
9899 ctxt->userData = NULL;
9900 }
9901
9902 xmlParseDocument(ctxt);
9903 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9904 else {
9905 ret = NULL;
9906 xmlFreeDoc(ctxt->myDoc);
9907 ctxt->myDoc = NULL;
9908 }
9909 if (sax != NULL)
9910 ctxt->sax = NULL;
9911 xmlFreeParserCtxt(ctxt);
9912
9913 return(ret);
9914}
9915
9916/**
9917 * xmlParseDoc:
9918 * @cur: a pointer to an array of xmlChar
9919 *
9920 * parse an XML in-memory document and build a tree.
9921 *
9922 * Returns the resulting document tree
9923 */
9924
9925xmlDocPtr
9926xmlParseDoc(xmlChar *cur) {
9927 return(xmlSAXParseDoc(NULL, cur, 0));
9928}
9929
9930
9931/************************************************************************
9932 * *
9933 * Miscellaneous *
9934 * *
9935 ************************************************************************/
9936
9937#ifdef LIBXML_XPATH_ENABLED
9938#include <libxml/xpath.h>
9939#endif
9940
/* set to 1 by xmlInitParser(), reset to 0 by xmlCleanupParser() */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 * Calling it again once the initialization has been done is a no-op.
 */

void
xmlInitParser(void) {
    if (xmlParserInitialized == 0) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* flag the initialization as done only once every step has run */
        xmlParserInitialized = 1;
    }
}
9969
9970/**
9971 * xmlCleanupParser:
9972 *
9973 * Cleanup function for the XML parser. It tries to reclaim all
9974 * parsing related global memory allocated for the parser processing.
9975 * It doesn't deallocate any document related memory. Calling this
9976 * function should not prevent reusing the parser.
9977 */
9978
9979void
9980xmlCleanupParser(void) {
9981 xmlParserInitialized = 0;
9982 xmlCleanupCharEncodingHandlers();
9983 xmlCleanupPredefinedEntities();
9984}
9985
9986/**
9987 * xmlPedanticParserDefault:
9988 * @val: int 0 or 1
9989 *
9990 * Set and return the previous value for enabling pedantic warnings.
9991 *
9992 * Returns the last value for 0 for no substitution, 1 for substitution.
9993 */
9994
9995int
9996xmlPedanticParserDefault(int val) {
9997 int old = xmlPedanticParserDefaultValue;
9998
9999 xmlPedanticParserDefaultValue = val;
10000 return(old);
10001}
10002
10003/**
10004 * xmlSubstituteEntitiesDefault:
10005 * @val: int 0 or 1
10006 *
10007 * Set and return the previous value for default entity support.
10008 * Initially the parser always keep entity references instead of substituting
10009 * entity values in the output. This function has to be used to change the
10010 * default parser behaviour
10011 * SAX::subtituteEntities() has to be used for changing that on a file by
10012 * file basis.
10013 *
10014 * Returns the last value for 0 for no substitution, 1 for substitution.
10015 */
10016
10017int
10018xmlSubstituteEntitiesDefault(int val) {
10019 int old = xmlSubstituteEntitiesDefaultValue;
10020
10021 xmlSubstituteEntitiesDefaultValue = val;
10022 return(old);
10023}
10024
10025/**
10026 * xmlKeepBlanksDefault:
10027 * @val: int 0 or 1
10028 *
10029 * Set and return the previous value for default blanks text nodes support.
10030 * The 1.x version of the parser used an heuristic to try to detect
10031 * ignorable white spaces. As a result the SAX callback was generating
10032 * ignorableWhitespace() callbacks instead of characters() one, and when
10033 * using the DOM output text nodes containing those blanks were not generated.
10034 * The 2.x and later version will switch to the XML standard way and
10035 * ignorableWhitespace() are only generated when running the parser in
10036 * validating mode and when the current element doesn't allow CDATA or
10037 * mixed content.
10038 * This function is provided as a way to force the standard behaviour
10039 * on 1.X libs and to switch back to the old mode for compatibility when
10040 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10041 * by using xmlIsBlankNode() commodity function to detect the "empty"
10042 * nodes generated.
10043 * This value also affect autogeneration of indentation when saving code
10044 * if blanks sections are kept, indentation is not generated.
10045 *
10046 * Returns the last value for 0 for no substitution, 1 for substitution.
10047 */
10048
10049int
10050xmlKeepBlanksDefault(int val) {
10051 int old = xmlKeepBlanksDefaultValue;
10052
10053 xmlKeepBlanksDefaultValue = val;
10054 xmlIndentTreeOutput = !val;
10055 return(old);
10056}
10057