blob: bac835733d15891018245b23e2565fd697b73647 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 * and xmlDoValidityCheckingDefaultValue for VMS
 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
/*
 * Directory separator character: a backslash when WIN32 is defined,
 * a forward slash otherwise.
 */
#ifdef WIN32
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Buffer sizing constants. In xmlStringDecodeEntities the BIG size is the
 * initial size of the translation buffer, and the small size is the amount
 * of headroom below which that buffer gets grown.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
79
/*
 * Various global defaults for parsing
 *
 * xmlParserDebugEntities, when non-zero, makes the entity handling code in
 * this file emit traces through xmlGenericError (input push/pop, PE
 * references, string decoding).
 * The other variables are only defined here; judging by their names they
 * are the default values for the matching parser options
 * (NOTE(review): their consumers are outside this part of the file).
 *
 * On VMS the two longest names are defined under a truncated identifier and
 * the full name is mapped onto it with a macro.
 */
int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;
#ifdef VMS
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal 
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
97
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
106
/*
 * Forward declarations for routines used before their definition.
 * xmlParserHandlePEReference is defined later in this file; the other two
 * are not defined in this part of the file.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static int
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000117
118/************************************************************************
119 * *
120 * Parser stacks related functions and macros *
121 * *
122 ************************************************************************/
123
/* Forward declaration, used by xmlStringDecodeEntities below. */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
126
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) generates two functions operating on the
 * context fields name##Tab (storage), name##Nr (number of items),
 * name##Max (capacity) and name (cached top of stack):
 *
 *   scope int  name##Push(ctxt, value)
 *       Stores value on top of the stack, doubling the table when it is
 *       full. Returns the index at which the value was stored, or 0 if the
 *       table could not be grown (so 0 is also the result of the first
 *       successful push).
 *       On allocation failure the stack is left untouched: the result of
 *       xmlRealloc goes through a temporary so the existing table is
 *       neither leaked nor replaced by NULL, and name##Max is only updated
 *       once the new table is in place.
 *
 *   scope type name##Pop(ctxt)
 *       Removes and returns the top item, or 0 if the stack is empty.
 *       The cached top is updated (NULL when the stack becomes empty) and
 *       the vacated slot is cleared.
 */

#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
	int name##NewMax = ctxt->name##Max * 2;				\
	type *name##NewTab = (type *) xmlRealloc(ctxt->name##Tab,	\
	             name##NewMax * sizeof(ctxt->name##Tab[0]));	\
        if (name##NewTab == NULL) {					\
	    xmlGenericError(xmlGenericErrorContext,			\
		    "realloc failed !\n");				\
	    return(0);							\
	}								\
	ctxt->name##Tab = name##NewTab;					\
	ctxt->name##Max = name##NewMax;					\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
159
/*
 * Those macros actually generate the functions:
 *   inputPush / inputPop  for the stack of xmlParserInputPtr
 *   nodePush  / nodePop   for the stack of xmlNodePtr
 *   namePush  / namePop   for the stack of xmlChar *
 */
PUSH_AND_POP(extern, xmlParserInputPtr, input)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
166
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000167static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000168 if (ctxt->spaceNr >= ctxt->spaceMax) {
169 ctxt->spaceMax *= 2;
170 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
171 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
172 if (ctxt->spaceTab == NULL) {
173 xmlGenericError(xmlGenericErrorContext,
174 "realloc failed !\n");
175 return(0);
176 }
177 }
178 ctxt->spaceTab[ctxt->spaceNr] = val;
179 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
180 return(ctxt->spaceNr++);
181}
182
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000183static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000184 int ret;
185 if (ctxt->spaceNr <= 0) return(0);
186 ctxt->spaceNr--;
187 if (ctxt->spaceNr > 0)
188 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
189 else
190 ctxt->space = NULL;
191 ret = ctxt->spaceTab[ctxt->spaceNr];
192 ctxt->spaceTab[ctxt->spaceNr] = -1;
193 return(ret);
194}
195
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt (the parser context) in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           current xmlChar of the input.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but looks only at the input buffer: it evaluates
 *           to -1 while a token is pending, so comparisons against input
 *           characters fail in that case.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser. Hands over to
 *           xmlParserHandlePEReference if a '%' is reached and pops the
 *           input if it is exhausted.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character (calls xmlNextChar).
 *   NEXT1   Skip exactly one xmlChar and try to refill the input when its
 *           end is reached; does no line/column or PE reference handling.
 *   NEXTL(l) Skip l xmlChars in the input buffer, keeping line and column
 *           up to date and clearing any pending token.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK, GROW and NEXT1 expand to a bare "if (...) { }" or "{ }"
 * block and COPY_BUF to an unbraced if/else, not to a do { } while (0)
 * statement. They are not safe as the body of an unbraced if/else, and some
 * call sites use GROW without a trailing semicolon.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/* Discard already consumed input once more than INPUT_CHUNK is behind us. */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

/* Refill the input when less than INPUT_CHUNK remains ahead of us. */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/* l == 1 means a single byte char; any other l goes through the encoder. */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000282
283/**
284 * xmlSkipBlankChars:
285 * @ctxt: the XML parser context
286 *
287 * skip all blanks character found at that point in the input streams.
288 * It pops up finished entities in the process if allowable at that point.
289 *
290 * Returns the number of space chars skipped
291 */
292
293int
294xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000295 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000296
Daniel Veillard02141ea2001-04-30 11:46:40 +0000297 if (ctxt->token != 0) {
298 if (!IS_BLANK(ctxt->token))
299 return(0);
300 ctxt->token = 0;
301 res++;
302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303 /*
304 * It's Okay to use CUR/NEXT here since all the blanks are on
305 * the ASCII range.
306 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000307 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
308 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000309 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000310 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000311 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000312 cur = ctxt->input->cur;
313 while (IS_BLANK(*cur)) {
314 if (*cur == '\n') {
315 ctxt->input->line++; ctxt->input->col = 1;
316 }
317 cur++;
318 res++;
319 if (*cur == 0) {
320 ctxt->input->cur = cur;
321 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
322 cur = ctxt->input->cur;
323 }
324 }
325 ctxt->input->cur = cur;
326 } else {
327 int cur;
328 do {
329 cur = CUR;
330 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
331 NEXT;
332 cur = CUR;
333 res++;
334 }
335 while ((cur == 0) && (ctxt->inputNr > 1) &&
336 (ctxt->instate != XML_PARSER_COMMENT)) {
337 xmlPopInput(ctxt);
338 cur = CUR;
339 }
340 /*
341 * Need to handle support of entities branching here
342 */
343 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
344 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
345 }
Owen Taylor3473f882001-02-23 17:55:21 +0000346 return(res);
347}
348
349/************************************************************************
350 * *
351 * Commodity functions to handle entities *
352 * *
353 ************************************************************************/
354
355/**
356 * xmlPopInput:
357 * @ctxt: an XML parser context
358 *
359 * xmlPopInput: the current input pointed by ctxt->input came to an end
360 * pop it and return the next char.
361 *
362 * Returns the current xmlChar in the parser context
363 */
364xmlChar
365xmlPopInput(xmlParserCtxtPtr ctxt) {
366 if (ctxt->inputNr == 1) return(0); /* End of main Input */
367 if (xmlParserDebugEntities)
368 xmlGenericError(xmlGenericErrorContext,
369 "Popping input %d\n", ctxt->inputNr);
370 xmlFreeInputStream(inputPop(ctxt));
371 if ((*ctxt->input->cur == 0) &&
372 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
373 return(xmlPopInput(ctxt));
374 return(CUR);
375}
376
377/**
378 * xmlPushInput:
379 * @ctxt: an XML parser context
380 * @input: an XML parser input fragment (entity, XML fragment ...).
381 *
382 * xmlPushInput: switch to a new input stream which is stacked on top
383 * of the previous one(s).
384 */
385void
386xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
387 if (input == NULL) return;
388
389 if (xmlParserDebugEntities) {
390 if ((ctxt->input != NULL) && (ctxt->input->filename))
391 xmlGenericError(xmlGenericErrorContext,
392 "%s(%d): ", ctxt->input->filename,
393 ctxt->input->line);
394 xmlGenericError(xmlGenericErrorContext,
395 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
396 }
397 inputPush(ctxt, input);
398 GROW;
399}
400
401/**
402 * xmlParseCharRef:
403 * @ctxt: an XML parser context
404 *
405 * parse Reference declarations
406 *
407 * [66] CharRef ::= '&#' [0-9]+ ';' |
408 * '&#x' [0-9a-fA-F]+ ';'
409 *
410 * [ WFC: Legal Character ]
411 * Characters referred to using character references must match the
412 * production for Char.
413 *
414 * Returns the value parsed (as an int), 0 in case of error
415 */
416int
417xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000418 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000419 int count = 0;
420
421 if (ctxt->token != 0) {
422 val = ctxt->token;
423 ctxt->token = 0;
424 return(val);
425 }
426 /*
427 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
428 */
429 if ((RAW == '&') && (NXT(1) == '#') &&
430 (NXT(2) == 'x')) {
431 SKIP(3);
432 GROW;
433 while (RAW != ';') { /* loop blocked by count */
434 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
435 val = val * 16 + (CUR - '0');
436 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
437 val = val * 16 + (CUR - 'a') + 10;
438 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
439 val = val * 16 + (CUR - 'A') + 10;
440 else {
441 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
443 ctxt->sax->error(ctxt->userData,
444 "xmlParseCharRef: invalid hexadecimal value\n");
445 ctxt->wellFormed = 0;
446 ctxt->disableSAX = 1;
447 val = 0;
448 break;
449 }
450 NEXT;
451 count++;
452 }
453 if (RAW == ';') {
454 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
455 ctxt->nbChars ++;
456 ctxt->input->cur++;
457 }
458 } else if ((RAW == '&') && (NXT(1) == '#')) {
459 SKIP(2);
460 GROW;
461 while (RAW != ';') { /* loop blocked by count */
462 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
463 val = val * 10 + (CUR - '0');
464 else {
465 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
467 ctxt->sax->error(ctxt->userData,
468 "xmlParseCharRef: invalid decimal value\n");
469 ctxt->wellFormed = 0;
470 ctxt->disableSAX = 1;
471 val = 0;
472 break;
473 }
474 NEXT;
475 count++;
476 }
477 if (RAW == ';') {
478 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
479 ctxt->nbChars ++;
480 ctxt->input->cur++;
481 }
482 } else {
483 ctxt->errNo = XML_ERR_INVALID_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 }
490
491 /*
492 * [ WFC: Legal Character ]
493 * Characters referred to using character references must match the
494 * production for Char.
495 */
496 if (IS_CHAR(val)) {
497 return(val);
498 } else {
499 ctxt->errNo = XML_ERR_INVALID_CHAR;
500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
501 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
502 val);
503 ctxt->wellFormed = 0;
504 ctxt->disableSAX = 1;
505 }
506 return(0);
507}
508
509/**
510 * xmlParseStringCharRef:
511 * @ctxt: an XML parser context
512 * @str: a pointer to an index in the string
513 *
514 * parse Reference declarations, variant parsing from a string rather
515 * than an an input flow.
516 *
517 * [66] CharRef ::= '&#' [0-9]+ ';' |
518 * '&#x' [0-9a-fA-F]+ ';'
519 *
520 * [ WFC: Legal Character ]
521 * Characters referred to using character references must match the
522 * production for Char.
523 *
524 * Returns the value parsed (as an int), 0 in case of error, str will be
525 * updated to the current value of the index
526 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000527static int
Owen Taylor3473f882001-02-23 17:55:21 +0000528xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
529 const xmlChar *ptr;
530 xmlChar cur;
531 int val = 0;
532
533 if ((str == NULL) || (*str == NULL)) return(0);
534 ptr = *str;
535 cur = *ptr;
536 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
537 ptr += 3;
538 cur = *ptr;
539 while (cur != ';') { /* Non input consuming loop */
540 if ((cur >= '0') && (cur <= '9'))
541 val = val * 16 + (cur - '0');
542 else if ((cur >= 'a') && (cur <= 'f'))
543 val = val * 16 + (cur - 'a') + 10;
544 else if ((cur >= 'A') && (cur <= 'F'))
545 val = val * 16 + (cur - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseStringCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
552 ctxt->disableSAX = 1;
553 val = 0;
554 break;
555 }
556 ptr++;
557 cur = *ptr;
558 }
559 if (cur == ';')
560 ptr++;
561 } else if ((cur == '&') && (ptr[1] == '#')){
562 ptr += 2;
563 cur = *ptr;
564 while (cur != ';') { /* Non input consuming loops */
565 if ((cur >= '0') && (cur <= '9'))
566 val = val * 10 + (cur - '0');
567 else {
568 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
570 ctxt->sax->error(ctxt->userData,
571 "xmlParseStringCharRef: invalid decimal value\n");
572 ctxt->wellFormed = 0;
573 ctxt->disableSAX = 1;
574 val = 0;
575 break;
576 }
577 ptr++;
578 cur = *ptr;
579 }
580 if (cur == ';')
581 ptr++;
582 } else {
583 ctxt->errNo = XML_ERR_INVALID_CHARREF;
584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
585 ctxt->sax->error(ctxt->userData,
586 "xmlParseCharRef: invalid value\n");
587 ctxt->wellFormed = 0;
588 ctxt->disableSAX = 1;
589 return(0);
590 }
591 *str = ptr;
592
593 /*
594 * [ WFC: Legal Character ]
595 * Characters referred to using character references must match the
596 * production for Char.
597 */
598 if (IS_CHAR(val)) {
599 return(val);
600 } else {
601 ctxt->errNo = XML_ERR_INVALID_CHAR;
602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603 ctxt->sax->error(ctxt->userData,
604 "CharRef: invalid xmlChar value %d\n", val);
605 ctxt->wellFormed = 0;
606 ctxt->disableSAX = 1;
607 }
608 return(0);
609}
610
611/**
612 * xmlParserHandlePEReference:
613 * @ctxt: the parser context
614 *
615 * [69] PEReference ::= '%' Name ';'
616 *
617 * [ WFC: No Recursion ]
618 * A parsed entity must not contain a recursive
619 * reference to itself, either directly or indirectly.
620 *
621 * [ WFC: Entity Declared ]
622 * In a document without any DTD, a document with only an internal DTD
623 * subset which contains no parameter entity references, or a document
624 * with "standalone='yes'", ... ... The declaration of a parameter
625 * entity must precede any reference to it...
626 *
627 * [ VC: Entity Declared ]
628 * In a document with an external subset or external parameter entities
629 * with "standalone='no'", ... ... The declaration of a parameter entity
630 * must precede any reference to it...
631 *
632 * [ WFC: In DTD ]
633 * Parameter-entity references may only appear in the DTD.
634 * NOTE: misleading but this is handled.
635 *
636 * A PEReference may have been detected in the current input stream
637 * the handling is done accordingly to
638 * http://www.w3.org/TR/REC-xml#entproc
639 * i.e.
640 * - Included in literal in entity values
641 * - Included as Paraemeter Entity reference within DTDs
642 */
643void
644xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
645 xmlChar *name;
646 xmlEntityPtr entity = NULL;
647 xmlParserInputPtr input;
648
649 if (ctxt->token != 0) {
650 return;
651 }
652 if (RAW != '%') return;
653 switch(ctxt->instate) {
654 case XML_PARSER_CDATA_SECTION:
655 return;
656 case XML_PARSER_COMMENT:
657 return;
658 case XML_PARSER_START_TAG:
659 return;
660 case XML_PARSER_END_TAG:
661 return;
662 case XML_PARSER_EOF:
663 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
665 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
666 ctxt->wellFormed = 0;
667 ctxt->disableSAX = 1;
668 return;
669 case XML_PARSER_PROLOG:
670 case XML_PARSER_START:
671 case XML_PARSER_MISC:
672 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
674 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
675 ctxt->wellFormed = 0;
676 ctxt->disableSAX = 1;
677 return;
678 case XML_PARSER_ENTITY_DECL:
679 case XML_PARSER_CONTENT:
680 case XML_PARSER_ATTRIBUTE_VALUE:
681 case XML_PARSER_PI:
682 case XML_PARSER_SYSTEM_LITERAL:
683 /* we just ignore it there */
684 return;
685 case XML_PARSER_EPILOG:
686 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
688 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
689 ctxt->wellFormed = 0;
690 ctxt->disableSAX = 1;
691 return;
692 case XML_PARSER_ENTITY_VALUE:
693 /*
694 * NOTE: in the case of entity values, we don't do the
695 * substitution here since we need the literal
696 * entity value to be able to save the internal
697 * subset of the document.
698 * This will be handled by xmlStringDecodeEntities
699 */
700 return;
701 case XML_PARSER_DTD:
702 /*
703 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
704 * In the internal DTD subset, parameter-entity references
705 * can occur only where markup declarations can occur, not
706 * within markup declarations.
707 * In that case this is handled in xmlParseMarkupDecl
708 */
709 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
710 return;
711 break;
712 case XML_PARSER_IGNORE:
713 return;
714 }
715
716 NEXT;
717 name = xmlParseName(ctxt);
718 if (xmlParserDebugEntities)
719 xmlGenericError(xmlGenericErrorContext,
720 "PE Reference: %s\n", name);
721 if (name == NULL) {
722 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
724 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
725 ctxt->wellFormed = 0;
726 ctxt->disableSAX = 1;
727 } else {
728 if (RAW == ';') {
729 NEXT;
730 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
731 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
732 if (entity == NULL) {
733
734 /*
735 * [ WFC: Entity Declared ]
736 * In a document without any DTD, a document with only an
737 * internal DTD subset which contains no parameter entity
738 * references, or a document with "standalone='yes'", ...
739 * ... The declaration of a parameter entity must precede
740 * any reference to it...
741 */
742 if ((ctxt->standalone == 1) ||
743 ((ctxt->hasExternalSubset == 0) &&
744 (ctxt->hasPErefs == 0))) {
745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
746 ctxt->sax->error(ctxt->userData,
747 "PEReference: %%%s; not found\n", name);
748 ctxt->wellFormed = 0;
749 ctxt->disableSAX = 1;
750 } else {
751 /*
752 * [ VC: Entity Declared ]
753 * In a document with an external subset or external
754 * parameter entities with "standalone='no'", ...
755 * ... The declaration of a parameter entity must precede
756 * any reference to it...
757 */
758 if ((!ctxt->disableSAX) &&
759 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
760 ctxt->vctxt.error(ctxt->vctxt.userData,
761 "PEReference: %%%s; not found\n", name);
762 } else if ((!ctxt->disableSAX) &&
763 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
764 ctxt->sax->warning(ctxt->userData,
765 "PEReference: %%%s; not found\n", name);
766 ctxt->valid = 0;
767 }
768 } else {
769 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
770 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000771 xmlChar start[4];
772 xmlCharEncoding enc;
773
Owen Taylor3473f882001-02-23 17:55:21 +0000774 /*
775 * handle the extra spaces added before and after
776 * c.f. http://www.w3.org/TR/REC-xml#as-PE
777 * this is done independantly.
778 */
779 input = xmlNewEntityInputStream(ctxt, entity);
780 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000781
782 /*
783 * Get the 4 first bytes and decode the charset
784 * if enc != XML_CHAR_ENCODING_NONE
785 * plug some encoding conversion routines.
786 */
787 GROW
788 start[0] = RAW;
789 start[1] = NXT(1);
790 start[2] = NXT(2);
791 start[3] = NXT(3);
792 enc = xmlDetectCharEncoding(start, 4);
793 if (enc != XML_CHAR_ENCODING_NONE) {
794 xmlSwitchEncoding(ctxt, enc);
795 }
796
Owen Taylor3473f882001-02-23 17:55:21 +0000797 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
798 (RAW == '<') && (NXT(1) == '?') &&
799 (NXT(2) == 'x') && (NXT(3) == 'm') &&
800 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
801 xmlParseTextDecl(ctxt);
802 }
803 if (ctxt->token == 0)
804 ctxt->token = ' ';
805 } else {
806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
807 ctxt->sax->error(ctxt->userData,
808 "xmlHandlePEReference: %s is not a parameter entity\n",
809 name);
810 ctxt->wellFormed = 0;
811 ctxt->disableSAX = 1;
812 }
813 }
814 } else {
815 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
817 ctxt->sax->error(ctxt->userData,
818 "xmlHandlePEReference: expecting ';'\n");
819 ctxt->wellFormed = 0;
820 ctxt->disableSAX = 1;
821 }
822 xmlFree(name);
823 }
824}
825
/*
 * Macro used to grow the current buffer.
 *
 * Expects an xmlChar * variable named @buffer and an integer variable named
 * @buffer##_size in scope, inside a function returning a pointer.
 * The size is doubled and the buffer reallocated. If the reallocation
 * fails, the old buffer is released and the enclosing function returns
 * NULL. The result of xmlRealloc goes through a temporary so that the old
 * buffer is not lost (and leaked) on failure.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
	perror("realloc failed");					\
	if (buffer != NULL)						\
	    xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
838
839/**
840 * xmlStringDecodeEntities:
841 * @ctxt: the parser context
842 * @str: the input string
843 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
844 * @end: an end marker xmlChar, 0 if none
845 * @end2: an end marker xmlChar, 0 if none
846 * @end3: an end marker xmlChar, 0 if none
847 *
848 * Takes a entity string content and process to do the adequate subtitutions.
849 *
850 * [67] Reference ::= EntityRef | CharRef
851 *
852 * [69] PEReference ::= '%' Name ';'
853 *
854 * Returns A newly allocated string with the substitution done. The caller
855 * must deallocate it !
856 */
857xmlChar *
858xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
859 xmlChar end, xmlChar end2, xmlChar end3) {
860 xmlChar *buffer = NULL;
861 int buffer_size = 0;
862
863 xmlChar *current = NULL;
864 xmlEntityPtr ent;
865 int c,l;
866 int nbchars = 0;
867
868 if (str == NULL)
869 return(NULL);
870
871 if (ctxt->depth > 40) {
872 ctxt->errNo = XML_ERR_ENTITY_LOOP;
873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
874 ctxt->sax->error(ctxt->userData,
875 "Detected entity reference loop\n");
876 ctxt->wellFormed = 0;
877 ctxt->disableSAX = 1;
878 return(NULL);
879 }
880
881 /*
882 * allocate a translation buffer.
883 */
884 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
885 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
886 if (buffer == NULL) {
887 perror("xmlDecodeEntities: malloc failed");
888 return(NULL);
889 }
890
891 /*
892 * Ok loop until we reach one of the ending char or a size limit.
893 * we are operating on already parsed values.
894 */
895 c = CUR_SCHAR(str, l);
896 while ((c != 0) && (c != end) && /* non input consuming loop */
897 (c != end2) && (c != end3)) {
898
899 if (c == 0) break;
900 if ((c == '&') && (str[1] == '#')) {
901 int val = xmlParseStringCharRef(ctxt, &str);
902 if (val != 0) {
903 COPY_BUF(0,buffer,nbchars,val);
904 }
905 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
906 if (xmlParserDebugEntities)
907 xmlGenericError(xmlGenericErrorContext,
908 "String decoding Entity Reference: %.30s\n",
909 str);
910 ent = xmlParseStringEntityRef(ctxt, &str);
911 if ((ent != NULL) &&
912 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
913 if (ent->content != NULL) {
914 COPY_BUF(0,buffer,nbchars,ent->content[0]);
915 } else {
916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
917 ctxt->sax->error(ctxt->userData,
918 "internal error entity has no content\n");
919 }
920 } else if ((ent != NULL) && (ent->content != NULL)) {
921 xmlChar *rep;
922
923 ctxt->depth++;
924 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
925 0, 0, 0);
926 ctxt->depth--;
927 if (rep != NULL) {
928 current = rep;
929 while (*current != 0) { /* non input consuming loop */
930 buffer[nbchars++] = *current++;
931 if (nbchars >
932 buffer_size - XML_PARSER_BUFFER_SIZE) {
933 growBuffer(buffer);
934 }
935 }
936 xmlFree(rep);
937 }
938 } else if (ent != NULL) {
939 int i = xmlStrlen(ent->name);
940 const xmlChar *cur = ent->name;
941
942 buffer[nbchars++] = '&';
943 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
944 growBuffer(buffer);
945 }
946 for (;i > 0;i--)
947 buffer[nbchars++] = *cur++;
948 buffer[nbchars++] = ';';
949 }
950 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
951 if (xmlParserDebugEntities)
952 xmlGenericError(xmlGenericErrorContext,
953 "String decoding PE Reference: %.30s\n", str);
954 ent = xmlParseStringPEReference(ctxt, &str);
955 if (ent != NULL) {
956 xmlChar *rep;
957
958 ctxt->depth++;
959 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
960 0, 0, 0);
961 ctxt->depth--;
962 if (rep != NULL) {
963 current = rep;
964 while (*current != 0) { /* non input consuming loop */
965 buffer[nbchars++] = *current++;
966 if (nbchars >
967 buffer_size - XML_PARSER_BUFFER_SIZE) {
968 growBuffer(buffer);
969 }
970 }
971 xmlFree(rep);
972 }
973 }
974 } else {
975 COPY_BUF(l,buffer,nbchars,c);
976 str += l;
977 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
978 growBuffer(buffer);
979 }
980 }
981 c = CUR_SCHAR(str, l);
982 }
983 buffer[nbchars++] = 0;
984 return(buffer);
985}
986
987
988/************************************************************************
989 * *
990 * Commodity functions to handle xmlChars *
991 * *
992 ************************************************************************/
993
994/**
995 * xmlStrndup:
996 * @cur: the input xmlChar *
997 * @len: the len of @cur
998 *
999 * a strndup for array of xmlChar's
1000 *
1001 * Returns a new xmlChar * or NULL
1002 */
1003xmlChar *
1004xmlStrndup(const xmlChar *cur, int len) {
1005 xmlChar *ret;
1006
1007 if ((cur == NULL) || (len < 0)) return(NULL);
1008 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1009 if (ret == NULL) {
1010 xmlGenericError(xmlGenericErrorContext,
1011 "malloc of %ld byte failed\n",
1012 (len + 1) * (long)sizeof(xmlChar));
1013 return(NULL);
1014 }
1015 memcpy(ret, cur, len * sizeof(xmlChar));
1016 ret[len] = 0;
1017 return(ret);
1018}
1019
1020/**
1021 * xmlStrdup:
1022 * @cur: the input xmlChar *
1023 *
1024 * a strdup for array of xmlChar's. Since they are supposed to be
1025 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1026 * a termination mark of '0'.
1027 *
1028 * Returns a new xmlChar * or NULL
1029 */
1030xmlChar *
1031xmlStrdup(const xmlChar *cur) {
1032 const xmlChar *p = cur;
1033
1034 if (cur == NULL) return(NULL);
1035 while (*p != 0) p++; /* non input consuming */
1036 return(xmlStrndup(cur, p - cur));
1037}
1038
1039/**
1040 * xmlCharStrndup:
1041 * @cur: the input char *
1042 * @len: the len of @cur
1043 *
1044 * a strndup for char's to xmlChar's
1045 *
1046 * Returns a new xmlChar * or NULL
1047 */
1048
1049xmlChar *
1050xmlCharStrndup(const char *cur, int len) {
1051 int i;
1052 xmlChar *ret;
1053
1054 if ((cur == NULL) || (len < 0)) return(NULL);
1055 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1056 if (ret == NULL) {
1057 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1058 (len + 1) * (long)sizeof(xmlChar));
1059 return(NULL);
1060 }
1061 for (i = 0;i < len;i++)
1062 ret[i] = (xmlChar) cur[i];
1063 ret[len] = 0;
1064 return(ret);
1065}
1066
1067/**
1068 * xmlCharStrdup:
1069 * @cur: the input char *
1070 * @len: the len of @cur
1071 *
1072 * a strdup for char's to xmlChar's
1073 *
1074 * Returns a new xmlChar * or NULL
1075 */
1076
1077xmlChar *
1078xmlCharStrdup(const char *cur) {
1079 const char *p = cur;
1080
1081 if (cur == NULL) return(NULL);
1082 while (*p != '\0') p++; /* non input consuming */
1083 return(xmlCharStrndup(cur, p - cur));
1084}
1085
1086/**
1087 * xmlStrcmp:
1088 * @str1: the first xmlChar *
1089 * @str2: the second xmlChar *
1090 *
1091 * a strcmp for xmlChar's
1092 *
1093 * Returns the integer result of the comparison
1094 */
1095
1096int
1097xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1098 register int tmp;
1099
1100 if (str1 == str2) return(0);
1101 if (str1 == NULL) return(-1);
1102 if (str2 == NULL) return(1);
1103 do {
1104 tmp = *str1++ - *str2;
1105 if (tmp != 0) return(tmp);
1106 } while (*str2++ != 0);
1107 return 0;
1108}
1109
1110/**
1111 * xmlStrEqual:
1112 * @str1: the first xmlChar *
1113 * @str2: the second xmlChar *
1114 *
1115 * Check if both string are equal of have same content
1116 * Should be a bit more readable and faster than xmlStrEqual()
1117 *
1118 * Returns 1 if they are equal, 0 if they are different
1119 */
1120
1121int
1122xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1123 if (str1 == str2) return(1);
1124 if (str1 == NULL) return(0);
1125 if (str2 == NULL) return(0);
1126 do {
1127 if (*str1++ != *str2) return(0);
1128 } while (*str2++);
1129 return(1);
1130}
1131
1132/**
1133 * xmlStrncmp:
1134 * @str1: the first xmlChar *
1135 * @str2: the second xmlChar *
1136 * @len: the max comparison length
1137 *
1138 * a strncmp for xmlChar's
1139 *
1140 * Returns the integer result of the comparison
1141 */
1142
1143int
1144xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1145 register int tmp;
1146
1147 if (len <= 0) return(0);
1148 if (str1 == str2) return(0);
1149 if (str1 == NULL) return(-1);
1150 if (str2 == NULL) return(1);
1151 do {
1152 tmp = *str1++ - *str2;
1153 if (tmp != 0 || --len == 0) return(tmp);
1154 } while (*str2++ != 0);
1155 return 0;
1156}
1157
1158static xmlChar casemap[256] = {
1159 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1160 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1161 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1162 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1163 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1164 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1165 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1166 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1167 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1168 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1169 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1170 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1171 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1172 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1173 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1174 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1175 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1176 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1177 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1178 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1179 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1180 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1181 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1182 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1183 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1184 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1185 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1186 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1187 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1188 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1189 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1190 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1191};
1192
1193/**
1194 * xmlStrcasecmp:
1195 * @str1: the first xmlChar *
1196 * @str2: the second xmlChar *
1197 *
1198 * a strcasecmp for xmlChar's
1199 *
1200 * Returns the integer result of the comparison
1201 */
1202
1203int
1204xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1205 register int tmp;
1206
1207 if (str1 == str2) return(0);
1208 if (str1 == NULL) return(-1);
1209 if (str2 == NULL) return(1);
1210 do {
1211 tmp = casemap[*str1++] - casemap[*str2];
1212 if (tmp != 0) return(tmp);
1213 } while (*str2++ != 0);
1214 return 0;
1215}
1216
1217/**
1218 * xmlStrncasecmp:
1219 * @str1: the first xmlChar *
1220 * @str2: the second xmlChar *
1221 * @len: the max comparison length
1222 *
1223 * a strncasecmp for xmlChar's
1224 *
1225 * Returns the integer result of the comparison
1226 */
1227
1228int
1229xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1230 register int tmp;
1231
1232 if (len <= 0) return(0);
1233 if (str1 == str2) return(0);
1234 if (str1 == NULL) return(-1);
1235 if (str2 == NULL) return(1);
1236 do {
1237 tmp = casemap[*str1++] - casemap[*str2];
1238 if (tmp != 0 || --len == 0) return(tmp);
1239 } while (*str2++ != 0);
1240 return 0;
1241}
1242
1243/**
1244 * xmlStrchr:
1245 * @str: the xmlChar * array
1246 * @val: the xmlChar to search
1247 *
1248 * a strchr for xmlChar's
1249 *
1250 * Returns the xmlChar * for the first occurence or NULL.
1251 */
1252
1253const xmlChar *
1254xmlStrchr(const xmlChar *str, xmlChar val) {
1255 if (str == NULL) return(NULL);
1256 while (*str != 0) { /* non input consuming */
1257 if (*str == val) return((xmlChar *) str);
1258 str++;
1259 }
1260 return(NULL);
1261}
1262
1263/**
1264 * xmlStrstr:
1265 * @str: the xmlChar * array (haystack)
1266 * @val: the xmlChar to search (needle)
1267 *
1268 * a strstr for xmlChar's
1269 *
1270 * Returns the xmlChar * for the first occurence or NULL.
1271 */
1272
1273const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001274xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001275 int n;
1276
1277 if (str == NULL) return(NULL);
1278 if (val == NULL) return(NULL);
1279 n = xmlStrlen(val);
1280
1281 if (n == 0) return(str);
1282 while (*str != 0) { /* non input consuming */
1283 if (*str == *val) {
1284 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1285 }
1286 str++;
1287 }
1288 return(NULL);
1289}
1290
1291/**
1292 * xmlStrcasestr:
1293 * @str: the xmlChar * array (haystack)
1294 * @val: the xmlChar to search (needle)
1295 *
1296 * a case-ignoring strstr for xmlChar's
1297 *
1298 * Returns the xmlChar * for the first occurence or NULL.
1299 */
1300
1301const xmlChar *
1302xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1303 int n;
1304
1305 if (str == NULL) return(NULL);
1306 if (val == NULL) return(NULL);
1307 n = xmlStrlen(val);
1308
1309 if (n == 0) return(str);
1310 while (*str != 0) { /* non input consuming */
1311 if (casemap[*str] == casemap[*val])
1312 if (!xmlStrncasecmp(str, val, n)) return(str);
1313 str++;
1314 }
1315 return(NULL);
1316}
1317
1318/**
1319 * xmlStrsub:
1320 * @str: the xmlChar * array (haystack)
1321 * @start: the index of the first char (zero based)
1322 * @len: the length of the substring
1323 *
1324 * Extract a substring of a given string
1325 *
1326 * Returns the xmlChar * for the first occurence or NULL.
1327 */
1328
1329xmlChar *
1330xmlStrsub(const xmlChar *str, int start, int len) {
1331 int i;
1332
1333 if (str == NULL) return(NULL);
1334 if (start < 0) return(NULL);
1335 if (len < 0) return(NULL);
1336
1337 for (i = 0;i < start;i++) {
1338 if (*str == 0) return(NULL);
1339 str++;
1340 }
1341 if (*str == 0) return(NULL);
1342 return(xmlStrndup(str, len));
1343}
1344
1345/**
1346 * xmlStrlen:
1347 * @str: the xmlChar * array
1348 *
1349 * length of a xmlChar's string
1350 *
1351 * Returns the number of xmlChar contained in the ARRAY.
1352 */
1353
1354int
1355xmlStrlen(const xmlChar *str) {
1356 int len = 0;
1357
1358 if (str == NULL) return(0);
1359 while (*str != 0) { /* non input consuming */
1360 str++;
1361 len++;
1362 }
1363 return(len);
1364}
1365
1366/**
1367 * xmlStrncat:
1368 * @cur: the original xmlChar * array
1369 * @add: the xmlChar * array added
1370 * @len: the length of @add
1371 *
1372 * a strncat for array of xmlChar's, it will extend cur with the len
1373 * first bytes of @add.
1374 *
1375 * Returns a new xmlChar *, the original @cur is reallocated if needed
1376 * and should not be freed
1377 */
1378
1379xmlChar *
1380xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1381 int size;
1382 xmlChar *ret;
1383
1384 if ((add == NULL) || (len == 0))
1385 return(cur);
1386 if (cur == NULL)
1387 return(xmlStrndup(add, len));
1388
1389 size = xmlStrlen(cur);
1390 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1391 if (ret == NULL) {
1392 xmlGenericError(xmlGenericErrorContext,
1393 "xmlStrncat: realloc of %ld byte failed\n",
1394 (size + len + 1) * (long)sizeof(xmlChar));
1395 return(cur);
1396 }
1397 memcpy(&ret[size], add, len * sizeof(xmlChar));
1398 ret[size + len] = 0;
1399 return(ret);
1400}
1401
1402/**
1403 * xmlStrcat:
1404 * @cur: the original xmlChar * array
1405 * @add: the xmlChar * array added
1406 *
1407 * a strcat for array of xmlChar's. Since they are supposed to be
1408 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1409 * a termination mark of '0'.
1410 *
1411 * Returns a new xmlChar * containing the concatenated string.
1412 */
1413xmlChar *
1414xmlStrcat(xmlChar *cur, const xmlChar *add) {
1415 const xmlChar *p = add;
1416
1417 if (add == NULL) return(cur);
1418 if (cur == NULL)
1419 return(xmlStrdup(add));
1420
1421 while (*p != 0) p++; /* non input consuming */
1422 return(xmlStrncat(cur, add, p - add));
1423}
1424
1425/************************************************************************
1426 * *
1427 * Commodity functions, cleanup needed ? *
1428 * *
1429 ************************************************************************/
1430
1431/**
1432 * areBlanks:
1433 * @ctxt: an XML parser context
1434 * @str: a xmlChar *
1435 * @len: the size of @str
1436 *
1437 * Is this a sequence of blank chars that one can ignore ?
1438 *
1439 * Returns 1 if ignorable 0 otherwise.
1440 */
1441
1442static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1443 int i, ret;
1444 xmlNodePtr lastChild;
1445
Daniel Veillard2f362242001-03-02 17:36:21 +00001446 if (ctxt->keepBlanks)
1447 return(0);
1448
Owen Taylor3473f882001-02-23 17:55:21 +00001449 /*
1450 * Check for xml:space value.
1451 */
1452 if (*(ctxt->space) == 1)
1453 return(0);
1454
1455 /*
1456 * Check that the string is made of blanks
1457 */
1458 for (i = 0;i < len;i++)
1459 if (!(IS_BLANK(str[i]))) return(0);
1460
1461 /*
1462 * Look if the element is mixed content in the Dtd if available
1463 */
1464 if (ctxt->myDoc != NULL) {
1465 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1466 if (ret == 0) return(1);
1467 if (ret == 1) return(0);
1468 }
1469
1470 /*
1471 * Otherwise, heuristic :-\
1472 */
Owen Taylor3473f882001-02-23 17:55:21 +00001473 if (RAW != '<') return(0);
1474 if (ctxt->node == NULL) return(0);
1475 if ((ctxt->node->children == NULL) &&
1476 (RAW == '<') && (NXT(1) == '/')) return(0);
1477
1478 lastChild = xmlGetLastChild(ctxt->node);
1479 if (lastChild == NULL) {
1480 if (ctxt->node->content != NULL) return(0);
1481 } else if (xmlNodeIsText(lastChild))
1482 return(0);
1483 else if ((ctxt->node->children != NULL) &&
1484 (xmlNodeIsText(ctxt->node->children)))
1485 return(0);
1486 return(1);
1487}
1488
1489/*
1490 * Forward declarations for recursive behaviour.
1491 */
1492void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1493void xmlParseReference(xmlParserCtxtPtr ctxt);
1494
1495/************************************************************************
1496 * *
1497 * Extra stuff for namespace support *
1498 * Relates to http://www.w3.org/TR/WD-xml-names *
1499 * *
1500 ************************************************************************/
1501
1502/**
1503 * xmlSplitQName:
1504 * @ctxt: an XML parser context
1505 * @name: an XML parser context
1506 * @prefix: a xmlChar **
1507 *
1508 * parse an UTF8 encoded XML qualified name string
1509 *
1510 * [NS 5] QName ::= (Prefix ':')? LocalPart
1511 *
1512 * [NS 6] Prefix ::= NCName
1513 *
1514 * [NS 7] LocalPart ::= NCName
1515 *
1516 * Returns the local part, and prefix is updated
1517 * to get the Prefix if any.
1518 */
1519
1520xmlChar *
1521xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1522 xmlChar buf[XML_MAX_NAMELEN + 5];
1523 xmlChar *buffer = NULL;
1524 int len = 0;
1525 int max = XML_MAX_NAMELEN;
1526 xmlChar *ret = NULL;
1527 const xmlChar *cur = name;
1528 int c;
1529
1530 *prefix = NULL;
1531
1532#ifndef XML_XML_NAMESPACE
1533 /* xml: prefix is not really a namespace */
1534 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1535 (cur[2] == 'l') && (cur[3] == ':'))
1536 return(xmlStrdup(name));
1537#endif
1538
1539 /* nasty but valid */
1540 if (cur[0] == ':')
1541 return(xmlStrdup(name));
1542
1543 c = *cur++;
1544 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1545 buf[len++] = c;
1546 c = *cur++;
1547 }
1548 if (len >= max) {
1549 /*
1550 * Okay someone managed to make a huge name, so he's ready to pay
1551 * for the processing speed.
1552 */
1553 max = len * 2;
1554
1555 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1556 if (buffer == NULL) {
1557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1558 ctxt->sax->error(ctxt->userData,
1559 "xmlSplitQName: out of memory\n");
1560 return(NULL);
1561 }
1562 memcpy(buffer, buf, len);
1563 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1564 if (len + 10 > max) {
1565 max *= 2;
1566 buffer = (xmlChar *) xmlRealloc(buffer,
1567 max * sizeof(xmlChar));
1568 if (buffer == NULL) {
1569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1570 ctxt->sax->error(ctxt->userData,
1571 "xmlSplitQName: out of memory\n");
1572 return(NULL);
1573 }
1574 }
1575 buffer[len++] = c;
1576 c = *cur++;
1577 }
1578 buffer[len] = 0;
1579 }
1580
1581 if (buffer == NULL)
1582 ret = xmlStrndup(buf, len);
1583 else {
1584 ret = buffer;
1585 buffer = NULL;
1586 max = XML_MAX_NAMELEN;
1587 }
1588
1589
1590 if (c == ':') {
1591 c = *cur++;
1592 if (c == 0) return(ret);
1593 *prefix = ret;
1594 len = 0;
1595
1596 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1597 buf[len++] = c;
1598 c = *cur++;
1599 }
1600 if (len >= max) {
1601 /*
1602 * Okay someone managed to make a huge name, so he's ready to pay
1603 * for the processing speed.
1604 */
1605 max = len * 2;
1606
1607 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1608 if (buffer == NULL) {
1609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1610 ctxt->sax->error(ctxt->userData,
1611 "xmlSplitQName: out of memory\n");
1612 return(NULL);
1613 }
1614 memcpy(buffer, buf, len);
1615 while (c != 0) { /* tested bigname2.xml */
1616 if (len + 10 > max) {
1617 max *= 2;
1618 buffer = (xmlChar *) xmlRealloc(buffer,
1619 max * sizeof(xmlChar));
1620 if (buffer == NULL) {
1621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1622 ctxt->sax->error(ctxt->userData,
1623 "xmlSplitQName: out of memory\n");
1624 return(NULL);
1625 }
1626 }
1627 buffer[len++] = c;
1628 c = *cur++;
1629 }
1630 buffer[len] = 0;
1631 }
1632
1633 if (buffer == NULL)
1634 ret = xmlStrndup(buf, len);
1635 else {
1636 ret = buffer;
1637 }
1638 }
1639
1640 return(ret);
1641}
1642
1643/************************************************************************
1644 * *
1645 * The parser itself *
1646 * Relates to http://www.w3.org/TR/REC-xml *
1647 * *
1648 ************************************************************************/
1649
Daniel Veillard76d66f42001-05-16 21:05:17 +00001650static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001651/**
1652 * xmlParseName:
1653 * @ctxt: an XML parser context
1654 *
1655 * parse an XML name.
1656 *
1657 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1658 * CombiningChar | Extender
1659 *
1660 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1661 *
1662 * [6] Names ::= Name (S Name)*
1663 *
1664 * Returns the Name parsed or NULL
1665 */
1666
1667xmlChar *
1668xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001669 const xmlChar *in;
1670 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001671 int count = 0;
1672
1673 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001674
1675 /*
1676 * Accelerator for simple ASCII names
1677 */
1678 in = ctxt->input->cur;
1679 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1680 ((*in >= 0x41) && (*in <= 0x5A)) ||
1681 (*in == '_') || (*in == ':')) {
1682 in++;
1683 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1684 ((*in >= 0x41) && (*in <= 0x5A)) ||
1685 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001686 (*in == '_') || (*in == '-') ||
1687 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001688 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001689 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001690 count = in - ctxt->input->cur;
1691 ret = xmlStrndup(ctxt->input->cur, count);
1692 ctxt->input->cur = in;
1693 return(ret);
1694 }
1695 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001696 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001697}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001698
Daniel Veillard76d66f42001-05-16 21:05:17 +00001699static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001700xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1701 xmlChar buf[XML_MAX_NAMELEN + 5];
1702 int len = 0, l;
1703 int c;
1704 int count = 0;
1705
1706 /*
1707 * Handler for more complex cases
1708 */
1709 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001710 c = CUR_CHAR(l);
1711 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1712 (!IS_LETTER(c) && (c != '_') &&
1713 (c != ':'))) {
1714 return(NULL);
1715 }
1716
1717 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1718 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1719 (c == '.') || (c == '-') ||
1720 (c == '_') || (c == ':') ||
1721 (IS_COMBINING(c)) ||
1722 (IS_EXTENDER(c)))) {
1723 if (count++ > 100) {
1724 count = 0;
1725 GROW;
1726 }
1727 COPY_BUF(l,buf,len,c);
1728 NEXTL(l);
1729 c = CUR_CHAR(l);
1730 if (len >= XML_MAX_NAMELEN) {
1731 /*
1732 * Okay someone managed to make a huge name, so he's ready to pay
1733 * for the processing speed.
1734 */
1735 xmlChar *buffer;
1736 int max = len * 2;
1737
1738 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1739 if (buffer == NULL) {
1740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1741 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001742 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001743 return(NULL);
1744 }
1745 memcpy(buffer, buf, len);
1746 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1747 (c == '.') || (c == '-') ||
1748 (c == '_') || (c == ':') ||
1749 (IS_COMBINING(c)) ||
1750 (IS_EXTENDER(c))) {
1751 if (count++ > 100) {
1752 count = 0;
1753 GROW;
1754 }
1755 if (len + 10 > max) {
1756 max *= 2;
1757 buffer = (xmlChar *) xmlRealloc(buffer,
1758 max * sizeof(xmlChar));
1759 if (buffer == NULL) {
1760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1761 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001762 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001763 return(NULL);
1764 }
1765 }
1766 COPY_BUF(l,buffer,len,c);
1767 NEXTL(l);
1768 c = CUR_CHAR(l);
1769 }
1770 buffer[len] = 0;
1771 return(buffer);
1772 }
1773 }
1774 return(xmlStrndup(buf, len));
1775}
1776
1777/**
1778 * xmlParseStringName:
1779 * @ctxt: an XML parser context
1780 * @str: a pointer to the string pointer (IN/OUT)
1781 *
1782 * parse an XML name.
1783 *
1784 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1785 * CombiningChar | Extender
1786 *
1787 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1788 *
1789 * [6] Names ::= Name (S Name)*
1790 *
1791 * Returns the Name parsed or NULL. The str pointer
1792 * is updated to the current location in the string.
1793 */
1794
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001795static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001796xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1797 xmlChar buf[XML_MAX_NAMELEN + 5];
1798 const xmlChar *cur = *str;
1799 int len = 0, l;
1800 int c;
1801
1802 c = CUR_SCHAR(cur, l);
1803 if (!IS_LETTER(c) && (c != '_') &&
1804 (c != ':')) {
1805 return(NULL);
1806 }
1807
1808 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1809 (c == '.') || (c == '-') ||
1810 (c == '_') || (c == ':') ||
1811 (IS_COMBINING(c)) ||
1812 (IS_EXTENDER(c))) {
1813 COPY_BUF(l,buf,len,c);
1814 cur += l;
1815 c = CUR_SCHAR(cur, l);
1816 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1817 /*
1818 * Okay someone managed to make a huge name, so he's ready to pay
1819 * for the processing speed.
1820 */
1821 xmlChar *buffer;
1822 int max = len * 2;
1823
1824 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1825 if (buffer == NULL) {
1826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1827 ctxt->sax->error(ctxt->userData,
1828 "xmlParseStringName: out of memory\n");
1829 return(NULL);
1830 }
1831 memcpy(buffer, buf, len);
1832 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1833 (c == '.') || (c == '-') ||
1834 (c == '_') || (c == ':') ||
1835 (IS_COMBINING(c)) ||
1836 (IS_EXTENDER(c))) {
1837 if (len + 10 > max) {
1838 max *= 2;
1839 buffer = (xmlChar *) xmlRealloc(buffer,
1840 max * sizeof(xmlChar));
1841 if (buffer == NULL) {
1842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1843 ctxt->sax->error(ctxt->userData,
1844 "xmlParseStringName: out of memory\n");
1845 return(NULL);
1846 }
1847 }
1848 COPY_BUF(l,buffer,len,c);
1849 cur += l;
1850 c = CUR_SCHAR(cur, l);
1851 }
1852 buffer[len] = 0;
1853 *str = cur;
1854 return(buffer);
1855 }
1856 }
1857 *str = cur;
1858 return(xmlStrndup(buf, len));
1859}
1860
1861/**
1862 * xmlParseNmtoken:
1863 * @ctxt: an XML parser context
1864 *
1865 * parse an XML Nmtoken.
1866 *
1867 * [7] Nmtoken ::= (NameChar)+
1868 *
1869 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1870 *
1871 * Returns the Nmtoken parsed or NULL
1872 */
1873
1874xmlChar *
1875xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1876 xmlChar buf[XML_MAX_NAMELEN + 5];
1877 int len = 0, l;
1878 int c;
1879 int count = 0;
1880
1881 GROW;
1882 c = CUR_CHAR(l);
1883
1884 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1885 (c == '.') || (c == '-') ||
1886 (c == '_') || (c == ':') ||
1887 (IS_COMBINING(c)) ||
1888 (IS_EXTENDER(c))) {
1889 if (count++ > 100) {
1890 count = 0;
1891 GROW;
1892 }
1893 COPY_BUF(l,buf,len,c);
1894 NEXTL(l);
1895 c = CUR_CHAR(l);
1896 if (len >= XML_MAX_NAMELEN) {
1897 /*
1898 * Okay someone managed to make a huge token, so he's ready to pay
1899 * for the processing speed.
1900 */
1901 xmlChar *buffer;
1902 int max = len * 2;
1903
1904 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1905 if (buffer == NULL) {
1906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1907 ctxt->sax->error(ctxt->userData,
1908 "xmlParseNmtoken: out of memory\n");
1909 return(NULL);
1910 }
1911 memcpy(buffer, buf, len);
1912 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1913 (c == '.') || (c == '-') ||
1914 (c == '_') || (c == ':') ||
1915 (IS_COMBINING(c)) ||
1916 (IS_EXTENDER(c))) {
1917 if (count++ > 100) {
1918 count = 0;
1919 GROW;
1920 }
1921 if (len + 10 > max) {
1922 max *= 2;
1923 buffer = (xmlChar *) xmlRealloc(buffer,
1924 max * sizeof(xmlChar));
1925 if (buffer == NULL) {
1926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1927 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001928 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001929 return(NULL);
1930 }
1931 }
1932 COPY_BUF(l,buffer,len,c);
1933 NEXTL(l);
1934 c = CUR_CHAR(l);
1935 }
1936 buffer[len] = 0;
1937 return(buffer);
1938 }
1939 }
1940 if (len == 0)
1941 return(NULL);
1942 return(xmlStrndup(buf, len));
1943}
1944
1945/**
1946 * xmlParseEntityValue:
1947 * @ctxt: an XML parser context
1948 * @orig: if non-NULL store a copy of the original entity value
1949 *
1950 * parse a value for ENTITY declarations
1951 *
1952 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1953 * "'" ([^%&'] | PEReference | Reference)* "'"
1954 *
1955 * Returns the EntityValue parsed with reference substitued or NULL
1956 */
1957
1958xmlChar *
1959xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1960 xmlChar *buf = NULL;
1961 int len = 0;
1962 int size = XML_PARSER_BUFFER_SIZE;
1963 int c, l;
1964 xmlChar stop;
1965 xmlChar *ret = NULL;
1966 const xmlChar *cur = NULL;
1967 xmlParserInputPtr input;
1968
1969 if (RAW == '"') stop = '"';
1970 else if (RAW == '\'') stop = '\'';
1971 else {
1972 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1974 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1975 ctxt->wellFormed = 0;
1976 ctxt->disableSAX = 1;
1977 return(NULL);
1978 }
1979 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1980 if (buf == NULL) {
1981 xmlGenericError(xmlGenericErrorContext,
1982 "malloc of %d byte failed\n", size);
1983 return(NULL);
1984 }
1985
1986 /*
1987 * The content of the entity definition is copied in a buffer.
1988 */
1989
1990 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1991 input = ctxt->input;
1992 GROW;
1993 NEXT;
1994 c = CUR_CHAR(l);
1995 /*
1996 * NOTE: 4.4.5 Included in Literal
1997 * When a parameter entity reference appears in a literal entity
1998 * value, ... a single or double quote character in the replacement
1999 * text is always treated as a normal data character and will not
2000 * terminate the literal.
2001 * In practice it means we stop the loop only when back at parsing
2002 * the initial entity and the quote is found
2003 */
2004 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2005 (ctxt->input != input))) {
2006 if (len + 5 >= size) {
2007 size *= 2;
2008 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2009 if (buf == NULL) {
2010 xmlGenericError(xmlGenericErrorContext,
2011 "realloc of %d byte failed\n", size);
2012 return(NULL);
2013 }
2014 }
2015 COPY_BUF(l,buf,len,c);
2016 NEXTL(l);
2017 /*
2018 * Pop-up of finished entities.
2019 */
2020 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2021 xmlPopInput(ctxt);
2022
2023 GROW;
2024 c = CUR_CHAR(l);
2025 if (c == 0) {
2026 GROW;
2027 c = CUR_CHAR(l);
2028 }
2029 }
2030 buf[len] = 0;
2031
2032 /*
2033 * Raise problem w.r.t. '&' and '%' being used in non-entities
2034 * reference constructs. Note Charref will be handled in
2035 * xmlStringDecodeEntities()
2036 */
2037 cur = buf;
2038 while (*cur != 0) { /* non input consuming */
2039 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2040 xmlChar *name;
2041 xmlChar tmp = *cur;
2042
2043 cur++;
2044 name = xmlParseStringName(ctxt, &cur);
2045 if ((name == NULL) || (*cur != ';')) {
2046 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2048 ctxt->sax->error(ctxt->userData,
2049 "EntityValue: '%c' forbidden except for entities references\n",
2050 tmp);
2051 ctxt->wellFormed = 0;
2052 ctxt->disableSAX = 1;
2053 }
2054 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2055 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2057 ctxt->sax->error(ctxt->userData,
2058 "EntityValue: PEReferences forbidden in internal subset\n",
2059 tmp);
2060 ctxt->wellFormed = 0;
2061 ctxt->disableSAX = 1;
2062 }
2063 if (name != NULL)
2064 xmlFree(name);
2065 }
2066 cur++;
2067 }
2068
2069 /*
2070 * Then PEReference entities are substituted.
2071 */
2072 if (c != stop) {
2073 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2075 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2076 ctxt->wellFormed = 0;
2077 ctxt->disableSAX = 1;
2078 xmlFree(buf);
2079 } else {
2080 NEXT;
2081 /*
2082 * NOTE: 4.4.7 Bypassed
2083 * When a general entity reference appears in the EntityValue in
2084 * an entity declaration, it is bypassed and left as is.
2085 * so XML_SUBSTITUTE_REF is not set here.
2086 */
2087 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2088 0, 0, 0);
2089 if (orig != NULL)
2090 *orig = buf;
2091 else
2092 xmlFree(buf);
2093 }
2094
2095 return(ret);
2096}
2097
2098/**
2099 * xmlParseAttValue:
2100 * @ctxt: an XML parser context
2101 *
2102 * parse a value for an attribute
2103 * Note: the parser won't do substitution of entities here, this
2104 * will be handled later in xmlStringGetNodeList
2105 *
2106 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2107 * "'" ([^<&'] | Reference)* "'"
2108 *
2109 * 3.3.3 Attribute-Value Normalization:
2110 * Before the value of an attribute is passed to the application or
2111 * checked for validity, the XML processor must normalize it as follows:
2112 * - a character reference is processed by appending the referenced
2113 * character to the attribute value
2114 * - an entity reference is processed by recursively processing the
2115 * replacement text of the entity
2116 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2117 * appending #x20 to the normalized value, except that only a single
2118 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2119 * parsed entity or the literal entity value of an internal parsed entity
2120 * - other characters are processed by appending them to the normalized value
2121 * If the declared value is not CDATA, then the XML processor must further
2122 * process the normalized attribute value by discarding any leading and
2123 * trailing space (#x20) characters, and by replacing sequences of space
2124 * (#x20) characters by a single space (#x20) character.
2125 * All attributes for which no declaration has been read should be treated
2126 * by a non-validating parser as if declared CDATA.
2127 *
2128 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2129 */
2130
2131xmlChar *
2132xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2133 xmlChar limit = 0;
2134 xmlChar *buf = NULL;
2135 int len = 0;
2136 int buf_size = 0;
2137 int c, l;
2138 xmlChar *current = NULL;
2139 xmlEntityPtr ent;
2140
2141
2142 SHRINK;
2143 if (NXT(0) == '"') {
2144 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2145 limit = '"';
2146 NEXT;
2147 } else if (NXT(0) == '\'') {
2148 limit = '\'';
2149 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2150 NEXT;
2151 } else {
2152 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2154 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2155 ctxt->wellFormed = 0;
2156 ctxt->disableSAX = 1;
2157 return(NULL);
2158 }
2159
2160 /*
2161 * allocate a translation buffer.
2162 */
2163 buf_size = XML_PARSER_BUFFER_SIZE;
2164 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2165 if (buf == NULL) {
2166 perror("xmlParseAttValue: malloc failed");
2167 return(NULL);
2168 }
2169
2170 /*
2171 * Ok loop until we reach one of the ending char or a size limit.
2172 */
2173 c = CUR_CHAR(l);
2174 while (((NXT(0) != limit) && /* checked */
2175 (c != '<')) || (ctxt->token != 0)) {
2176 if (c == 0) break;
2177 if (ctxt->token == '&') {
2178 /*
2179 * The reparsing will be done in xmlStringGetNodeList()
2180 * called by the attribute() function in SAX.c
2181 */
2182 static xmlChar buffer[6] = "&#38;";
2183
2184 if (len > buf_size - 10) {
2185 growBuffer(buf);
2186 }
2187 current = &buffer[0];
2188 while (*current != 0) { /* non input consuming */
2189 buf[len++] = *current++;
2190 }
2191 ctxt->token = 0;
2192 } else if (c == '&') {
2193 if (NXT(1) == '#') {
2194 int val = xmlParseCharRef(ctxt);
2195 if (val == '&') {
2196 /*
2197 * The reparsing will be done in xmlStringGetNodeList()
2198 * called by the attribute() function in SAX.c
2199 */
2200 static xmlChar buffer[6] = "&#38;";
2201
2202 if (len > buf_size - 10) {
2203 growBuffer(buf);
2204 }
2205 current = &buffer[0];
2206 while (*current != 0) { /* non input consuming */
2207 buf[len++] = *current++;
2208 }
2209 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002210 if (len > buf_size - 10) {
2211 growBuffer(buf);
2212 }
Owen Taylor3473f882001-02-23 17:55:21 +00002213 len += xmlCopyChar(0, &buf[len], val);
2214 }
2215 } else {
2216 ent = xmlParseEntityRef(ctxt);
2217 if ((ent != NULL) &&
2218 (ctxt->replaceEntities != 0)) {
2219 xmlChar *rep;
2220
2221 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2222 rep = xmlStringDecodeEntities(ctxt, ent->content,
2223 XML_SUBSTITUTE_REF, 0, 0, 0);
2224 if (rep != NULL) {
2225 current = rep;
2226 while (*current != 0) { /* non input consuming */
2227 buf[len++] = *current++;
2228 if (len > buf_size - 10) {
2229 growBuffer(buf);
2230 }
2231 }
2232 xmlFree(rep);
2233 }
2234 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002235 if (len > buf_size - 10) {
2236 growBuffer(buf);
2237 }
Owen Taylor3473f882001-02-23 17:55:21 +00002238 if (ent->content != NULL)
2239 buf[len++] = ent->content[0];
2240 }
2241 } else if (ent != NULL) {
2242 int i = xmlStrlen(ent->name);
2243 const xmlChar *cur = ent->name;
2244
2245 /*
2246 * This may look absurd but is needed to detect
2247 * entities problems
2248 */
2249 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2250 (ent->content != NULL)) {
2251 xmlChar *rep;
2252 rep = xmlStringDecodeEntities(ctxt, ent->content,
2253 XML_SUBSTITUTE_REF, 0, 0, 0);
2254 if (rep != NULL)
2255 xmlFree(rep);
2256 }
2257
2258 /*
2259 * Just output the reference
2260 */
2261 buf[len++] = '&';
2262 if (len > buf_size - i - 10) {
2263 growBuffer(buf);
2264 }
2265 for (;i > 0;i--)
2266 buf[len++] = *cur++;
2267 buf[len++] = ';';
2268 }
2269 }
2270 } else {
2271 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2272 COPY_BUF(l,buf,len,0x20);
2273 if (len > buf_size - 10) {
2274 growBuffer(buf);
2275 }
2276 } else {
2277 COPY_BUF(l,buf,len,c);
2278 if (len > buf_size - 10) {
2279 growBuffer(buf);
2280 }
2281 }
2282 NEXTL(l);
2283 }
2284 GROW;
2285 c = CUR_CHAR(l);
2286 }
2287 buf[len++] = 0;
2288 if (RAW == '<') {
2289 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2291 ctxt->sax->error(ctxt->userData,
2292 "Unescaped '<' not allowed in attributes values\n");
2293 ctxt->wellFormed = 0;
2294 ctxt->disableSAX = 1;
2295 } else if (RAW != limit) {
2296 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2298 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2299 ctxt->wellFormed = 0;
2300 ctxt->disableSAX = 1;
2301 } else
2302 NEXT;
2303 return(buf);
2304}
2305
2306/**
2307 * xmlParseSystemLiteral:
2308 * @ctxt: an XML parser context
2309 *
2310 * parse an XML Literal
2311 *
2312 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2313 *
2314 * Returns the SystemLiteral parsed or NULL
2315 */
2316
2317xmlChar *
2318xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2319 xmlChar *buf = NULL;
2320 int len = 0;
2321 int size = XML_PARSER_BUFFER_SIZE;
2322 int cur, l;
2323 xmlChar stop;
2324 int state = ctxt->instate;
2325 int count = 0;
2326
2327 SHRINK;
2328 if (RAW == '"') {
2329 NEXT;
2330 stop = '"';
2331 } else if (RAW == '\'') {
2332 NEXT;
2333 stop = '\'';
2334 } else {
2335 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2337 ctxt->sax->error(ctxt->userData,
2338 "SystemLiteral \" or ' expected\n");
2339 ctxt->wellFormed = 0;
2340 ctxt->disableSAX = 1;
2341 return(NULL);
2342 }
2343
2344 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2345 if (buf == NULL) {
2346 xmlGenericError(xmlGenericErrorContext,
2347 "malloc of %d byte failed\n", size);
2348 return(NULL);
2349 }
2350 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2351 cur = CUR_CHAR(l);
2352 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2353 if (len + 5 >= size) {
2354 size *= 2;
2355 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2356 if (buf == NULL) {
2357 xmlGenericError(xmlGenericErrorContext,
2358 "realloc of %d byte failed\n", size);
2359 ctxt->instate = (xmlParserInputState) state;
2360 return(NULL);
2361 }
2362 }
2363 count++;
2364 if (count > 50) {
2365 GROW;
2366 count = 0;
2367 }
2368 COPY_BUF(l,buf,len,cur);
2369 NEXTL(l);
2370 cur = CUR_CHAR(l);
2371 if (cur == 0) {
2372 GROW;
2373 SHRINK;
2374 cur = CUR_CHAR(l);
2375 }
2376 }
2377 buf[len] = 0;
2378 ctxt->instate = (xmlParserInputState) state;
2379 if (!IS_CHAR(cur)) {
2380 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2382 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2383 ctxt->wellFormed = 0;
2384 ctxt->disableSAX = 1;
2385 } else {
2386 NEXT;
2387 }
2388 return(buf);
2389}
2390
2391/**
2392 * xmlParsePubidLiteral:
2393 * @ctxt: an XML parser context
2394 *
2395 * parse an XML public literal
2396 *
2397 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2398 *
2399 * Returns the PubidLiteral parsed or NULL.
2400 */
2401
2402xmlChar *
2403xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2404 xmlChar *buf = NULL;
2405 int len = 0;
2406 int size = XML_PARSER_BUFFER_SIZE;
2407 xmlChar cur;
2408 xmlChar stop;
2409 int count = 0;
2410
2411 SHRINK;
2412 if (RAW == '"') {
2413 NEXT;
2414 stop = '"';
2415 } else if (RAW == '\'') {
2416 NEXT;
2417 stop = '\'';
2418 } else {
2419 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2421 ctxt->sax->error(ctxt->userData,
2422 "SystemLiteral \" or ' expected\n");
2423 ctxt->wellFormed = 0;
2424 ctxt->disableSAX = 1;
2425 return(NULL);
2426 }
2427 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2428 if (buf == NULL) {
2429 xmlGenericError(xmlGenericErrorContext,
2430 "malloc of %d byte failed\n", size);
2431 return(NULL);
2432 }
2433 cur = CUR;
2434 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2435 if (len + 1 >= size) {
2436 size *= 2;
2437 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2438 if (buf == NULL) {
2439 xmlGenericError(xmlGenericErrorContext,
2440 "realloc of %d byte failed\n", size);
2441 return(NULL);
2442 }
2443 }
2444 buf[len++] = cur;
2445 count++;
2446 if (count > 50) {
2447 GROW;
2448 count = 0;
2449 }
2450 NEXT;
2451 cur = CUR;
2452 if (cur == 0) {
2453 GROW;
2454 SHRINK;
2455 cur = CUR;
2456 }
2457 }
2458 buf[len] = 0;
2459 if (cur != stop) {
2460 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2462 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2463 ctxt->wellFormed = 0;
2464 ctxt->disableSAX = 1;
2465 } else {
2466 NEXT;
2467 }
2468 return(buf);
2469}
2470
Daniel Veillard48b2f892001-02-25 16:11:03 +00002471void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002472/**
2473 * xmlParseCharData:
2474 * @ctxt: an XML parser context
2475 * @cdata: int indicating whether we are within a CDATA section
2476 *
2477 * parse a CharData section.
2478 * if we are within a CDATA section ']]>' marks an end of section.
2479 *
2480 * The right angle bracket (>) may be represented using the string "&gt;",
2481 * and must, for compatibility, be escaped using "&gt;" or a character
2482 * reference when it appears in the string "]]>" in content, when that
2483 * string is not marking the end of a CDATA section.
2484 *
2485 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2486 */
2487
2488void
2489xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002490 const xmlChar *in;
2491 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002492 int line = ctxt->input->line;
2493 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002494
2495 SHRINK;
2496 GROW;
2497 /*
2498 * Accelerated common case where input don't need to be
2499 * modified before passing it to the handler.
2500 */
2501 if ((ctxt->token == 0) && (!cdata)) {
2502 in = ctxt->input->cur;
2503 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002504get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002505 while (((*in >= 0x20) && (*in != '<') &&
2506 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2507 in++;
2508 if (*in == 0xA) {
2509 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002510 in++;
2511 while (*in == 0xA) {
2512 ctxt->input->line++;
2513 in++;
2514 }
2515 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002516 }
2517 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002518 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002519 if (IS_BLANK(*ctxt->input->cur)) {
2520 const xmlChar *tmp = ctxt->input->cur;
2521 ctxt->input->cur = in;
2522 if (areBlanks(ctxt, tmp, nbchar)) {
2523 if (ctxt->sax->ignorableWhitespace != NULL)
2524 ctxt->sax->ignorableWhitespace(ctxt->userData,
2525 tmp, nbchar);
2526 } else {
2527 if (ctxt->sax->characters != NULL)
2528 ctxt->sax->characters(ctxt->userData,
2529 tmp, nbchar);
2530 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002531 line = ctxt->input->line;
2532 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002533 } else {
2534 if (ctxt->sax->characters != NULL)
2535 ctxt->sax->characters(ctxt->userData,
2536 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002537 line = ctxt->input->line;
2538 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002539 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002540 }
2541 ctxt->input->cur = in;
2542 if (*in == 0xD) {
2543 in++;
2544 if (*in == 0xA) {
2545 ctxt->input->cur = in;
2546 in++;
2547 ctxt->input->line++;
2548 continue; /* while */
2549 }
2550 in--;
2551 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002552 if (*in == '<') {
2553 return;
2554 }
2555 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002556 return;
2557 }
2558 SHRINK;
2559 GROW;
2560 in = ctxt->input->cur;
2561 } while ((*in >= 0x20) && (*in <= 0x7F));
2562 nbchar = 0;
2563 }
Daniel Veillard50582112001-03-26 22:52:16 +00002564 ctxt->input->line = line;
2565 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002566 xmlParseCharDataComplex(ctxt, cdata);
2567}
2568
2569void
2570xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002571 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2572 int nbchar = 0;
2573 int cur, l;
2574 int count = 0;
2575
2576 SHRINK;
2577 GROW;
2578 cur = CUR_CHAR(l);
2579 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2580 ((cur != '&') || (ctxt->token == '&')) &&
2581 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2582 if ((cur == ']') && (NXT(1) == ']') &&
2583 (NXT(2) == '>')) {
2584 if (cdata) break;
2585 else {
2586 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2588 ctxt->sax->error(ctxt->userData,
2589 "Sequence ']]>' not allowed in content\n");
2590 /* Should this be relaxed ??? I see a "must here */
2591 ctxt->wellFormed = 0;
2592 ctxt->disableSAX = 1;
2593 }
2594 }
2595 COPY_BUF(l,buf,nbchar,cur);
2596 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2597 /*
2598 * Ok the segment is to be consumed as chars.
2599 */
2600 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2601 if (areBlanks(ctxt, buf, nbchar)) {
2602 if (ctxt->sax->ignorableWhitespace != NULL)
2603 ctxt->sax->ignorableWhitespace(ctxt->userData,
2604 buf, nbchar);
2605 } else {
2606 if (ctxt->sax->characters != NULL)
2607 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2608 }
2609 }
2610 nbchar = 0;
2611 }
2612 count++;
2613 if (count > 50) {
2614 GROW;
2615 count = 0;
2616 }
2617 NEXTL(l);
2618 cur = CUR_CHAR(l);
2619 }
2620 if (nbchar != 0) {
2621 /*
2622 * Ok the segment is to be consumed as chars.
2623 */
2624 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2625 if (areBlanks(ctxt, buf, nbchar)) {
2626 if (ctxt->sax->ignorableWhitespace != NULL)
2627 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2628 } else {
2629 if (ctxt->sax->characters != NULL)
2630 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2631 }
2632 }
2633 }
2634}
2635
2636/**
2637 * xmlParseExternalID:
2638 * @ctxt: an XML parser context
2639 * @publicID: a xmlChar** receiving PubidLiteral
2640 * @strict: indicate whether we should restrict parsing to only
2641 * production [75], see NOTE below
2642 *
2643 * Parse an External ID or a Public ID
2644 *
2645 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2646 * 'PUBLIC' S PubidLiteral S SystemLiteral
2647 *
2648 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2649 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2650 *
2651 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2652 *
2653 * Returns the function returns SystemLiteral and in the second
2654 * case publicID receives PubidLiteral, is strict is off
2655 * it is possible to return NULL and have publicID set.
2656 */
2657
2658xmlChar *
2659xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2660 xmlChar *URI = NULL;
2661
2662 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002663
2664 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002665 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2666 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2667 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2668 SKIP(6);
2669 if (!IS_BLANK(CUR)) {
2670 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2672 ctxt->sax->error(ctxt->userData,
2673 "Space required after 'SYSTEM'\n");
2674 ctxt->wellFormed = 0;
2675 ctxt->disableSAX = 1;
2676 }
2677 SKIP_BLANKS;
2678 URI = xmlParseSystemLiteral(ctxt);
2679 if (URI == NULL) {
2680 ctxt->errNo = XML_ERR_URI_REQUIRED;
2681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2682 ctxt->sax->error(ctxt->userData,
2683 "xmlParseExternalID: SYSTEM, no URI\n");
2684 ctxt->wellFormed = 0;
2685 ctxt->disableSAX = 1;
2686 }
2687 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2688 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2689 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2690 SKIP(6);
2691 if (!IS_BLANK(CUR)) {
2692 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2694 ctxt->sax->error(ctxt->userData,
2695 "Space required after 'PUBLIC'\n");
2696 ctxt->wellFormed = 0;
2697 ctxt->disableSAX = 1;
2698 }
2699 SKIP_BLANKS;
2700 *publicID = xmlParsePubidLiteral(ctxt);
2701 if (*publicID == NULL) {
2702 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2704 ctxt->sax->error(ctxt->userData,
2705 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2706 ctxt->wellFormed = 0;
2707 ctxt->disableSAX = 1;
2708 }
2709 if (strict) {
2710 /*
2711 * We don't handle [83] so "S SystemLiteral" is required.
2712 */
2713 if (!IS_BLANK(CUR)) {
2714 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2716 ctxt->sax->error(ctxt->userData,
2717 "Space required after the Public Identifier\n");
2718 ctxt->wellFormed = 0;
2719 ctxt->disableSAX = 1;
2720 }
2721 } else {
2722 /*
2723 * We handle [83] so we return immediately, if
2724 * "S SystemLiteral" is not detected. From a purely parsing
2725 * point of view that's a nice mess.
2726 */
2727 const xmlChar *ptr;
2728 GROW;
2729
2730 ptr = CUR_PTR;
2731 if (!IS_BLANK(*ptr)) return(NULL);
2732
2733 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2734 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2735 }
2736 SKIP_BLANKS;
2737 URI = xmlParseSystemLiteral(ctxt);
2738 if (URI == NULL) {
2739 ctxt->errNo = XML_ERR_URI_REQUIRED;
2740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2741 ctxt->sax->error(ctxt->userData,
2742 "xmlParseExternalID: PUBLIC, no URI\n");
2743 ctxt->wellFormed = 0;
2744 ctxt->disableSAX = 1;
2745 }
2746 }
2747 return(URI);
2748}
2749
2750/**
2751 * xmlParseComment:
2752 * @ctxt: an XML parser context
2753 *
2754 * Skip an XML (SGML) comment <!-- .... -->
2755 * The spec says that "For compatibility, the string "--" (double-hyphen)
2756 * must not occur within comments. "
2757 *
2758 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2759 */
2760void
2761xmlParseComment(xmlParserCtxtPtr ctxt) {
2762 xmlChar *buf = NULL;
2763 int len;
2764 int size = XML_PARSER_BUFFER_SIZE;
2765 int q, ql;
2766 int r, rl;
2767 int cur, l;
2768 xmlParserInputState state;
2769 xmlParserInputPtr input = ctxt->input;
2770 int count = 0;
2771
2772 /*
2773 * Check that there is a comment right here.
2774 */
2775 if ((RAW != '<') || (NXT(1) != '!') ||
2776 (NXT(2) != '-') || (NXT(3) != '-')) return;
2777
2778 state = ctxt->instate;
2779 ctxt->instate = XML_PARSER_COMMENT;
2780 SHRINK;
2781 SKIP(4);
2782 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2783 if (buf == NULL) {
2784 xmlGenericError(xmlGenericErrorContext,
2785 "malloc of %d byte failed\n", size);
2786 ctxt->instate = state;
2787 return;
2788 }
2789 q = CUR_CHAR(ql);
2790 NEXTL(ql);
2791 r = CUR_CHAR(rl);
2792 NEXTL(rl);
2793 cur = CUR_CHAR(l);
2794 len = 0;
2795 while (IS_CHAR(cur) && /* checked */
2796 ((cur != '>') ||
2797 (r != '-') || (q != '-'))) {
2798 if ((r == '-') && (q == '-') && (len > 1)) {
2799 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2801 ctxt->sax->error(ctxt->userData,
2802 "Comment must not contain '--' (double-hyphen)`\n");
2803 ctxt->wellFormed = 0;
2804 ctxt->disableSAX = 1;
2805 }
2806 if (len + 5 >= size) {
2807 size *= 2;
2808 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2809 if (buf == NULL) {
2810 xmlGenericError(xmlGenericErrorContext,
2811 "realloc of %d byte failed\n", size);
2812 ctxt->instate = state;
2813 return;
2814 }
2815 }
2816 COPY_BUF(ql,buf,len,q);
2817 q = r;
2818 ql = rl;
2819 r = cur;
2820 rl = l;
2821
2822 count++;
2823 if (count > 50) {
2824 GROW;
2825 count = 0;
2826 }
2827 NEXTL(l);
2828 cur = CUR_CHAR(l);
2829 if (cur == 0) {
2830 SHRINK;
2831 GROW;
2832 cur = CUR_CHAR(l);
2833 }
2834 }
2835 buf[len] = 0;
2836 if (!IS_CHAR(cur)) {
2837 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2839 ctxt->sax->error(ctxt->userData,
2840 "Comment not terminated \n<!--%.50s\n", buf);
2841 ctxt->wellFormed = 0;
2842 ctxt->disableSAX = 1;
2843 xmlFree(buf);
2844 } else {
2845 if (input != ctxt->input) {
2846 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2848 ctxt->sax->error(ctxt->userData,
2849"Comment doesn't start and stop in the same entity\n");
2850 ctxt->wellFormed = 0;
2851 ctxt->disableSAX = 1;
2852 }
2853 NEXT;
2854 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2855 (!ctxt->disableSAX))
2856 ctxt->sax->comment(ctxt->userData, buf);
2857 xmlFree(buf);
2858 }
2859 ctxt->instate = state;
2860}
2861
2862/**
2863 * xmlParsePITarget:
2864 * @ctxt: an XML parser context
2865 *
2866 * parse the name of a PI
2867 *
2868 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2869 *
2870 * Returns the PITarget name or NULL
2871 */
2872
2873xmlChar *
2874xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2875 xmlChar *name;
2876
2877 name = xmlParseName(ctxt);
2878 if ((name != NULL) &&
2879 ((name[0] == 'x') || (name[0] == 'X')) &&
2880 ((name[1] == 'm') || (name[1] == 'M')) &&
2881 ((name[2] == 'l') || (name[2] == 'L'))) {
2882 int i;
2883 if ((name[0] == 'x') && (name[1] == 'm') &&
2884 (name[2] == 'l') && (name[3] == 0)) {
2885 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2887 ctxt->sax->error(ctxt->userData,
2888 "XML declaration allowed only at the start of the document\n");
2889 ctxt->wellFormed = 0;
2890 ctxt->disableSAX = 1;
2891 return(name);
2892 } else if (name[3] == 0) {
2893 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2895 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2896 ctxt->wellFormed = 0;
2897 ctxt->disableSAX = 1;
2898 return(name);
2899 }
2900 for (i = 0;;i++) {
2901 if (xmlW3CPIs[i] == NULL) break;
2902 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2903 return(name);
2904 }
2905 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2906 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2907 ctxt->sax->warning(ctxt->userData,
2908 "xmlParsePItarget: invalid name prefix 'xml'\n");
2909 }
2910 }
2911 return(name);
2912}
2913
2914/**
2915 * xmlParsePI:
2916 * @ctxt: an XML parser context
2917 *
2918 * parse an XML Processing Instruction.
2919 *
2920 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2921 *
2922 * The processing is transfered to SAX once parsed.
2923 */
2924
2925void
2926xmlParsePI(xmlParserCtxtPtr ctxt) {
2927 xmlChar *buf = NULL;
2928 int len = 0;
2929 int size = XML_PARSER_BUFFER_SIZE;
2930 int cur, l;
2931 xmlChar *target;
2932 xmlParserInputState state;
2933 int count = 0;
2934
2935 if ((RAW == '<') && (NXT(1) == '?')) {
2936 xmlParserInputPtr input = ctxt->input;
2937 state = ctxt->instate;
2938 ctxt->instate = XML_PARSER_PI;
2939 /*
2940 * this is a Processing Instruction.
2941 */
2942 SKIP(2);
2943 SHRINK;
2944
2945 /*
2946 * Parse the target name and check for special support like
2947 * namespace.
2948 */
2949 target = xmlParsePITarget(ctxt);
2950 if (target != NULL) {
2951 if ((RAW == '?') && (NXT(1) == '>')) {
2952 if (input != ctxt->input) {
2953 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2955 ctxt->sax->error(ctxt->userData,
2956 "PI declaration doesn't start and stop in the same entity\n");
2957 ctxt->wellFormed = 0;
2958 ctxt->disableSAX = 1;
2959 }
2960 SKIP(2);
2961
2962 /*
2963 * SAX: PI detected.
2964 */
2965 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2966 (ctxt->sax->processingInstruction != NULL))
2967 ctxt->sax->processingInstruction(ctxt->userData,
2968 target, NULL);
2969 ctxt->instate = state;
2970 xmlFree(target);
2971 return;
2972 }
2973 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2974 if (buf == NULL) {
2975 xmlGenericError(xmlGenericErrorContext,
2976 "malloc of %d byte failed\n", size);
2977 ctxt->instate = state;
2978 return;
2979 }
2980 cur = CUR;
2981 if (!IS_BLANK(cur)) {
2982 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2984 ctxt->sax->error(ctxt->userData,
2985 "xmlParsePI: PI %s space expected\n", target);
2986 ctxt->wellFormed = 0;
2987 ctxt->disableSAX = 1;
2988 }
2989 SKIP_BLANKS;
2990 cur = CUR_CHAR(l);
2991 while (IS_CHAR(cur) && /* checked */
2992 ((cur != '?') || (NXT(1) != '>'))) {
2993 if (len + 5 >= size) {
2994 size *= 2;
2995 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2996 if (buf == NULL) {
2997 xmlGenericError(xmlGenericErrorContext,
2998 "realloc of %d byte failed\n", size);
2999 ctxt->instate = state;
3000 return;
3001 }
3002 }
3003 count++;
3004 if (count > 50) {
3005 GROW;
3006 count = 0;
3007 }
3008 COPY_BUF(l,buf,len,cur);
3009 NEXTL(l);
3010 cur = CUR_CHAR(l);
3011 if (cur == 0) {
3012 SHRINK;
3013 GROW;
3014 cur = CUR_CHAR(l);
3015 }
3016 }
3017 buf[len] = 0;
3018 if (cur != '?') {
3019 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3021 ctxt->sax->error(ctxt->userData,
3022 "xmlParsePI: PI %s never end ...\n", target);
3023 ctxt->wellFormed = 0;
3024 ctxt->disableSAX = 1;
3025 } else {
3026 if (input != ctxt->input) {
3027 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3029 ctxt->sax->error(ctxt->userData,
3030 "PI declaration doesn't start and stop in the same entity\n");
3031 ctxt->wellFormed = 0;
3032 ctxt->disableSAX = 1;
3033 }
3034 SKIP(2);
3035
3036 /*
3037 * SAX: PI detected.
3038 */
3039 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3040 (ctxt->sax->processingInstruction != NULL))
3041 ctxt->sax->processingInstruction(ctxt->userData,
3042 target, buf);
3043 }
3044 xmlFree(buf);
3045 xmlFree(target);
3046 } else {
3047 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3049 ctxt->sax->error(ctxt->userData,
3050 "xmlParsePI : no target name\n");
3051 ctxt->wellFormed = 0;
3052 ctxt->disableSAX = 1;
3053 }
3054 ctxt->instate = state;
3055 }
3056}
3057
3058/**
3059 * xmlParseNotationDecl:
3060 * @ctxt: an XML parser context
3061 *
3062 * parse a notation declaration
3063 *
3064 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3065 *
3066 * Hence there is actually 3 choices:
3067 * 'PUBLIC' S PubidLiteral
3068 * 'PUBLIC' S PubidLiteral S SystemLiteral
3069 * and 'SYSTEM' S SystemLiteral
3070 *
3071 * See the NOTE on xmlParseExternalID().
3072 */
3073
3074void
3075xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3076 xmlChar *name;
3077 xmlChar *Pubid;
3078 xmlChar *Systemid;
3079
3080 if ((RAW == '<') && (NXT(1) == '!') &&
3081 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3082 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3083 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3084 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3085 xmlParserInputPtr input = ctxt->input;
3086 SHRINK;
3087 SKIP(10);
3088 if (!IS_BLANK(CUR)) {
3089 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3091 ctxt->sax->error(ctxt->userData,
3092 "Space required after '<!NOTATION'\n");
3093 ctxt->wellFormed = 0;
3094 ctxt->disableSAX = 1;
3095 return;
3096 }
3097 SKIP_BLANKS;
3098
Daniel Veillard76d66f42001-05-16 21:05:17 +00003099 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 if (name == NULL) {
3101 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3103 ctxt->sax->error(ctxt->userData,
3104 "NOTATION: Name expected here\n");
3105 ctxt->wellFormed = 0;
3106 ctxt->disableSAX = 1;
3107 return;
3108 }
3109 if (!IS_BLANK(CUR)) {
3110 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3112 ctxt->sax->error(ctxt->userData,
3113 "Space required after the NOTATION name'\n");
3114 ctxt->wellFormed = 0;
3115 ctxt->disableSAX = 1;
3116 return;
3117 }
3118 SKIP_BLANKS;
3119
3120 /*
3121 * Parse the IDs.
3122 */
3123 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3124 SKIP_BLANKS;
3125
3126 if (RAW == '>') {
3127 if (input != ctxt->input) {
3128 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3130 ctxt->sax->error(ctxt->userData,
3131"Notation declaration doesn't start and stop in the same entity\n");
3132 ctxt->wellFormed = 0;
3133 ctxt->disableSAX = 1;
3134 }
3135 NEXT;
3136 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3137 (ctxt->sax->notationDecl != NULL))
3138 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3139 } else {
3140 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3142 ctxt->sax->error(ctxt->userData,
3143 "'>' required to close NOTATION declaration\n");
3144 ctxt->wellFormed = 0;
3145 ctxt->disableSAX = 1;
3146 }
3147 xmlFree(name);
3148 if (Systemid != NULL) xmlFree(Systemid);
3149 if (Pubid != NULL) xmlFree(Pubid);
3150 }
3151}
3152
3153/**
3154 * xmlParseEntityDecl:
3155 * @ctxt: an XML parser context
3156 *
3157 * parse <!ENTITY declarations
3158 *
3159 * [70] EntityDecl ::= GEDecl | PEDecl
3160 *
3161 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3162 *
3163 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3164 *
3165 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3166 *
3167 * [74] PEDef ::= EntityValue | ExternalID
3168 *
3169 * [76] NDataDecl ::= S 'NDATA' S Name
3170 *
3171 * [ VC: Notation Declared ]
3172 * The Name must match the declared name of a notation.
3173 */
3174
3175void
3176xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3177 xmlChar *name = NULL;
3178 xmlChar *value = NULL;
3179 xmlChar *URI = NULL, *literal = NULL;
3180 xmlChar *ndata = NULL;
3181 int isParameter = 0;
3182 xmlChar *orig = NULL;
3183
3184 GROW;
3185 if ((RAW == '<') && (NXT(1) == '!') &&
3186 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3187 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3188 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3189 xmlParserInputPtr input = ctxt->input;
3190 ctxt->instate = XML_PARSER_ENTITY_DECL;
3191 SHRINK;
3192 SKIP(8);
3193 if (!IS_BLANK(CUR)) {
3194 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3196 ctxt->sax->error(ctxt->userData,
3197 "Space required after '<!ENTITY'\n");
3198 ctxt->wellFormed = 0;
3199 ctxt->disableSAX = 1;
3200 }
3201 SKIP_BLANKS;
3202
3203 if (RAW == '%') {
3204 NEXT;
3205 if (!IS_BLANK(CUR)) {
3206 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3208 ctxt->sax->error(ctxt->userData,
3209 "Space required after '%'\n");
3210 ctxt->wellFormed = 0;
3211 ctxt->disableSAX = 1;
3212 }
3213 SKIP_BLANKS;
3214 isParameter = 1;
3215 }
3216
Daniel Veillard76d66f42001-05-16 21:05:17 +00003217 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003218 if (name == NULL) {
3219 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3221 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3222 ctxt->wellFormed = 0;
3223 ctxt->disableSAX = 1;
3224 return;
3225 }
3226 if (!IS_BLANK(CUR)) {
3227 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3229 ctxt->sax->error(ctxt->userData,
3230 "Space required after the entity name\n");
3231 ctxt->wellFormed = 0;
3232 ctxt->disableSAX = 1;
3233 }
3234 SKIP_BLANKS;
3235
3236 /*
3237 * handle the various case of definitions...
3238 */
3239 if (isParameter) {
3240 if ((RAW == '"') || (RAW == '\'')) {
3241 value = xmlParseEntityValue(ctxt, &orig);
3242 if (value) {
3243 if ((ctxt->sax != NULL) &&
3244 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3245 ctxt->sax->entityDecl(ctxt->userData, name,
3246 XML_INTERNAL_PARAMETER_ENTITY,
3247 NULL, NULL, value);
3248 }
3249 } else {
3250 URI = xmlParseExternalID(ctxt, &literal, 1);
3251 if ((URI == NULL) && (literal == NULL)) {
3252 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData,
3255 "Entity value required\n");
3256 ctxt->wellFormed = 0;
3257 ctxt->disableSAX = 1;
3258 }
3259 if (URI) {
3260 xmlURIPtr uri;
3261
3262 uri = xmlParseURI((const char *) URI);
3263 if (uri == NULL) {
3264 ctxt->errNo = XML_ERR_INVALID_URI;
3265 if ((ctxt->sax != NULL) &&
3266 (!ctxt->disableSAX) &&
3267 (ctxt->sax->error != NULL))
3268 ctxt->sax->error(ctxt->userData,
3269 "Invalid URI: %s\n", URI);
3270 ctxt->wellFormed = 0;
3271 } else {
3272 if (uri->fragment != NULL) {
3273 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3274 if ((ctxt->sax != NULL) &&
3275 (!ctxt->disableSAX) &&
3276 (ctxt->sax->error != NULL))
3277 ctxt->sax->error(ctxt->userData,
3278 "Fragment not allowed: %s\n", URI);
3279 ctxt->wellFormed = 0;
3280 } else {
3281 if ((ctxt->sax != NULL) &&
3282 (!ctxt->disableSAX) &&
3283 (ctxt->sax->entityDecl != NULL))
3284 ctxt->sax->entityDecl(ctxt->userData, name,
3285 XML_EXTERNAL_PARAMETER_ENTITY,
3286 literal, URI, NULL);
3287 }
3288 xmlFreeURI(uri);
3289 }
3290 }
3291 }
3292 } else {
3293 if ((RAW == '"') || (RAW == '\'')) {
3294 value = xmlParseEntityValue(ctxt, &orig);
3295 if ((ctxt->sax != NULL) &&
3296 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3297 ctxt->sax->entityDecl(ctxt->userData, name,
3298 XML_INTERNAL_GENERAL_ENTITY,
3299 NULL, NULL, value);
3300 } else {
3301 URI = xmlParseExternalID(ctxt, &literal, 1);
3302 if ((URI == NULL) && (literal == NULL)) {
3303 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3305 ctxt->sax->error(ctxt->userData,
3306 "Entity value required\n");
3307 ctxt->wellFormed = 0;
3308 ctxt->disableSAX = 1;
3309 }
3310 if (URI) {
3311 xmlURIPtr uri;
3312
3313 uri = xmlParseURI((const char *)URI);
3314 if (uri == NULL) {
3315 ctxt->errNo = XML_ERR_INVALID_URI;
3316 if ((ctxt->sax != NULL) &&
3317 (!ctxt->disableSAX) &&
3318 (ctxt->sax->error != NULL))
3319 ctxt->sax->error(ctxt->userData,
3320 "Invalid URI: %s\n", URI);
3321 ctxt->wellFormed = 0;
3322 } else {
3323 if (uri->fragment != NULL) {
3324 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3325 if ((ctxt->sax != NULL) &&
3326 (!ctxt->disableSAX) &&
3327 (ctxt->sax->error != NULL))
3328 ctxt->sax->error(ctxt->userData,
3329 "Fragment not allowed: %s\n", URI);
3330 ctxt->wellFormed = 0;
3331 }
3332 xmlFreeURI(uri);
3333 }
3334 }
3335 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3336 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3338 ctxt->sax->error(ctxt->userData,
3339 "Space required before 'NDATA'\n");
3340 ctxt->wellFormed = 0;
3341 ctxt->disableSAX = 1;
3342 }
3343 SKIP_BLANKS;
3344 if ((RAW == 'N') && (NXT(1) == 'D') &&
3345 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3346 (NXT(4) == 'A')) {
3347 SKIP(5);
3348 if (!IS_BLANK(CUR)) {
3349 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3351 ctxt->sax->error(ctxt->userData,
3352 "Space required after 'NDATA'\n");
3353 ctxt->wellFormed = 0;
3354 ctxt->disableSAX = 1;
3355 }
3356 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003357 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003358 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3359 (ctxt->sax->unparsedEntityDecl != NULL))
3360 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3361 literal, URI, ndata);
3362 } else {
3363 if ((ctxt->sax != NULL) &&
3364 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3365 ctxt->sax->entityDecl(ctxt->userData, name,
3366 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3367 literal, URI, NULL);
3368 }
3369 }
3370 }
3371 SKIP_BLANKS;
3372 if (RAW != '>') {
3373 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData,
3376 "xmlParseEntityDecl: entity %s not terminated\n", name);
3377 ctxt->wellFormed = 0;
3378 ctxt->disableSAX = 1;
3379 } else {
3380 if (input != ctxt->input) {
3381 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384"Entity declaration doesn't start and stop in the same entity\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388 NEXT;
3389 }
3390 if (orig != NULL) {
3391 /*
3392 * Ugly mechanism to save the raw entity value.
3393 */
3394 xmlEntityPtr cur = NULL;
3395
3396 if (isParameter) {
3397 if ((ctxt->sax != NULL) &&
3398 (ctxt->sax->getParameterEntity != NULL))
3399 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3400 } else {
3401 if ((ctxt->sax != NULL) &&
3402 (ctxt->sax->getEntity != NULL))
3403 cur = ctxt->sax->getEntity(ctxt->userData, name);
3404 }
3405 if (cur != NULL) {
3406 if (cur->orig != NULL)
3407 xmlFree(orig);
3408 else
3409 cur->orig = orig;
3410 } else
3411 xmlFree(orig);
3412 }
3413 if (name != NULL) xmlFree(name);
3414 if (value != NULL) xmlFree(value);
3415 if (URI != NULL) xmlFree(URI);
3416 if (literal != NULL) xmlFree(literal);
3417 if (ndata != NULL) xmlFree(ndata);
3418 }
3419}
3420
3421/**
3422 * xmlParseDefaultDecl:
3423 * @ctxt: an XML parser context
3424 * @value: Receive a possible fixed default value for the attribute
3425 *
3426 * Parse an attribute default declaration
3427 *
3428 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3429 *
3430 * [ VC: Required Attribute ]
3431 * if the default declaration is the keyword #REQUIRED, then the
3432 * attribute must be specified for all elements of the type in the
3433 * attribute-list declaration.
3434 *
3435 * [ VC: Attribute Default Legal ]
3436 * The declared default value must meet the lexical constraints of
3437 * the declared attribute type c.f. xmlValidateAttributeDecl()
3438 *
3439 * [ VC: Fixed Attribute Default ]
3440 * if an attribute has a default value declared with the #FIXED
3441 * keyword, instances of that attribute must match the default value.
3442 *
3443 * [ WFC: No < in Attribute Values ]
3444 * handled in xmlParseAttValue()
3445 *
3446 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3447 * or XML_ATTRIBUTE_FIXED.
3448 */
3449
3450int
3451xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3452 int val;
3453 xmlChar *ret;
3454
3455 *value = NULL;
3456 if ((RAW == '#') && (NXT(1) == 'R') &&
3457 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3458 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3459 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3460 (NXT(8) == 'D')) {
3461 SKIP(9);
3462 return(XML_ATTRIBUTE_REQUIRED);
3463 }
3464 if ((RAW == '#') && (NXT(1) == 'I') &&
3465 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3466 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3467 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3468 SKIP(8);
3469 return(XML_ATTRIBUTE_IMPLIED);
3470 }
3471 val = XML_ATTRIBUTE_NONE;
3472 if ((RAW == '#') && (NXT(1) == 'F') &&
3473 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3474 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3475 SKIP(6);
3476 val = XML_ATTRIBUTE_FIXED;
3477 if (!IS_BLANK(CUR)) {
3478 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3480 ctxt->sax->error(ctxt->userData,
3481 "Space required after '#FIXED'\n");
3482 ctxt->wellFormed = 0;
3483 ctxt->disableSAX = 1;
3484 }
3485 SKIP_BLANKS;
3486 }
3487 ret = xmlParseAttValue(ctxt);
3488 ctxt->instate = XML_PARSER_DTD;
3489 if (ret == NULL) {
3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3491 ctxt->sax->error(ctxt->userData,
3492 "Attribute default value declaration error\n");
3493 ctxt->wellFormed = 0;
3494 ctxt->disableSAX = 1;
3495 } else
3496 *value = ret;
3497 return(val);
3498}
3499
3500/**
3501 * xmlParseNotationType:
3502 * @ctxt: an XML parser context
3503 *
3504 * parse an Notation attribute type.
3505 *
3506 * Note: the leading 'NOTATION' S part has already being parsed...
3507 *
3508 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3509 *
3510 * [ VC: Notation Attributes ]
3511 * Values of this type must match one of the notation names included
3512 * in the declaration; all notation names in the declaration must be declared.
3513 *
3514 * Returns: the notation attribute tree built while parsing
3515 */
3516
3517xmlEnumerationPtr
3518xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3519 xmlChar *name;
3520 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3521
3522 if (RAW != '(') {
3523 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3525 ctxt->sax->error(ctxt->userData,
3526 "'(' required to start 'NOTATION'\n");
3527 ctxt->wellFormed = 0;
3528 ctxt->disableSAX = 1;
3529 return(NULL);
3530 }
3531 SHRINK;
3532 do {
3533 NEXT;
3534 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003535 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003536 if (name == NULL) {
3537 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3539 ctxt->sax->error(ctxt->userData,
3540 "Name expected in NOTATION declaration\n");
3541 ctxt->wellFormed = 0;
3542 ctxt->disableSAX = 1;
3543 return(ret);
3544 }
3545 cur = xmlCreateEnumeration(name);
3546 xmlFree(name);
3547 if (cur == NULL) return(ret);
3548 if (last == NULL) ret = last = cur;
3549 else {
3550 last->next = cur;
3551 last = cur;
3552 }
3553 SKIP_BLANKS;
3554 } while (RAW == '|');
3555 if (RAW != ')') {
3556 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3558 ctxt->sax->error(ctxt->userData,
3559 "')' required to finish NOTATION declaration\n");
3560 ctxt->wellFormed = 0;
3561 ctxt->disableSAX = 1;
3562 if ((last != NULL) && (last != ret))
3563 xmlFreeEnumeration(last);
3564 return(ret);
3565 }
3566 NEXT;
3567 return(ret);
3568}
3569
3570/**
3571 * xmlParseEnumerationType:
3572 * @ctxt: an XML parser context
3573 *
3574 * parse an Enumeration attribute type.
3575 *
3576 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3577 *
3578 * [ VC: Enumeration ]
3579 * Values of this type must match one of the Nmtoken tokens in
3580 * the declaration
3581 *
3582 * Returns: the enumeration attribute tree built while parsing
3583 */
3584
3585xmlEnumerationPtr
3586xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3587 xmlChar *name;
3588 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3589
3590 if (RAW != '(') {
3591 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3593 ctxt->sax->error(ctxt->userData,
3594 "'(' required to start ATTLIST enumeration\n");
3595 ctxt->wellFormed = 0;
3596 ctxt->disableSAX = 1;
3597 return(NULL);
3598 }
3599 SHRINK;
3600 do {
3601 NEXT;
3602 SKIP_BLANKS;
3603 name = xmlParseNmtoken(ctxt);
3604 if (name == NULL) {
3605 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3607 ctxt->sax->error(ctxt->userData,
3608 "NmToken expected in ATTLIST enumeration\n");
3609 ctxt->wellFormed = 0;
3610 ctxt->disableSAX = 1;
3611 return(ret);
3612 }
3613 cur = xmlCreateEnumeration(name);
3614 xmlFree(name);
3615 if (cur == NULL) return(ret);
3616 if (last == NULL) ret = last = cur;
3617 else {
3618 last->next = cur;
3619 last = cur;
3620 }
3621 SKIP_BLANKS;
3622 } while (RAW == '|');
3623 if (RAW != ')') {
3624 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3626 ctxt->sax->error(ctxt->userData,
3627 "')' required to finish ATTLIST enumeration\n");
3628 ctxt->wellFormed = 0;
3629 ctxt->disableSAX = 1;
3630 return(ret);
3631 }
3632 NEXT;
3633 return(ret);
3634}
3635
3636/**
3637 * xmlParseEnumeratedType:
3638 * @ctxt: an XML parser context
3639 * @tree: the enumeration tree built while parsing
3640 *
3641 * parse an Enumerated attribute type.
3642 *
3643 * [57] EnumeratedType ::= NotationType | Enumeration
3644 *
3645 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3646 *
3647 *
3648 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3649 */
3650
3651int
3652xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3653 if ((RAW == 'N') && (NXT(1) == 'O') &&
3654 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3655 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3656 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3657 SKIP(8);
3658 if (!IS_BLANK(CUR)) {
3659 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3661 ctxt->sax->error(ctxt->userData,
3662 "Space required after 'NOTATION'\n");
3663 ctxt->wellFormed = 0;
3664 ctxt->disableSAX = 1;
3665 return(0);
3666 }
3667 SKIP_BLANKS;
3668 *tree = xmlParseNotationType(ctxt);
3669 if (*tree == NULL) return(0);
3670 return(XML_ATTRIBUTE_NOTATION);
3671 }
3672 *tree = xmlParseEnumerationType(ctxt);
3673 if (*tree == NULL) return(0);
3674 return(XML_ATTRIBUTE_ENUMERATION);
3675}
3676
3677/**
3678 * xmlParseAttributeType:
3679 * @ctxt: an XML parser context
3680 * @tree: the enumeration tree built while parsing
3681 *
3682 * parse the Attribute list def for an element
3683 *
3684 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3685 *
3686 * [55] StringType ::= 'CDATA'
3687 *
3688 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3689 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3690 *
3691 * Validity constraints for attribute values syntax are checked in
3692 * xmlValidateAttributeValue()
3693 *
3694 * [ VC: ID ]
3695 * Values of type ID must match the Name production. A name must not
3696 * appear more than once in an XML document as a value of this type;
3697 * i.e., ID values must uniquely identify the elements which bear them.
3698 *
3699 * [ VC: One ID per Element Type ]
3700 * No element type may have more than one ID attribute specified.
3701 *
3702 * [ VC: ID Attribute Default ]
3703 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3704 *
3705 * [ VC: IDREF ]
3706 * Values of type IDREF must match the Name production, and values
3707 * of type IDREFS must match Names; each IDREF Name must match the value
3708 * of an ID attribute on some element in the XML document; i.e. IDREF
3709 * values must match the value of some ID attribute.
3710 *
3711 * [ VC: Entity Name ]
3712 * Values of type ENTITY must match the Name production, values
3713 * of type ENTITIES must match Names; each Entity Name must match the
3714 * name of an unparsed entity declared in the DTD.
3715 *
3716 * [ VC: Name Token ]
3717 * Values of type NMTOKEN must match the Nmtoken production; values
3718 * of type NMTOKENS must match Nmtokens.
3719 *
3720 * Returns the attribute type
3721 */
3722int
3723xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3724 SHRINK;
3725 if ((RAW == 'C') && (NXT(1) == 'D') &&
3726 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3727 (NXT(4) == 'A')) {
3728 SKIP(5);
3729 return(XML_ATTRIBUTE_CDATA);
3730 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3731 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3732 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3733 SKIP(6);
3734 return(XML_ATTRIBUTE_IDREFS);
3735 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3736 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3737 (NXT(4) == 'F')) {
3738 SKIP(5);
3739 return(XML_ATTRIBUTE_IDREF);
3740 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3741 SKIP(2);
3742 return(XML_ATTRIBUTE_ID);
3743 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3744 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3745 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3746 SKIP(6);
3747 return(XML_ATTRIBUTE_ENTITY);
3748 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3749 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3750 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3751 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3752 SKIP(8);
3753 return(XML_ATTRIBUTE_ENTITIES);
3754 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3755 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3756 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3757 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3758 SKIP(8);
3759 return(XML_ATTRIBUTE_NMTOKENS);
3760 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3761 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3762 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3763 (NXT(6) == 'N')) {
3764 SKIP(7);
3765 return(XML_ATTRIBUTE_NMTOKEN);
3766 }
3767 return(xmlParseEnumeratedType(ctxt, tree));
3768}
3769
3770/**
3771 * xmlParseAttributeListDecl:
3772 * @ctxt: an XML parser context
3773 *
3774 * : parse the Attribute list def for an element
3775 *
3776 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3777 *
3778 * [53] AttDef ::= S Name S AttType S DefaultDecl
3779 *
3780 */
3781void
3782xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3783 xmlChar *elemName;
3784 xmlChar *attrName;
3785 xmlEnumerationPtr tree;
3786
3787 if ((RAW == '<') && (NXT(1) == '!') &&
3788 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3789 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3790 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3791 (NXT(8) == 'T')) {
3792 xmlParserInputPtr input = ctxt->input;
3793
3794 SKIP(9);
3795 if (!IS_BLANK(CUR)) {
3796 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3798 ctxt->sax->error(ctxt->userData,
3799 "Space required after '<!ATTLIST'\n");
3800 ctxt->wellFormed = 0;
3801 ctxt->disableSAX = 1;
3802 }
3803 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003804 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003805 if (elemName == NULL) {
3806 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3808 ctxt->sax->error(ctxt->userData,
3809 "ATTLIST: no name for Element\n");
3810 ctxt->wellFormed = 0;
3811 ctxt->disableSAX = 1;
3812 return;
3813 }
3814 SKIP_BLANKS;
3815 GROW;
3816 while (RAW != '>') {
3817 const xmlChar *check = CUR_PTR;
3818 int type;
3819 int def;
3820 xmlChar *defaultValue = NULL;
3821
3822 GROW;
3823 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003824 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003825 if (attrName == NULL) {
3826 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3828 ctxt->sax->error(ctxt->userData,
3829 "ATTLIST: no name for Attribute\n");
3830 ctxt->wellFormed = 0;
3831 ctxt->disableSAX = 1;
3832 break;
3833 }
3834 GROW;
3835 if (!IS_BLANK(CUR)) {
3836 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3838 ctxt->sax->error(ctxt->userData,
3839 "Space required after the attribute name\n");
3840 ctxt->wellFormed = 0;
3841 ctxt->disableSAX = 1;
3842 if (attrName != NULL)
3843 xmlFree(attrName);
3844 if (defaultValue != NULL)
3845 xmlFree(defaultValue);
3846 break;
3847 }
3848 SKIP_BLANKS;
3849
3850 type = xmlParseAttributeType(ctxt, &tree);
3851 if (type <= 0) {
3852 if (attrName != NULL)
3853 xmlFree(attrName);
3854 if (defaultValue != NULL)
3855 xmlFree(defaultValue);
3856 break;
3857 }
3858
3859 GROW;
3860 if (!IS_BLANK(CUR)) {
3861 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3863 ctxt->sax->error(ctxt->userData,
3864 "Space required after the attribute type\n");
3865 ctxt->wellFormed = 0;
3866 ctxt->disableSAX = 1;
3867 if (attrName != NULL)
3868 xmlFree(attrName);
3869 if (defaultValue != NULL)
3870 xmlFree(defaultValue);
3871 if (tree != NULL)
3872 xmlFreeEnumeration(tree);
3873 break;
3874 }
3875 SKIP_BLANKS;
3876
3877 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3878 if (def <= 0) {
3879 if (attrName != NULL)
3880 xmlFree(attrName);
3881 if (defaultValue != NULL)
3882 xmlFree(defaultValue);
3883 if (tree != NULL)
3884 xmlFreeEnumeration(tree);
3885 break;
3886 }
3887
3888 GROW;
3889 if (RAW != '>') {
3890 if (!IS_BLANK(CUR)) {
3891 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3893 ctxt->sax->error(ctxt->userData,
3894 "Space required after the attribute default value\n");
3895 ctxt->wellFormed = 0;
3896 ctxt->disableSAX = 1;
3897 if (attrName != NULL)
3898 xmlFree(attrName);
3899 if (defaultValue != NULL)
3900 xmlFree(defaultValue);
3901 if (tree != NULL)
3902 xmlFreeEnumeration(tree);
3903 break;
3904 }
3905 SKIP_BLANKS;
3906 }
3907 if (check == CUR_PTR) {
3908 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3910 ctxt->sax->error(ctxt->userData,
3911 "xmlParseAttributeListDecl: detected internal error\n");
3912 if (attrName != NULL)
3913 xmlFree(attrName);
3914 if (defaultValue != NULL)
3915 xmlFree(defaultValue);
3916 if (tree != NULL)
3917 xmlFreeEnumeration(tree);
3918 break;
3919 }
3920 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3921 (ctxt->sax->attributeDecl != NULL))
3922 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3923 type, def, defaultValue, tree);
3924 if (attrName != NULL)
3925 xmlFree(attrName);
3926 if (defaultValue != NULL)
3927 xmlFree(defaultValue);
3928 GROW;
3929 }
3930 if (RAW == '>') {
3931 if (input != ctxt->input) {
3932 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3934 ctxt->sax->error(ctxt->userData,
3935"Attribute list declaration doesn't start and stop in the same entity\n");
3936 ctxt->wellFormed = 0;
3937 ctxt->disableSAX = 1;
3938 }
3939 NEXT;
3940 }
3941
3942 xmlFree(elemName);
3943 }
3944}
3945
3946/**
3947 * xmlParseElementMixedContentDecl:
3948 * @ctxt: an XML parser context
3949 *
3950 * parse the declaration for a Mixed Element content
3951 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3952 *
3953 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3954 * '(' S? '#PCDATA' S? ')'
3955 *
3956 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3957 *
3958 * [ VC: No Duplicate Types ]
3959 * The same name must not appear more than once in a single
3960 * mixed-content declaration.
3961 *
3962 * returns: the list of the xmlElementContentPtr describing the element choices
3963 */
3964xmlElementContentPtr
3965xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3966 xmlElementContentPtr ret = NULL, cur = NULL, n;
3967 xmlChar *elem = NULL;
3968
3969 GROW;
3970 if ((RAW == '#') && (NXT(1) == 'P') &&
3971 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3972 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3973 (NXT(6) == 'A')) {
3974 SKIP(7);
3975 SKIP_BLANKS;
3976 SHRINK;
3977 if (RAW == ')') {
3978 ctxt->entity = ctxt->input;
3979 NEXT;
3980 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3981 if (RAW == '*') {
3982 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3983 NEXT;
3984 }
3985 return(ret);
3986 }
3987 if ((RAW == '(') || (RAW == '|')) {
3988 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3989 if (ret == NULL) return(NULL);
3990 }
3991 while (RAW == '|') {
3992 NEXT;
3993 if (elem == NULL) {
3994 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3995 if (ret == NULL) return(NULL);
3996 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003997 if (cur != NULL)
3998 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003999 cur = ret;
4000 } else {
4001 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4002 if (n == NULL) return(NULL);
4003 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004004 if (n->c1 != NULL)
4005 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004006 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004007 if (n != NULL)
4008 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004009 cur = n;
4010 xmlFree(elem);
4011 }
4012 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004013 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004014 if (elem == NULL) {
4015 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4017 ctxt->sax->error(ctxt->userData,
4018 "xmlParseElementMixedContentDecl : Name expected\n");
4019 ctxt->wellFormed = 0;
4020 ctxt->disableSAX = 1;
4021 xmlFreeElementContent(cur);
4022 return(NULL);
4023 }
4024 SKIP_BLANKS;
4025 GROW;
4026 }
4027 if ((RAW == ')') && (NXT(1) == '*')) {
4028 if (elem != NULL) {
4029 cur->c2 = xmlNewElementContent(elem,
4030 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004031 if (cur->c2 != NULL)
4032 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004033 xmlFree(elem);
4034 }
4035 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4036 ctxt->entity = ctxt->input;
4037 SKIP(2);
4038 } else {
4039 if (elem != NULL) xmlFree(elem);
4040 xmlFreeElementContent(ret);
4041 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4043 ctxt->sax->error(ctxt->userData,
4044 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4045 ctxt->wellFormed = 0;
4046 ctxt->disableSAX = 1;
4047 return(NULL);
4048 }
4049
4050 } else {
4051 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4053 ctxt->sax->error(ctxt->userData,
4054 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4055 ctxt->wellFormed = 0;
4056 ctxt->disableSAX = 1;
4057 }
4058 return(ret);
4059}
4060
4061/**
4062 * xmlParseElementChildrenContentDecl:
4063 * @ctxt: an XML parser context
4064 *
4065 * parse the declaration for an element children content model
4066 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4067 *
4068 *
4069 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4070 *
4071 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4072 *
4073 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4074 *
4075 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4076 *
4077 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4078 * TODO Parameter-entity replacement text must be properly nested
4079 * with parenthesized groups. That is to say, if either of the
4080 * opening or closing parentheses in a choice, seq, or Mixed
4081 * construct is contained in the replacement text for a parameter
4082 * entity, both must be contained in the same replacement text. For
4083 * interoperability, if a parameter-entity reference appears in a
4084 * choice, seq, or Mixed construct, its replacement text should not
4085 * be empty, and neither the first nor last non-blank character of
4086 * the replacement text should be a connector (| or ,).
4087 *
4088 * returns: the tree of xmlElementContentPtr describing the element
4089 * hierarchy.
4090 */
4091xmlElementContentPtr
4092#ifdef VMS
4093xmlParseElementChildrenContentD
4094#else
4095xmlParseElementChildrenContentDecl
4096#endif
4097(xmlParserCtxtPtr ctxt) {
4098 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4099 xmlChar *elem;
4100 xmlChar type = 0;
4101
4102 SKIP_BLANKS;
4103 GROW;
4104 if (RAW == '(') {
4105 /* Recurse on first child */
4106 NEXT;
4107 SKIP_BLANKS;
4108 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4109 SKIP_BLANKS;
4110 GROW;
4111 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004112 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004113 if (elem == NULL) {
4114 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4116 ctxt->sax->error(ctxt->userData,
4117 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4118 ctxt->wellFormed = 0;
4119 ctxt->disableSAX = 1;
4120 return(NULL);
4121 }
4122 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4123 GROW;
4124 if (RAW == '?') {
4125 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4126 NEXT;
4127 } else if (RAW == '*') {
4128 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4129 NEXT;
4130 } else if (RAW == '+') {
4131 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4132 NEXT;
4133 } else {
4134 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4135 }
4136 xmlFree(elem);
4137 GROW;
4138 }
4139 SKIP_BLANKS;
4140 SHRINK;
4141 while (RAW != ')') {
4142 /*
4143 * Each loop we parse one separator and one element.
4144 */
4145 if (RAW == ',') {
4146 if (type == 0) type = CUR;
4147
4148 /*
4149 * Detect "Name | Name , Name" error
4150 */
4151 else if (type != CUR) {
4152 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4154 ctxt->sax->error(ctxt->userData,
4155 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4156 type);
4157 ctxt->wellFormed = 0;
4158 ctxt->disableSAX = 1;
4159 if ((op != NULL) && (op != ret))
4160 xmlFreeElementContent(op);
4161 if ((last != NULL) && (last != ret) &&
4162 (last != ret->c1) && (last != ret->c2))
4163 xmlFreeElementContent(last);
4164 if (ret != NULL)
4165 xmlFreeElementContent(ret);
4166 return(NULL);
4167 }
4168 NEXT;
4169
4170 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4171 if (op == NULL) {
4172 xmlFreeElementContent(ret);
4173 return(NULL);
4174 }
4175 if (last == NULL) {
4176 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004177 if (ret != NULL)
4178 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004179 ret = cur = op;
4180 } else {
4181 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004182 if (op != NULL)
4183 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004184 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004185 if (last != NULL)
4186 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004187 cur =op;
4188 last = NULL;
4189 }
4190 } else if (RAW == '|') {
4191 if (type == 0) type = CUR;
4192
4193 /*
4194 * Detect "Name , Name | Name" error
4195 */
4196 else if (type != CUR) {
4197 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4199 ctxt->sax->error(ctxt->userData,
4200 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4201 type);
4202 ctxt->wellFormed = 0;
4203 ctxt->disableSAX = 1;
4204 if ((op != NULL) && (op != ret) && (op != last))
4205 xmlFreeElementContent(op);
4206 if ((last != NULL) && (last != ret) &&
4207 (last != ret->c1) && (last != ret->c2))
4208 xmlFreeElementContent(last);
4209 if (ret != NULL)
4210 xmlFreeElementContent(ret);
4211 return(NULL);
4212 }
4213 NEXT;
4214
4215 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4216 if (op == NULL) {
4217 if ((op != NULL) && (op != ret))
4218 xmlFreeElementContent(op);
4219 if ((last != NULL) && (last != ret) &&
4220 (last != ret->c1) && (last != ret->c2))
4221 xmlFreeElementContent(last);
4222 if (ret != NULL)
4223 xmlFreeElementContent(ret);
4224 return(NULL);
4225 }
4226 if (last == NULL) {
4227 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004228 if (ret != NULL)
4229 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004230 ret = cur = op;
4231 } else {
4232 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004233 if (op != NULL)
4234 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004235 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004236 if (last != NULL)
4237 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004238 cur =op;
4239 last = NULL;
4240 }
4241 } else {
4242 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4244 ctxt->sax->error(ctxt->userData,
4245 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4246 ctxt->wellFormed = 0;
4247 ctxt->disableSAX = 1;
4248 if ((op != NULL) && (op != ret))
4249 xmlFreeElementContent(op);
4250 if ((last != NULL) && (last != ret) &&
4251 (last != ret->c1) && (last != ret->c2))
4252 xmlFreeElementContent(last);
4253 if (ret != NULL)
4254 xmlFreeElementContent(ret);
4255 return(NULL);
4256 }
4257 GROW;
4258 SKIP_BLANKS;
4259 GROW;
4260 if (RAW == '(') {
4261 /* Recurse on second child */
4262 NEXT;
4263 SKIP_BLANKS;
4264 last = xmlParseElementChildrenContentDecl(ctxt);
4265 SKIP_BLANKS;
4266 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004267 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004268 if (elem == NULL) {
4269 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4271 ctxt->sax->error(ctxt->userData,
4272 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4273 ctxt->wellFormed = 0;
4274 ctxt->disableSAX = 1;
4275 if ((op != NULL) && (op != ret))
4276 xmlFreeElementContent(op);
4277 if ((last != NULL) && (last != ret) &&
4278 (last != ret->c1) && (last != ret->c2))
4279 xmlFreeElementContent(last);
4280 if (ret != NULL)
4281 xmlFreeElementContent(ret);
4282 return(NULL);
4283 }
4284 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4285 xmlFree(elem);
4286 if (RAW == '?') {
4287 last->ocur = XML_ELEMENT_CONTENT_OPT;
4288 NEXT;
4289 } else if (RAW == '*') {
4290 last->ocur = XML_ELEMENT_CONTENT_MULT;
4291 NEXT;
4292 } else if (RAW == '+') {
4293 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4294 NEXT;
4295 } else {
4296 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4297 }
4298 }
4299 SKIP_BLANKS;
4300 GROW;
4301 }
4302 if ((cur != NULL) && (last != NULL)) {
4303 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004304 if (last != NULL)
4305 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004306 }
4307 ctxt->entity = ctxt->input;
4308 NEXT;
4309 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004310 if (ret != NULL)
4311 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004312 NEXT;
4313 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004314 if (ret != NULL)
4315 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004316 NEXT;
4317 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004318 if (ret != NULL)
4319 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004320 NEXT;
4321 }
4322 return(ret);
4323}
4324
4325/**
4326 * xmlParseElementContentDecl:
4327 * @ctxt: an XML parser context
4328 * @name: the name of the element being defined.
4329 * @result: the Element Content pointer will be stored here if any
4330 *
4331 * parse the declaration for an Element content either Mixed or Children,
4332 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4333 *
4334 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4335 *
4336 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4337 */
4338
4339int
4340xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4341 xmlElementContentPtr *result) {
4342
4343 xmlElementContentPtr tree = NULL;
4344 xmlParserInputPtr input = ctxt->input;
4345 int res;
4346
4347 *result = NULL;
4348
4349 if (RAW != '(') {
4350 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4352 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004353 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004354 ctxt->wellFormed = 0;
4355 ctxt->disableSAX = 1;
4356 return(-1);
4357 }
4358 NEXT;
4359 GROW;
4360 SKIP_BLANKS;
4361 if ((RAW == '#') && (NXT(1) == 'P') &&
4362 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4363 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4364 (NXT(6) == 'A')) {
4365 tree = xmlParseElementMixedContentDecl(ctxt);
4366 res = XML_ELEMENT_TYPE_MIXED;
4367 } else {
4368 tree = xmlParseElementChildrenContentDecl(ctxt);
4369 res = XML_ELEMENT_TYPE_ELEMENT;
4370 }
4371 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4372 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4374 ctxt->sax->error(ctxt->userData,
4375"Element content declaration doesn't start and stop in the same entity\n");
4376 ctxt->wellFormed = 0;
4377 ctxt->disableSAX = 1;
4378 }
4379 SKIP_BLANKS;
4380 *result = tree;
4381 return(res);
4382}
4383
4384/**
4385 * xmlParseElementDecl:
4386 * @ctxt: an XML parser context
4387 *
4388 * parse an Element declaration.
4389 *
4390 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4391 *
4392 * [ VC: Unique Element Type Declaration ]
4393 * No element type may be declared more than once
4394 *
4395 * Returns the type of the element, or -1 in case of error
4396 */
4397int
4398xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4399 xmlChar *name;
4400 int ret = -1;
4401 xmlElementContentPtr content = NULL;
4402
4403 GROW;
4404 if ((RAW == '<') && (NXT(1) == '!') &&
4405 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4406 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4407 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4408 (NXT(8) == 'T')) {
4409 xmlParserInputPtr input = ctxt->input;
4410
4411 SKIP(9);
4412 if (!IS_BLANK(CUR)) {
4413 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4415 ctxt->sax->error(ctxt->userData,
4416 "Space required after 'ELEMENT'\n");
4417 ctxt->wellFormed = 0;
4418 ctxt->disableSAX = 1;
4419 }
4420 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004421 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004422 if (name == NULL) {
4423 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4425 ctxt->sax->error(ctxt->userData,
4426 "xmlParseElementDecl: no name for Element\n");
4427 ctxt->wellFormed = 0;
4428 ctxt->disableSAX = 1;
4429 return(-1);
4430 }
4431 while ((RAW == 0) && (ctxt->inputNr > 1))
4432 xmlPopInput(ctxt);
4433 if (!IS_BLANK(CUR)) {
4434 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4436 ctxt->sax->error(ctxt->userData,
4437 "Space required after the element name\n");
4438 ctxt->wellFormed = 0;
4439 ctxt->disableSAX = 1;
4440 }
4441 SKIP_BLANKS;
4442 if ((RAW == 'E') && (NXT(1) == 'M') &&
4443 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4444 (NXT(4) == 'Y')) {
4445 SKIP(5);
4446 /*
4447 * Element must always be empty.
4448 */
4449 ret = XML_ELEMENT_TYPE_EMPTY;
4450 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4451 (NXT(2) == 'Y')) {
4452 SKIP(3);
4453 /*
4454 * Element is a generic container.
4455 */
4456 ret = XML_ELEMENT_TYPE_ANY;
4457 } else if (RAW == '(') {
4458 ret = xmlParseElementContentDecl(ctxt, name, &content);
4459 } else {
4460 /*
4461 * [ WFC: PEs in Internal Subset ] error handling.
4462 */
4463 if ((RAW == '%') && (ctxt->external == 0) &&
4464 (ctxt->inputNr == 1)) {
4465 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4467 ctxt->sax->error(ctxt->userData,
4468 "PEReference: forbidden within markup decl in internal subset\n");
4469 } else {
4470 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4472 ctxt->sax->error(ctxt->userData,
4473 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4474 }
4475 ctxt->wellFormed = 0;
4476 ctxt->disableSAX = 1;
4477 if (name != NULL) xmlFree(name);
4478 return(-1);
4479 }
4480
4481 SKIP_BLANKS;
4482 /*
4483 * Pop-up of finished entities.
4484 */
4485 while ((RAW == 0) && (ctxt->inputNr > 1))
4486 xmlPopInput(ctxt);
4487 SKIP_BLANKS;
4488
4489 if (RAW != '>') {
4490 ctxt->errNo = XML_ERR_GT_REQUIRED;
4491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4492 ctxt->sax->error(ctxt->userData,
4493 "xmlParseElementDecl: expected '>' at the end\n");
4494 ctxt->wellFormed = 0;
4495 ctxt->disableSAX = 1;
4496 } else {
4497 if (input != ctxt->input) {
4498 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4500 ctxt->sax->error(ctxt->userData,
4501"Element declaration doesn't start and stop in the same entity\n");
4502 ctxt->wellFormed = 0;
4503 ctxt->disableSAX = 1;
4504 }
4505
4506 NEXT;
4507 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4508 (ctxt->sax->elementDecl != NULL))
4509 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4510 content);
4511 }
4512 if (content != NULL) {
4513 xmlFreeElementContent(content);
4514 }
4515 if (name != NULL) {
4516 xmlFree(name);
4517 }
4518 }
4519 return(ret);
4520}
4521
4522/**
4523 * xmlParseMarkupDecl:
4524 * @ctxt: an XML parser context
4525 *
4526 * parse Markup declarations
4527 *
4528 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4529 * NotationDecl | PI | Comment
4530 *
4531 * [ VC: Proper Declaration/PE Nesting ]
4532 * Parameter-entity replacement text must be properly nested with
4533 * markup declarations. That is to say, if either the first character
4534 * or the last character of a markup declaration (markupdecl above) is
4535 * contained in the replacement text for a parameter-entity reference,
4536 * both must be contained in the same replacement text.
4537 *
4538 * [ WFC: PEs in Internal Subset ]
4539 * In the internal DTD subset, parameter-entity references can occur
4540 * only where markup declarations can occur, not within markup declarations.
4541 * (This does not apply to references that occur in external parameter
4542 * entities or to the external subset.)
4543 */
4544void
4545xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4546 GROW;
4547 xmlParseElementDecl(ctxt);
4548 xmlParseAttributeListDecl(ctxt);
4549 xmlParseEntityDecl(ctxt);
4550 xmlParseNotationDecl(ctxt);
4551 xmlParsePI(ctxt);
4552 xmlParseComment(ctxt);
4553 /*
4554 * This is only for internal subset. On external entities,
4555 * the replacement is done before parsing stage
4556 */
4557 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4558 xmlParsePEReference(ctxt);
4559 ctxt->instate = XML_PARSER_DTD;
4560}
4561
4562/**
4563 * xmlParseTextDecl:
4564 * @ctxt: an XML parser context
4565 *
4566 * parse an XML declaration header for external entities
4567 *
4568 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4569 *
4570 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4571 */
4572
4573void
4574xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4575 xmlChar *version;
4576
4577 /*
4578 * We know that '<?xml' is here.
4579 */
4580 if ((RAW == '<') && (NXT(1) == '?') &&
4581 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4582 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4583 SKIP(5);
4584 } else {
4585 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData,
4588 "Text declaration '<?xml' required\n");
4589 ctxt->wellFormed = 0;
4590 ctxt->disableSAX = 1;
4591
4592 return;
4593 }
4594
4595 if (!IS_BLANK(CUR)) {
4596 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4598 ctxt->sax->error(ctxt->userData,
4599 "Space needed after '<?xml'\n");
4600 ctxt->wellFormed = 0;
4601 ctxt->disableSAX = 1;
4602 }
4603 SKIP_BLANKS;
4604
4605 /*
4606 * We may have the VersionInfo here.
4607 */
4608 version = xmlParseVersionInfo(ctxt);
4609 if (version == NULL)
4610 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4611 ctxt->input->version = version;
4612
4613 /*
4614 * We must have the encoding declaration
4615 */
4616 if (!IS_BLANK(CUR)) {
4617 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4619 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4620 ctxt->wellFormed = 0;
4621 ctxt->disableSAX = 1;
4622 }
4623 xmlParseEncodingDecl(ctxt);
4624 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4625 /*
4626 * The XML REC instructs us to stop parsing right here
4627 */
4628 return;
4629 }
4630
4631 SKIP_BLANKS;
4632 if ((RAW == '?') && (NXT(1) == '>')) {
4633 SKIP(2);
4634 } else if (RAW == '>') {
4635 /* Deprecated old WD ... */
4636 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4638 ctxt->sax->error(ctxt->userData,
4639 "XML declaration must end-up with '?>'\n");
4640 ctxt->wellFormed = 0;
4641 ctxt->disableSAX = 1;
4642 NEXT;
4643 } else {
4644 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4646 ctxt->sax->error(ctxt->userData,
4647 "parsing XML declaration: '?>' expected\n");
4648 ctxt->wellFormed = 0;
4649 ctxt->disableSAX = 1;
4650 MOVETO_ENDTAG(CUR_PTR);
4651 NEXT;
4652 }
4653}
4654
4655/*
4656 * xmlParseConditionalSections
4657 * @ctxt: an XML parser context
4658 *
4659 * [61] conditionalSect ::= includeSect | ignoreSect
4660 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4661 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4662 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4663 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4664 */
4665
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004666static void
Owen Taylor3473f882001-02-23 17:55:21 +00004667xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4668 SKIP(3);
4669 SKIP_BLANKS;
4670 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4671 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4672 (NXT(6) == 'E')) {
4673 SKIP(7);
4674 SKIP_BLANKS;
4675 if (RAW != '[') {
4676 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4678 ctxt->sax->error(ctxt->userData,
4679 "XML conditional section '[' expected\n");
4680 ctxt->wellFormed = 0;
4681 ctxt->disableSAX = 1;
4682 } else {
4683 NEXT;
4684 }
4685 if (xmlParserDebugEntities) {
4686 if ((ctxt->input != NULL) && (ctxt->input->filename))
4687 xmlGenericError(xmlGenericErrorContext,
4688 "%s(%d): ", ctxt->input->filename,
4689 ctxt->input->line);
4690 xmlGenericError(xmlGenericErrorContext,
4691 "Entering INCLUDE Conditional Section\n");
4692 }
4693
4694 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4695 (NXT(2) != '>'))) {
4696 const xmlChar *check = CUR_PTR;
4697 int cons = ctxt->input->consumed;
4698 int tok = ctxt->token;
4699
4700 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4701 xmlParseConditionalSections(ctxt);
4702 } else if (IS_BLANK(CUR)) {
4703 NEXT;
4704 } else if (RAW == '%') {
4705 xmlParsePEReference(ctxt);
4706 } else
4707 xmlParseMarkupDecl(ctxt);
4708
4709 /*
4710 * Pop-up of finished entities.
4711 */
4712 while ((RAW == 0) && (ctxt->inputNr > 1))
4713 xmlPopInput(ctxt);
4714
4715 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4716 (tok == ctxt->token)) {
4717 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4719 ctxt->sax->error(ctxt->userData,
4720 "Content error in the external subset\n");
4721 ctxt->wellFormed = 0;
4722 ctxt->disableSAX = 1;
4723 break;
4724 }
4725 }
4726 if (xmlParserDebugEntities) {
4727 if ((ctxt->input != NULL) && (ctxt->input->filename))
4728 xmlGenericError(xmlGenericErrorContext,
4729 "%s(%d): ", ctxt->input->filename,
4730 ctxt->input->line);
4731 xmlGenericError(xmlGenericErrorContext,
4732 "Leaving INCLUDE Conditional Section\n");
4733 }
4734
4735 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4736 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4737 int state;
4738 int instate;
4739 int depth = 0;
4740
4741 SKIP(6);
4742 SKIP_BLANKS;
4743 if (RAW != '[') {
4744 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4746 ctxt->sax->error(ctxt->userData,
4747 "XML conditional section '[' expected\n");
4748 ctxt->wellFormed = 0;
4749 ctxt->disableSAX = 1;
4750 } else {
4751 NEXT;
4752 }
4753 if (xmlParserDebugEntities) {
4754 if ((ctxt->input != NULL) && (ctxt->input->filename))
4755 xmlGenericError(xmlGenericErrorContext,
4756 "%s(%d): ", ctxt->input->filename,
4757 ctxt->input->line);
4758 xmlGenericError(xmlGenericErrorContext,
4759 "Entering IGNORE Conditional Section\n");
4760 }
4761
4762 /*
4763 * Parse up to the end of the conditionnal section
4764 * But disable SAX event generating DTD building in the meantime
4765 */
4766 state = ctxt->disableSAX;
4767 instate = ctxt->instate;
4768 ctxt->disableSAX = 1;
4769 ctxt->instate = XML_PARSER_IGNORE;
4770
4771 while (depth >= 0) {
4772 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4773 depth++;
4774 SKIP(3);
4775 continue;
4776 }
4777 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4778 if (--depth >= 0) SKIP(3);
4779 continue;
4780 }
4781 NEXT;
4782 continue;
4783 }
4784
4785 ctxt->disableSAX = state;
4786 ctxt->instate = instate;
4787
4788 if (xmlParserDebugEntities) {
4789 if ((ctxt->input != NULL) && (ctxt->input->filename))
4790 xmlGenericError(xmlGenericErrorContext,
4791 "%s(%d): ", ctxt->input->filename,
4792 ctxt->input->line);
4793 xmlGenericError(xmlGenericErrorContext,
4794 "Leaving IGNORE Conditional Section\n");
4795 }
4796
4797 } else {
4798 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4800 ctxt->sax->error(ctxt->userData,
4801 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4802 ctxt->wellFormed = 0;
4803 ctxt->disableSAX = 1;
4804 }
4805
4806 if (RAW == 0)
4807 SHRINK;
4808
4809 if (RAW == 0) {
4810 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4812 ctxt->sax->error(ctxt->userData,
4813 "XML conditional section not closed\n");
4814 ctxt->wellFormed = 0;
4815 ctxt->disableSAX = 1;
4816 } else {
4817 SKIP(3);
4818 }
4819}
4820
4821/**
4822 * xmlParseExternalSubset:
4823 * @ctxt: an XML parser context
4824 * @ExternalID: the external identifier
4825 * @SystemID: the system identifier (or URL)
4826 *
4827 * parse Markup declarations from an external subset
4828 *
4829 * [30] extSubset ::= textDecl? extSubsetDecl
4830 *
4831 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4832 */
4833void
4834xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4835 const xmlChar *SystemID) {
4836 GROW;
4837 if ((RAW == '<') && (NXT(1) == '?') &&
4838 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4839 (NXT(4) == 'l')) {
4840 xmlParseTextDecl(ctxt);
4841 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4842 /*
4843 * The XML REC instructs us to stop parsing right here
4844 */
4845 ctxt->instate = XML_PARSER_EOF;
4846 return;
4847 }
4848 }
4849 if (ctxt->myDoc == NULL) {
4850 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4851 }
4852 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4853 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4854
4855 ctxt->instate = XML_PARSER_DTD;
4856 ctxt->external = 1;
4857 while (((RAW == '<') && (NXT(1) == '?')) ||
4858 ((RAW == '<') && (NXT(1) == '!')) ||
4859 IS_BLANK(CUR)) {
4860 const xmlChar *check = CUR_PTR;
4861 int cons = ctxt->input->consumed;
4862 int tok = ctxt->token;
4863
4864 GROW;
4865 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4866 xmlParseConditionalSections(ctxt);
4867 } else if (IS_BLANK(CUR)) {
4868 NEXT;
4869 } else if (RAW == '%') {
4870 xmlParsePEReference(ctxt);
4871 } else
4872 xmlParseMarkupDecl(ctxt);
4873
4874 /*
4875 * Pop-up of finished entities.
4876 */
4877 while ((RAW == 0) && (ctxt->inputNr > 1))
4878 xmlPopInput(ctxt);
4879
4880 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4881 (tok == ctxt->token)) {
4882 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4884 ctxt->sax->error(ctxt->userData,
4885 "Content error in the external subset\n");
4886 ctxt->wellFormed = 0;
4887 ctxt->disableSAX = 1;
4888 break;
4889 }
4890 }
4891
4892 if (RAW != 0) {
4893 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4895 ctxt->sax->error(ctxt->userData,
4896 "Extra content at the end of the document\n");
4897 ctxt->wellFormed = 0;
4898 ctxt->disableSAX = 1;
4899 }
4900
4901}
4902
4903/**
4904 * xmlParseReference:
4905 * @ctxt: an XML parser context
4906 *
4907 * parse and handle entity references in content, depending on the SAX
4908 * interface, this may end-up in a call to character() if this is a
4909 * CharRef, a predefined entity, if there is no reference() callback.
4910 * or if the parser was asked to switch to that mode.
4911 *
4912 * [67] Reference ::= EntityRef | CharRef
4913 */
4914void
4915xmlParseReference(xmlParserCtxtPtr ctxt) {
4916 xmlEntityPtr ent;
4917 xmlChar *val;
4918 if (RAW != '&') return;
4919
4920 if (NXT(1) == '#') {
4921 int i = 0;
4922 xmlChar out[10];
4923 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004924 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004925
4926 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4927 /*
4928 * So we are using non-UTF-8 buffers
4929 * Check that the char fit on 8bits, if not
4930 * generate a CharRef.
4931 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004932 if (value <= 0xFF) {
4933 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004934 out[1] = 0;
4935 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4936 (!ctxt->disableSAX))
4937 ctxt->sax->characters(ctxt->userData, out, 1);
4938 } else {
4939 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004940 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004941 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004942 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004943 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4944 (!ctxt->disableSAX))
4945 ctxt->sax->reference(ctxt->userData, out);
4946 }
4947 } else {
4948 /*
4949 * Just encode the value in UTF-8
4950 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004951 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004952 out[i] = 0;
4953 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4954 (!ctxt->disableSAX))
4955 ctxt->sax->characters(ctxt->userData, out, i);
4956 }
4957 } else {
4958 ent = xmlParseEntityRef(ctxt);
4959 if (ent == NULL) return;
4960 if ((ent->name != NULL) &&
4961 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4962 xmlNodePtr list = NULL;
4963 int ret;
4964
4965
4966 /*
4967 * The first reference to the entity trigger a parsing phase
4968 * where the ent->children is filled with the result from
4969 * the parsing.
4970 */
4971 if (ent->children == NULL) {
4972 xmlChar *value;
4973 value = ent->content;
4974
4975 /*
4976 * Check that this entity is well formed
4977 */
4978 if ((value != NULL) &&
4979 (value[1] == 0) && (value[0] == '<') &&
4980 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4981 /*
4982 * DONE: get definite answer on this !!!
4983 * Lots of entity decls are used to declare a single
4984 * char
4985 * <!ENTITY lt "<">
4986 * Which seems to be valid since
4987 * 2.4: The ampersand character (&) and the left angle
4988 * bracket (<) may appear in their literal form only
4989 * when used ... They are also legal within the literal
4990 * entity value of an internal entity declaration;i
4991 * see "4.3.2 Well-Formed Parsed Entities".
4992 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4993 * Looking at the OASIS test suite and James Clark
4994 * tests, this is broken. However the XML REC uses
4995 * it. Is the XML REC not well-formed ????
4996 * This is a hack to avoid this problem
4997 *
4998 * ANSWER: since lt gt amp .. are already defined,
4999 * this is a redefinition and hence the fact that the
5000 * contentis not well balanced is not a Wf error, this
5001 * is lousy but acceptable.
5002 */
5003 list = xmlNewDocText(ctxt->myDoc, value);
5004 if (list != NULL) {
5005 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5006 (ent->children == NULL)) {
5007 ent->children = list;
5008 ent->last = list;
5009 list->parent = (xmlNodePtr) ent;
5010 } else {
5011 xmlFreeNodeList(list);
5012 }
5013 } else if (list != NULL) {
5014 xmlFreeNodeList(list);
5015 }
5016 } else {
5017 /*
5018 * 4.3.2: An internal general parsed entity is well-formed
5019 * if its replacement text matches the production labeled
5020 * content.
5021 */
5022 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5023 ctxt->depth++;
5024 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5025 ctxt->sax, NULL, ctxt->depth,
5026 value, &list);
5027 ctxt->depth--;
5028 } else if (ent->etype ==
5029 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5030 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005031 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005032 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005033 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005034 ctxt->depth--;
5035 } else {
5036 ret = -1;
5037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5038 ctxt->sax->error(ctxt->userData,
5039 "Internal: invalid entity type\n");
5040 }
5041 if (ret == XML_ERR_ENTITY_LOOP) {
5042 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5044 ctxt->sax->error(ctxt->userData,
5045 "Detected entity reference loop\n");
5046 ctxt->wellFormed = 0;
5047 ctxt->disableSAX = 1;
5048 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005049 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5050 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005051 (ent->children == NULL)) {
5052 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005053 if (ctxt->replaceEntities) {
5054 /*
5055 * Prune it directly in the generated document
5056 * except for single text nodes.
5057 */
5058 if ((list->type == XML_TEXT_NODE) &&
5059 (list->next == NULL)) {
5060 list->parent = (xmlNodePtr) ent;
5061 list = NULL;
5062 } else {
5063 while (list != NULL) {
5064 list->parent = (xmlNodePtr) ctxt->node;
5065 if (list->next == NULL)
5066 ent->last = list;
5067 list = list->next;
5068 }
5069 list = ent->children;
5070 }
5071 } else {
5072 while (list != NULL) {
5073 list->parent = (xmlNodePtr) ent;
5074 if (list->next == NULL)
5075 ent->last = list;
5076 list = list->next;
5077 }
Owen Taylor3473f882001-02-23 17:55:21 +00005078 }
5079 } else {
5080 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005081 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005082 }
5083 } else if (ret > 0) {
5084 ctxt->errNo = ret;
5085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5086 ctxt->sax->error(ctxt->userData,
5087 "Entity value required\n");
5088 ctxt->wellFormed = 0;
5089 ctxt->disableSAX = 1;
5090 } else if (list != NULL) {
5091 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005092 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005093 }
5094 }
5095 }
5096 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5097 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5098 /*
5099 * Create a node.
5100 */
5101 ctxt->sax->reference(ctxt->userData, ent->name);
5102 return;
5103 } else if (ctxt->replaceEntities) {
5104 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5105 /*
5106 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005107 * a simple tree copy for all references except the first
5108 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005109 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005110 if (list == NULL) {
5111 xmlNodePtr new, cur;
5112 cur = ent->children;
5113 while (cur != NULL) {
5114 new = xmlCopyNode(cur, 1);
5115 xmlAddChild(ctxt->node, new);
5116 if (cur == ent->last)
5117 break;
5118 cur = cur->next;
5119 }
5120 } else {
5121 /*
5122 * the name change is to avoid coalescing of the
5123 * node with a prossible previous text one which
5124 * would make ent->children a dandling pointer
5125 */
5126 if (ent->children->type == XML_TEXT_NODE)
5127 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5128 if ((ent->last != ent->children) &&
5129 (ent->last->type == XML_TEXT_NODE))
5130 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5131 xmlAddChildList(ctxt->node, ent->children);
5132 }
5133
Owen Taylor3473f882001-02-23 17:55:21 +00005134 /*
5135 * This is to avoid a nasty side effect, see
5136 * characters() in SAX.c
5137 */
5138 ctxt->nodemem = 0;
5139 ctxt->nodelen = 0;
5140 return;
5141 } else {
5142 /*
5143 * Probably running in SAX mode
5144 */
5145 xmlParserInputPtr input;
5146
5147 input = xmlNewEntityInputStream(ctxt, ent);
5148 xmlPushInput(ctxt, input);
5149 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5150 (RAW == '<') && (NXT(1) == '?') &&
5151 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5152 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5153 xmlParseTextDecl(ctxt);
5154 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5155 /*
5156 * The XML REC instructs us to stop parsing right here
5157 */
5158 ctxt->instate = XML_PARSER_EOF;
5159 return;
5160 }
5161 if (input->standalone == 1) {
5162 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5164 ctxt->sax->error(ctxt->userData,
5165 "external parsed entities cannot be standalone\n");
5166 ctxt->wellFormed = 0;
5167 ctxt->disableSAX = 1;
5168 }
5169 }
5170 return;
5171 }
5172 }
5173 } else {
5174 val = ent->content;
5175 if (val == NULL) return;
5176 /*
5177 * inline the entity.
5178 */
5179 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5180 (!ctxt->disableSAX))
5181 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5182 }
5183 }
5184}
5185
5186/**
5187 * xmlParseEntityRef:
5188 * @ctxt: an XML parser context
5189 *
5190 * parse ENTITY references declarations
5191 *
5192 * [68] EntityRef ::= '&' Name ';'
5193 *
5194 * [ WFC: Entity Declared ]
5195 * In a document without any DTD, a document with only an internal DTD
5196 * subset which contains no parameter entity references, or a document
5197 * with "standalone='yes'", the Name given in the entity reference
5198 * must match that in an entity declaration, except that well-formed
5199 * documents need not declare any of the following entities: amp, lt,
5200 * gt, apos, quot. The declaration of a parameter entity must precede
5201 * any reference to it. Similarly, the declaration of a general entity
5202 * must precede any reference to it which appears in a default value in an
5203 * attribute-list declaration. Note that if entities are declared in the
5204 * external subset or in external parameter entities, a non-validating
5205 * processor is not obligated to read and process their declarations;
5206 * for such documents, the rule that an entity must be declared is a
5207 * well-formedness constraint only if standalone='yes'.
5208 *
5209 * [ WFC: Parsed Entity ]
5210 * An entity reference must not contain the name of an unparsed entity
5211 *
5212 * Returns the xmlEntityPtr if found, or NULL otherwise.
5213 */
5214xmlEntityPtr
5215xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5216 xmlChar *name;
5217 xmlEntityPtr ent = NULL;
5218
5219 GROW;
5220
5221 if (RAW == '&') {
5222 NEXT;
5223 name = xmlParseName(ctxt);
5224 if (name == NULL) {
5225 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5227 ctxt->sax->error(ctxt->userData,
5228 "xmlParseEntityRef: no name\n");
5229 ctxt->wellFormed = 0;
5230 ctxt->disableSAX = 1;
5231 } else {
5232 if (RAW == ';') {
5233 NEXT;
5234 /*
5235 * Ask first SAX for entity resolution, otherwise try the
5236 * predefined set.
5237 */
5238 if (ctxt->sax != NULL) {
5239 if (ctxt->sax->getEntity != NULL)
5240 ent = ctxt->sax->getEntity(ctxt->userData, name);
5241 if (ent == NULL)
5242 ent = xmlGetPredefinedEntity(name);
5243 }
5244 /*
5245 * [ WFC: Entity Declared ]
5246 * In a document without any DTD, a document with only an
5247 * internal DTD subset which contains no parameter entity
5248 * references, or a document with "standalone='yes'", the
5249 * Name given in the entity reference must match that in an
5250 * entity declaration, except that well-formed documents
5251 * need not declare any of the following entities: amp, lt,
5252 * gt, apos, quot.
5253 * The declaration of a parameter entity must precede any
5254 * reference to it.
5255 * Similarly, the declaration of a general entity must
5256 * precede any reference to it which appears in a default
5257 * value in an attribute-list declaration. Note that if
5258 * entities are declared in the external subset or in
5259 * external parameter entities, a non-validating processor
5260 * is not obligated to read and process their declarations;
5261 * for such documents, the rule that an entity must be
5262 * declared is a well-formedness constraint only if
5263 * standalone='yes'.
5264 */
5265 if (ent == NULL) {
5266 if ((ctxt->standalone == 1) ||
5267 ((ctxt->hasExternalSubset == 0) &&
5268 (ctxt->hasPErefs == 0))) {
5269 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5271 ctxt->sax->error(ctxt->userData,
5272 "Entity '%s' not defined\n", name);
5273 ctxt->wellFormed = 0;
5274 ctxt->disableSAX = 1;
5275 } else {
5276 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5277 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005278 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005279 "Entity '%s' not defined\n", name);
5280 }
5281 }
5282
5283 /*
5284 * [ WFC: Parsed Entity ]
5285 * An entity reference must not contain the name of an
5286 * unparsed entity
5287 */
5288 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5289 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5291 ctxt->sax->error(ctxt->userData,
5292 "Entity reference to unparsed entity %s\n", name);
5293 ctxt->wellFormed = 0;
5294 ctxt->disableSAX = 1;
5295 }
5296
5297 /*
5298 * [ WFC: No External Entity References ]
5299 * Attribute values cannot contain direct or indirect
5300 * entity references to external entities.
5301 */
5302 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5303 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5304 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5306 ctxt->sax->error(ctxt->userData,
5307 "Attribute references external entity '%s'\n", name);
5308 ctxt->wellFormed = 0;
5309 ctxt->disableSAX = 1;
5310 }
5311 /*
5312 * [ WFC: No < in Attribute Values ]
5313 * The replacement text of any entity referred to directly or
5314 * indirectly in an attribute value (other than "&lt;") must
5315 * not contain a <.
5316 */
5317 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5318 (ent != NULL) &&
5319 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5320 (ent->content != NULL) &&
5321 (xmlStrchr(ent->content, '<'))) {
5322 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5324 ctxt->sax->error(ctxt->userData,
5325 "'<' in entity '%s' is not allowed in attributes values\n", name);
5326 ctxt->wellFormed = 0;
5327 ctxt->disableSAX = 1;
5328 }
5329
5330 /*
5331 * Internal check, no parameter entities here ...
5332 */
5333 else {
5334 switch (ent->etype) {
5335 case XML_INTERNAL_PARAMETER_ENTITY:
5336 case XML_EXTERNAL_PARAMETER_ENTITY:
5337 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5339 ctxt->sax->error(ctxt->userData,
5340 "Attempt to reference the parameter entity '%s'\n", name);
5341 ctxt->wellFormed = 0;
5342 ctxt->disableSAX = 1;
5343 break;
5344 default:
5345 break;
5346 }
5347 }
5348
5349 /*
5350 * [ WFC: No Recursion ]
5351 * A parsed entity must not contain a recursive reference
5352 * to itself, either directly or indirectly.
5353 * Done somewhere else
5354 */
5355
5356 } else {
5357 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5359 ctxt->sax->error(ctxt->userData,
5360 "xmlParseEntityRef: expecting ';'\n");
5361 ctxt->wellFormed = 0;
5362 ctxt->disableSAX = 1;
5363 }
5364 xmlFree(name);
5365 }
5366 }
5367 return(ent);
5368}
5369
5370/**
5371 * xmlParseStringEntityRef:
5372 * @ctxt: an XML parser context
5373 * @str: a pointer to an index in the string
5374 *
5375 * parse ENTITY references declarations, but this version parses it from
5376 * a string value.
5377 *
5378 * [68] EntityRef ::= '&' Name ';'
5379 *
5380 * [ WFC: Entity Declared ]
5381 * In a document without any DTD, a document with only an internal DTD
5382 * subset which contains no parameter entity references, or a document
5383 * with "standalone='yes'", the Name given in the entity reference
5384 * must match that in an entity declaration, except that well-formed
5385 * documents need not declare any of the following entities: amp, lt,
5386 * gt, apos, quot. The declaration of a parameter entity must precede
5387 * any reference to it. Similarly, the declaration of a general entity
5388 * must precede any reference to it which appears in a default value in an
5389 * attribute-list declaration. Note that if entities are declared in the
5390 * external subset or in external parameter entities, a non-validating
5391 * processor is not obligated to read and process their declarations;
5392 * for such documents, the rule that an entity must be declared is a
5393 * well-formedness constraint only if standalone='yes'.
5394 *
5395 * [ WFC: Parsed Entity ]
5396 * An entity reference must not contain the name of an unparsed entity
5397 *
5398 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5399 * is updated to the current location in the string.
5400 */
5401xmlEntityPtr
5402xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5403 xmlChar *name;
5404 const xmlChar *ptr;
5405 xmlChar cur;
5406 xmlEntityPtr ent = NULL;
5407
5408 if ((str == NULL) || (*str == NULL))
5409 return(NULL);
5410 ptr = *str;
5411 cur = *ptr;
5412 if (cur == '&') {
5413 ptr++;
5414 cur = *ptr;
5415 name = xmlParseStringName(ctxt, &ptr);
5416 if (name == NULL) {
5417 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5419 ctxt->sax->error(ctxt->userData,
5420 "xmlParseEntityRef: no name\n");
5421 ctxt->wellFormed = 0;
5422 ctxt->disableSAX = 1;
5423 } else {
5424 if (*ptr == ';') {
5425 ptr++;
5426 /*
5427 * Ask first SAX for entity resolution, otherwise try the
5428 * predefined set.
5429 */
5430 if (ctxt->sax != NULL) {
5431 if (ctxt->sax->getEntity != NULL)
5432 ent = ctxt->sax->getEntity(ctxt->userData, name);
5433 if (ent == NULL)
5434 ent = xmlGetPredefinedEntity(name);
5435 }
5436 /*
5437 * [ WFC: Entity Declared ]
5438 * In a document without any DTD, a document with only an
5439 * internal DTD subset which contains no parameter entity
5440 * references, or a document with "standalone='yes'", the
5441 * Name given in the entity reference must match that in an
5442 * entity declaration, except that well-formed documents
5443 * need not declare any of the following entities: amp, lt,
5444 * gt, apos, quot.
5445 * The declaration of a parameter entity must precede any
5446 * reference to it.
5447 * Similarly, the declaration of a general entity must
5448 * precede any reference to it which appears in a default
5449 * value in an attribute-list declaration. Note that if
5450 * entities are declared in the external subset or in
5451 * external parameter entities, a non-validating processor
5452 * is not obligated to read and process their declarations;
5453 * for such documents, the rule that an entity must be
5454 * declared is a well-formedness constraint only if
5455 * standalone='yes'.
5456 */
5457 if (ent == NULL) {
5458 if ((ctxt->standalone == 1) ||
5459 ((ctxt->hasExternalSubset == 0) &&
5460 (ctxt->hasPErefs == 0))) {
5461 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5463 ctxt->sax->error(ctxt->userData,
5464 "Entity '%s' not defined\n", name);
5465 ctxt->wellFormed = 0;
5466 ctxt->disableSAX = 1;
5467 } else {
5468 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5469 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5470 ctxt->sax->warning(ctxt->userData,
5471 "Entity '%s' not defined\n", name);
5472 }
5473 }
5474
5475 /*
5476 * [ WFC: Parsed Entity ]
5477 * An entity reference must not contain the name of an
5478 * unparsed entity
5479 */
5480 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5481 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5483 ctxt->sax->error(ctxt->userData,
5484 "Entity reference to unparsed entity %s\n", name);
5485 ctxt->wellFormed = 0;
5486 ctxt->disableSAX = 1;
5487 }
5488
5489 /*
5490 * [ WFC: No External Entity References ]
5491 * Attribute values cannot contain direct or indirect
5492 * entity references to external entities.
5493 */
5494 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5495 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5496 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5498 ctxt->sax->error(ctxt->userData,
5499 "Attribute references external entity '%s'\n", name);
5500 ctxt->wellFormed = 0;
5501 ctxt->disableSAX = 1;
5502 }
5503 /*
5504 * [ WFC: No < in Attribute Values ]
5505 * The replacement text of any entity referred to directly or
5506 * indirectly in an attribute value (other than "&lt;") must
5507 * not contain a <.
5508 */
5509 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5510 (ent != NULL) &&
5511 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5512 (ent->content != NULL) &&
5513 (xmlStrchr(ent->content, '<'))) {
5514 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5516 ctxt->sax->error(ctxt->userData,
5517 "'<' in entity '%s' is not allowed in attributes values\n", name);
5518 ctxt->wellFormed = 0;
5519 ctxt->disableSAX = 1;
5520 }
5521
5522 /*
5523 * Internal check, no parameter entities here ...
5524 */
5525 else {
5526 switch (ent->etype) {
5527 case XML_INTERNAL_PARAMETER_ENTITY:
5528 case XML_EXTERNAL_PARAMETER_ENTITY:
5529 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5531 ctxt->sax->error(ctxt->userData,
5532 "Attempt to reference the parameter entity '%s'\n", name);
5533 ctxt->wellFormed = 0;
5534 ctxt->disableSAX = 1;
5535 break;
5536 default:
5537 break;
5538 }
5539 }
5540
5541 /*
5542 * [ WFC: No Recursion ]
5543 * A parsed entity must not contain a recursive reference
5544 * to itself, either directly or indirectly.
5545 * Done somewhwere else
5546 */
5547
5548 } else {
5549 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5551 ctxt->sax->error(ctxt->userData,
5552 "xmlParseEntityRef: expecting ';'\n");
5553 ctxt->wellFormed = 0;
5554 ctxt->disableSAX = 1;
5555 }
5556 xmlFree(name);
5557 }
5558 }
5559 *str = ptr;
5560 return(ent);
5561}
5562
5563/**
5564 * xmlParsePEReference:
5565 * @ctxt: an XML parser context
5566 *
5567 * parse PEReference declarations
5568 * The entity content is handled directly by pushing it's content as
5569 * a new input stream.
5570 *
5571 * [69] PEReference ::= '%' Name ';'
5572 *
5573 * [ WFC: No Recursion ]
5574 * A parsed entity must not contain a recursive
5575 * reference to itself, either directly or indirectly.
5576 *
5577 * [ WFC: Entity Declared ]
5578 * In a document without any DTD, a document with only an internal DTD
5579 * subset which contains no parameter entity references, or a document
5580 * with "standalone='yes'", ... ... The declaration of a parameter
5581 * entity must precede any reference to it...
5582 *
5583 * [ VC: Entity Declared ]
5584 * In a document with an external subset or external parameter entities
5585 * with "standalone='no'", ... ... The declaration of a parameter entity
5586 * must precede any reference to it...
5587 *
5588 * [ WFC: In DTD ]
5589 * Parameter-entity references may only appear in the DTD.
5590 * NOTE: misleading but this is handled.
5591 */
5592void
5593xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5594 xmlChar *name;
5595 xmlEntityPtr entity = NULL;
5596 xmlParserInputPtr input;
5597
5598 if (RAW == '%') {
5599 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005600 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005601 if (name == NULL) {
5602 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5604 ctxt->sax->error(ctxt->userData,
5605 "xmlParsePEReference: no name\n");
5606 ctxt->wellFormed = 0;
5607 ctxt->disableSAX = 1;
5608 } else {
5609 if (RAW == ';') {
5610 NEXT;
5611 if ((ctxt->sax != NULL) &&
5612 (ctxt->sax->getParameterEntity != NULL))
5613 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5614 name);
5615 if (entity == NULL) {
5616 /*
5617 * [ WFC: Entity Declared ]
5618 * In a document without any DTD, a document with only an
5619 * internal DTD subset which contains no parameter entity
5620 * references, or a document with "standalone='yes'", ...
5621 * ... The declaration of a parameter entity must precede
5622 * any reference to it...
5623 */
5624 if ((ctxt->standalone == 1) ||
5625 ((ctxt->hasExternalSubset == 0) &&
5626 (ctxt->hasPErefs == 0))) {
5627 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5628 if ((!ctxt->disableSAX) &&
5629 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5630 ctxt->sax->error(ctxt->userData,
5631 "PEReference: %%%s; not found\n", name);
5632 ctxt->wellFormed = 0;
5633 ctxt->disableSAX = 1;
5634 } else {
5635 /*
5636 * [ VC: Entity Declared ]
5637 * In a document with an external subset or external
5638 * parameter entities with "standalone='no'", ...
5639 * ... The declaration of a parameter entity must precede
5640 * any reference to it...
5641 */
5642 if ((!ctxt->disableSAX) &&
5643 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5644 ctxt->sax->warning(ctxt->userData,
5645 "PEReference: %%%s; not found\n", name);
5646 ctxt->valid = 0;
5647 }
5648 } else {
5649 /*
5650 * Internal checking in case the entity quest barfed
5651 */
5652 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5653 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5654 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5655 ctxt->sax->warning(ctxt->userData,
5656 "Internal: %%%s; is not a parameter entity\n", name);
5657 } else {
5658 /*
5659 * TODO !!!
5660 * handle the extra spaces added before and after
5661 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5662 */
5663 input = xmlNewEntityInputStream(ctxt, entity);
5664 xmlPushInput(ctxt, input);
5665 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5666 (RAW == '<') && (NXT(1) == '?') &&
5667 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5668 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5669 xmlParseTextDecl(ctxt);
5670 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5671 /*
5672 * The XML REC instructs us to stop parsing
5673 * right here
5674 */
5675 ctxt->instate = XML_PARSER_EOF;
5676 xmlFree(name);
5677 return;
5678 }
5679 }
5680 if (ctxt->token == 0)
5681 ctxt->token = ' ';
5682 }
5683 }
5684 ctxt->hasPErefs = 1;
5685 } else {
5686 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5688 ctxt->sax->error(ctxt->userData,
5689 "xmlParsePEReference: expecting ';'\n");
5690 ctxt->wellFormed = 0;
5691 ctxt->disableSAX = 1;
5692 }
5693 xmlFree(name);
5694 }
5695 }
5696}
5697
5698/**
5699 * xmlParseStringPEReference:
5700 * @ctxt: an XML parser context
5701 * @str: a pointer to an index in the string
5702 *
5703 * parse PEReference declarations
5704 *
5705 * [69] PEReference ::= '%' Name ';'
5706 *
5707 * [ WFC: No Recursion ]
5708 * A parsed entity must not contain a recursive
5709 * reference to itself, either directly or indirectly.
5710 *
5711 * [ WFC: Entity Declared ]
5712 * In a document without any DTD, a document with only an internal DTD
5713 * subset which contains no parameter entity references, or a document
5714 * with "standalone='yes'", ... ... The declaration of a parameter
5715 * entity must precede any reference to it...
5716 *
5717 * [ VC: Entity Declared ]
5718 * In a document with an external subset or external parameter entities
5719 * with "standalone='no'", ... ... The declaration of a parameter entity
5720 * must precede any reference to it...
5721 *
5722 * [ WFC: In DTD ]
5723 * Parameter-entity references may only appear in the DTD.
5724 * NOTE: misleading but this is handled.
5725 *
5726 * Returns the string of the entity content.
5727 * str is updated to the current value of the index
5728 */
5729xmlEntityPtr
5730xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5731 const xmlChar *ptr;
5732 xmlChar cur;
5733 xmlChar *name;
5734 xmlEntityPtr entity = NULL;
5735
5736 if ((str == NULL) || (*str == NULL)) return(NULL);
5737 ptr = *str;
5738 cur = *ptr;
5739 if (cur == '%') {
5740 ptr++;
5741 cur = *ptr;
5742 name = xmlParseStringName(ctxt, &ptr);
5743 if (name == NULL) {
5744 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5746 ctxt->sax->error(ctxt->userData,
5747 "xmlParseStringPEReference: no name\n");
5748 ctxt->wellFormed = 0;
5749 ctxt->disableSAX = 1;
5750 } else {
5751 cur = *ptr;
5752 if (cur == ';') {
5753 ptr++;
5754 cur = *ptr;
5755 if ((ctxt->sax != NULL) &&
5756 (ctxt->sax->getParameterEntity != NULL))
5757 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5758 name);
5759 if (entity == NULL) {
5760 /*
5761 * [ WFC: Entity Declared ]
5762 * In a document without any DTD, a document with only an
5763 * internal DTD subset which contains no parameter entity
5764 * references, or a document with "standalone='yes'", ...
5765 * ... The declaration of a parameter entity must precede
5766 * any reference to it...
5767 */
5768 if ((ctxt->standalone == 1) ||
5769 ((ctxt->hasExternalSubset == 0) &&
5770 (ctxt->hasPErefs == 0))) {
5771 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5773 ctxt->sax->error(ctxt->userData,
5774 "PEReference: %%%s; not found\n", name);
5775 ctxt->wellFormed = 0;
5776 ctxt->disableSAX = 1;
5777 } else {
5778 /*
5779 * [ VC: Entity Declared ]
5780 * In a document with an external subset or external
5781 * parameter entities with "standalone='no'", ...
5782 * ... The declaration of a parameter entity must
5783 * precede any reference to it...
5784 */
5785 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5786 ctxt->sax->warning(ctxt->userData,
5787 "PEReference: %%%s; not found\n", name);
5788 ctxt->valid = 0;
5789 }
5790 } else {
5791 /*
5792 * Internal checking in case the entity quest barfed
5793 */
5794 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5795 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5796 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5797 ctxt->sax->warning(ctxt->userData,
5798 "Internal: %%%s; is not a parameter entity\n", name);
5799 }
5800 }
5801 ctxt->hasPErefs = 1;
5802 } else {
5803 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5805 ctxt->sax->error(ctxt->userData,
5806 "xmlParseStringPEReference: expecting ';'\n");
5807 ctxt->wellFormed = 0;
5808 ctxt->disableSAX = 1;
5809 }
5810 xmlFree(name);
5811 }
5812 }
5813 *str = ptr;
5814 return(entity);
5815}
5816
5817/**
5818 * xmlParseDocTypeDecl:
5819 * @ctxt: an XML parser context
5820 *
5821 * parse a DOCTYPE declaration
5822 *
5823 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5824 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5825 *
5826 * [ VC: Root Element Type ]
5827 * The Name in the document type declaration must match the element
5828 * type of the root element.
5829 */
5830
5831void
5832xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5833 xmlChar *name = NULL;
5834 xmlChar *ExternalID = NULL;
5835 xmlChar *URI = NULL;
5836
5837 /*
5838 * We know that '<!DOCTYPE' has been detected.
5839 */
5840 SKIP(9);
5841
5842 SKIP_BLANKS;
5843
5844 /*
5845 * Parse the DOCTYPE name.
5846 */
5847 name = xmlParseName(ctxt);
5848 if (name == NULL) {
5849 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5851 ctxt->sax->error(ctxt->userData,
5852 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5853 ctxt->wellFormed = 0;
5854 ctxt->disableSAX = 1;
5855 }
5856 ctxt->intSubName = name;
5857
5858 SKIP_BLANKS;
5859
5860 /*
5861 * Check for SystemID and ExternalID
5862 */
5863 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5864
5865 if ((URI != NULL) || (ExternalID != NULL)) {
5866 ctxt->hasExternalSubset = 1;
5867 }
5868 ctxt->extSubURI = URI;
5869 ctxt->extSubSystem = ExternalID;
5870
5871 SKIP_BLANKS;
5872
5873 /*
5874 * Create and update the internal subset.
5875 */
5876 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5877 (!ctxt->disableSAX))
5878 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5879
5880 /*
5881 * Is there any internal subset declarations ?
5882 * they are handled separately in xmlParseInternalSubset()
5883 */
5884 if (RAW == '[')
5885 return;
5886
5887 /*
5888 * We should be at the end of the DOCTYPE declaration.
5889 */
5890 if (RAW != '>') {
5891 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5893 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5894 ctxt->wellFormed = 0;
5895 ctxt->disableSAX = 1;
5896 }
5897 NEXT;
5898}
5899
5900/**
5901 * xmlParseInternalsubset:
5902 * @ctxt: an XML parser context
5903 *
5904 * parse the internal subset declaration
5905 *
5906 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5907 */
5908
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005909static void
Owen Taylor3473f882001-02-23 17:55:21 +00005910xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5911 /*
5912 * Is there any DTD definition ?
5913 */
5914 if (RAW == '[') {
5915 ctxt->instate = XML_PARSER_DTD;
5916 NEXT;
5917 /*
5918 * Parse the succession of Markup declarations and
5919 * PEReferences.
5920 * Subsequence (markupdecl | PEReference | S)*
5921 */
5922 while (RAW != ']') {
5923 const xmlChar *check = CUR_PTR;
5924 int cons = ctxt->input->consumed;
5925
5926 SKIP_BLANKS;
5927 xmlParseMarkupDecl(ctxt);
5928 xmlParsePEReference(ctxt);
5929
5930 /*
5931 * Pop-up of finished entities.
5932 */
5933 while ((RAW == 0) && (ctxt->inputNr > 1))
5934 xmlPopInput(ctxt);
5935
5936 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5937 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5939 ctxt->sax->error(ctxt->userData,
5940 "xmlParseInternalSubset: error detected in Markup declaration\n");
5941 ctxt->wellFormed = 0;
5942 ctxt->disableSAX = 1;
5943 break;
5944 }
5945 }
5946 if (RAW == ']') {
5947 NEXT;
5948 SKIP_BLANKS;
5949 }
5950 }
5951
5952 /*
5953 * We should be at the end of the DOCTYPE declaration.
5954 */
5955 if (RAW != '>') {
5956 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5958 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5959 ctxt->wellFormed = 0;
5960 ctxt->disableSAX = 1;
5961 }
5962 NEXT;
5963}
5964
5965/**
5966 * xmlParseAttribute:
5967 * @ctxt: an XML parser context
5968 * @value: a xmlChar ** used to store the value of the attribute
5969 *
5970 * parse an attribute
5971 *
5972 * [41] Attribute ::= Name Eq AttValue
5973 *
5974 * [ WFC: No External Entity References ]
5975 * Attribute values cannot contain direct or indirect entity references
5976 * to external entities.
5977 *
5978 * [ WFC: No < in Attribute Values ]
5979 * The replacement text of any entity referred to directly or indirectly in
5980 * an attribute value (other than "&lt;") must not contain a <.
5981 *
5982 * [ VC: Attribute Value Type ]
5983 * The attribute must have been declared; the value must be of the type
5984 * declared for it.
5985 *
5986 * [25] Eq ::= S? '=' S?
5987 *
5988 * With namespace:
5989 *
5990 * [NS 11] Attribute ::= QName Eq AttValue
5991 *
5992 * Also the case QName == xmlns:??? is handled independently as a namespace
5993 * definition.
5994 *
5995 * Returns the attribute name, and the value in *value.
5996 */
5997
5998xmlChar *
5999xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6000 xmlChar *name, *val;
6001
6002 *value = NULL;
6003 name = xmlParseName(ctxt);
6004 if (name == NULL) {
6005 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6007 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6008 ctxt->wellFormed = 0;
6009 ctxt->disableSAX = 1;
6010 return(NULL);
6011 }
6012
6013 /*
6014 * read the value
6015 */
6016 SKIP_BLANKS;
6017 if (RAW == '=') {
6018 NEXT;
6019 SKIP_BLANKS;
6020 val = xmlParseAttValue(ctxt);
6021 ctxt->instate = XML_PARSER_CONTENT;
6022 } else {
6023 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6025 ctxt->sax->error(ctxt->userData,
6026 "Specification mandate value for attribute %s\n", name);
6027 ctxt->wellFormed = 0;
6028 ctxt->disableSAX = 1;
6029 xmlFree(name);
6030 return(NULL);
6031 }
6032
6033 /*
6034 * Check that xml:lang conforms to the specification
6035 * No more registered as an error, just generate a warning now
6036 * since this was deprecated in XML second edition
6037 */
6038 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6039 if (!xmlCheckLanguageID(val)) {
6040 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6041 ctxt->sax->warning(ctxt->userData,
6042 "Malformed value for xml:lang : %s\n", val);
6043 }
6044 }
6045
6046 /*
6047 * Check that xml:space conforms to the specification
6048 */
6049 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6050 if (xmlStrEqual(val, BAD_CAST "default"))
6051 *(ctxt->space) = 0;
6052 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6053 *(ctxt->space) = 1;
6054 else {
6055 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6057 ctxt->sax->error(ctxt->userData,
6058"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6059 val);
6060 ctxt->wellFormed = 0;
6061 ctxt->disableSAX = 1;
6062 }
6063 }
6064
6065 *value = val;
6066 return(name);
6067}
6068
6069/**
6070 * xmlParseStartTag:
6071 * @ctxt: an XML parser context
6072 *
6073 * parse a start of tag either for rule element or
6074 * EmptyElement. In both case we don't parse the tag closing chars.
6075 *
6076 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6077 *
6078 * [ WFC: Unique Att Spec ]
6079 * No attribute name may appear more than once in the same start-tag or
6080 * empty-element tag.
6081 *
6082 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6083 *
6084 * [ WFC: Unique Att Spec ]
6085 * No attribute name may appear more than once in the same start-tag or
6086 * empty-element tag.
6087 *
6088 * With namespace:
6089 *
6090 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6091 *
6092 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6093 *
6094 * Returns the element name parsed
6095 */
6096
6097xmlChar *
6098xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6099 xmlChar *name;
6100 xmlChar *attname;
6101 xmlChar *attvalue;
6102 const xmlChar **atts = NULL;
6103 int nbatts = 0;
6104 int maxatts = 0;
6105 int i;
6106
6107 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006108 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006109
6110 name = xmlParseName(ctxt);
6111 if (name == NULL) {
6112 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6114 ctxt->sax->error(ctxt->userData,
6115 "xmlParseStartTag: invalid element name\n");
6116 ctxt->wellFormed = 0;
6117 ctxt->disableSAX = 1;
6118 return(NULL);
6119 }
6120
6121 /*
6122 * Now parse the attributes, it ends up with the ending
6123 *
6124 * (S Attribute)* S?
6125 */
6126 SKIP_BLANKS;
6127 GROW;
6128
Daniel Veillard21a0f912001-02-25 19:54:14 +00006129 while ((RAW != '>') &&
6130 ((RAW != '/') || (NXT(1) != '>')) &&
6131 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006132 const xmlChar *q = CUR_PTR;
6133 int cons = ctxt->input->consumed;
6134
6135 attname = xmlParseAttribute(ctxt, &attvalue);
6136 if ((attname != NULL) && (attvalue != NULL)) {
6137 /*
6138 * [ WFC: Unique Att Spec ]
6139 * No attribute name may appear more than once in the same
6140 * start-tag or empty-element tag.
6141 */
6142 for (i = 0; i < nbatts;i += 2) {
6143 if (xmlStrEqual(atts[i], attname)) {
6144 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6145 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6146 ctxt->sax->error(ctxt->userData,
6147 "Attribute %s redefined\n",
6148 attname);
6149 ctxt->wellFormed = 0;
6150 ctxt->disableSAX = 1;
6151 xmlFree(attname);
6152 xmlFree(attvalue);
6153 goto failed;
6154 }
6155 }
6156
6157 /*
6158 * Add the pair to atts
6159 */
6160 if (atts == NULL) {
6161 maxatts = 10;
6162 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6163 if (atts == NULL) {
6164 xmlGenericError(xmlGenericErrorContext,
6165 "malloc of %ld byte failed\n",
6166 maxatts * (long)sizeof(xmlChar *));
6167 return(NULL);
6168 }
6169 } else if (nbatts + 4 > maxatts) {
6170 maxatts *= 2;
6171 atts = (const xmlChar **) xmlRealloc((void *) atts,
6172 maxatts * sizeof(xmlChar *));
6173 if (atts == NULL) {
6174 xmlGenericError(xmlGenericErrorContext,
6175 "realloc of %ld byte failed\n",
6176 maxatts * (long)sizeof(xmlChar *));
6177 return(NULL);
6178 }
6179 }
6180 atts[nbatts++] = attname;
6181 atts[nbatts++] = attvalue;
6182 atts[nbatts] = NULL;
6183 atts[nbatts + 1] = NULL;
6184 } else {
6185 if (attname != NULL)
6186 xmlFree(attname);
6187 if (attvalue != NULL)
6188 xmlFree(attvalue);
6189 }
6190
6191failed:
6192
6193 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6194 break;
6195 if (!IS_BLANK(RAW)) {
6196 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6198 ctxt->sax->error(ctxt->userData,
6199 "attributes construct error\n");
6200 ctxt->wellFormed = 0;
6201 ctxt->disableSAX = 1;
6202 }
6203 SKIP_BLANKS;
6204 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6205 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6207 ctxt->sax->error(ctxt->userData,
6208 "xmlParseStartTag: problem parsing attributes\n");
6209 ctxt->wellFormed = 0;
6210 ctxt->disableSAX = 1;
6211 break;
6212 }
6213 GROW;
6214 }
6215
6216 /*
6217 * SAX: Start of Element !
6218 */
6219 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6220 (!ctxt->disableSAX))
6221 ctxt->sax->startElement(ctxt->userData, name, atts);
6222
6223 if (atts != NULL) {
6224 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6225 xmlFree((void *) atts);
6226 }
6227 return(name);
6228}
6229
6230/**
6231 * xmlParseEndTag:
6232 * @ctxt: an XML parser context
6233 *
6234 * parse an end of tag
6235 *
6236 * [42] ETag ::= '</' Name S? '>'
6237 *
6238 * With namespace
6239 *
6240 * [NS 9] ETag ::= '</' QName S? '>'
6241 */
6242
6243void
6244xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6245 xmlChar *name;
6246 xmlChar *oldname;
6247
6248 GROW;
6249 if ((RAW != '<') || (NXT(1) != '/')) {
6250 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6252 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6253 ctxt->wellFormed = 0;
6254 ctxt->disableSAX = 1;
6255 return;
6256 }
6257 SKIP(2);
6258
6259 name = xmlParseName(ctxt);
6260
6261 /*
6262 * We should definitely be at the ending "S? '>'" part
6263 */
6264 GROW;
6265 SKIP_BLANKS;
6266 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6267 ctxt->errNo = XML_ERR_GT_REQUIRED;
6268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6269 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6270 ctxt->wellFormed = 0;
6271 ctxt->disableSAX = 1;
6272 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006273 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006274
6275 /*
6276 * [ WFC: Element Type Match ]
6277 * The Name in an element's end-tag must match the element type in the
6278 * start-tag.
6279 *
6280 */
6281 if ((name == NULL) || (ctxt->name == NULL) ||
6282 (!xmlStrEqual(name, ctxt->name))) {
6283 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6285 if ((name != NULL) && (ctxt->name != NULL)) {
6286 ctxt->sax->error(ctxt->userData,
6287 "Opening and ending tag mismatch: %s and %s\n",
6288 ctxt->name, name);
6289 } else if (ctxt->name != NULL) {
6290 ctxt->sax->error(ctxt->userData,
6291 "Ending tag eror for: %s\n", ctxt->name);
6292 } else {
6293 ctxt->sax->error(ctxt->userData,
6294 "Ending tag error: internal error ???\n");
6295 }
6296
6297 }
6298 ctxt->wellFormed = 0;
6299 ctxt->disableSAX = 1;
6300 }
6301
6302 /*
6303 * SAX: End of Tag
6304 */
6305 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6306 (!ctxt->disableSAX))
6307 ctxt->sax->endElement(ctxt->userData, name);
6308
6309 if (name != NULL)
6310 xmlFree(name);
6311 oldname = namePop(ctxt);
6312 spacePop(ctxt);
6313 if (oldname != NULL) {
6314#ifdef DEBUG_STACK
6315 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6316#endif
6317 xmlFree(oldname);
6318 }
6319 return;
6320}
6321
6322/**
6323 * xmlParseCDSect:
6324 * @ctxt: an XML parser context
6325 *
6326 * Parse escaped pure raw content.
6327 *
6328 * [18] CDSect ::= CDStart CData CDEnd
6329 *
6330 * [19] CDStart ::= '<![CDATA['
6331 *
6332 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6333 *
6334 * [21] CDEnd ::= ']]>'
6335 */
6336void
6337xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6338 xmlChar *buf = NULL;
6339 int len = 0;
6340 int size = XML_PARSER_BUFFER_SIZE;
6341 int r, rl;
6342 int s, sl;
6343 int cur, l;
6344 int count = 0;
6345
6346 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6347 (NXT(2) == '[') && (NXT(3) == 'C') &&
6348 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6349 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6350 (NXT(8) == '[')) {
6351 SKIP(9);
6352 } else
6353 return;
6354
6355 ctxt->instate = XML_PARSER_CDATA_SECTION;
6356 r = CUR_CHAR(rl);
6357 if (!IS_CHAR(r)) {
6358 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6360 ctxt->sax->error(ctxt->userData,
6361 "CData section not finished\n");
6362 ctxt->wellFormed = 0;
6363 ctxt->disableSAX = 1;
6364 ctxt->instate = XML_PARSER_CONTENT;
6365 return;
6366 }
6367 NEXTL(rl);
6368 s = CUR_CHAR(sl);
6369 if (!IS_CHAR(s)) {
6370 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6372 ctxt->sax->error(ctxt->userData,
6373 "CData section not finished\n");
6374 ctxt->wellFormed = 0;
6375 ctxt->disableSAX = 1;
6376 ctxt->instate = XML_PARSER_CONTENT;
6377 return;
6378 }
6379 NEXTL(sl);
6380 cur = CUR_CHAR(l);
6381 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6382 if (buf == NULL) {
6383 xmlGenericError(xmlGenericErrorContext,
6384 "malloc of %d byte failed\n", size);
6385 return;
6386 }
6387 while (IS_CHAR(cur) &&
6388 ((r != ']') || (s != ']') || (cur != '>'))) {
6389 if (len + 5 >= size) {
6390 size *= 2;
6391 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6392 if (buf == NULL) {
6393 xmlGenericError(xmlGenericErrorContext,
6394 "realloc of %d byte failed\n", size);
6395 return;
6396 }
6397 }
6398 COPY_BUF(rl,buf,len,r);
6399 r = s;
6400 rl = sl;
6401 s = cur;
6402 sl = l;
6403 count++;
6404 if (count > 50) {
6405 GROW;
6406 count = 0;
6407 }
6408 NEXTL(l);
6409 cur = CUR_CHAR(l);
6410 }
6411 buf[len] = 0;
6412 ctxt->instate = XML_PARSER_CONTENT;
6413 if (cur != '>') {
6414 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6416 ctxt->sax->error(ctxt->userData,
6417 "CData section not finished\n%.50s\n", buf);
6418 ctxt->wellFormed = 0;
6419 ctxt->disableSAX = 1;
6420 xmlFree(buf);
6421 return;
6422 }
6423 NEXTL(l);
6424
6425 /*
6426 * Ok the buffer is to be consumed as cdata.
6427 */
6428 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6429 if (ctxt->sax->cdataBlock != NULL)
6430 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6431 }
6432 xmlFree(buf);
6433}
6434
6435/**
6436 * xmlParseContent:
6437 * @ctxt: an XML parser context
6438 *
6439 * Parse a content:
6440 *
6441 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6442 */
6443
6444void
6445xmlParseContent(xmlParserCtxtPtr ctxt) {
6446 GROW;
6447 while (((RAW != 0) || (ctxt->token != 0)) &&
6448 ((RAW != '<') || (NXT(1) != '/'))) {
6449 const xmlChar *test = CUR_PTR;
6450 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006451 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006452 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006453
6454 /*
6455 * Handle possible processed charrefs.
6456 */
6457 if (ctxt->token != 0) {
6458 xmlParseCharData(ctxt, 0);
6459 }
6460 /*
6461 * First case : a Processing Instruction.
6462 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006463 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006464 xmlParsePI(ctxt);
6465 }
6466
6467 /*
6468 * Second case : a CDSection
6469 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006470 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006471 (NXT(2) == '[') && (NXT(3) == 'C') &&
6472 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6473 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6474 (NXT(8) == '[')) {
6475 xmlParseCDSect(ctxt);
6476 }
6477
6478 /*
6479 * Third case : a comment
6480 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006481 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006482 (NXT(2) == '-') && (NXT(3) == '-')) {
6483 xmlParseComment(ctxt);
6484 ctxt->instate = XML_PARSER_CONTENT;
6485 }
6486
6487 /*
6488 * Fourth case : a sub-element.
6489 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006490 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006491 xmlParseElement(ctxt);
6492 }
6493
6494 /*
6495 * Fifth case : a reference. If if has not been resolved,
6496 * parsing returns it's Name, create the node
6497 */
6498
Daniel Veillard21a0f912001-02-25 19:54:14 +00006499 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006500 xmlParseReference(ctxt);
6501 }
6502
6503 /*
6504 * Last case, text. Note that References are handled directly.
6505 */
6506 else {
6507 xmlParseCharData(ctxt, 0);
6508 }
6509
6510 GROW;
6511 /*
6512 * Pop-up of finished entities.
6513 */
6514 while ((RAW == 0) && (ctxt->inputNr > 1))
6515 xmlPopInput(ctxt);
6516 SHRINK;
6517
6518 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6519 (tok == ctxt->token)) {
6520 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6522 ctxt->sax->error(ctxt->userData,
6523 "detected an error in element content\n");
6524 ctxt->wellFormed = 0;
6525 ctxt->disableSAX = 1;
6526 ctxt->instate = XML_PARSER_EOF;
6527 break;
6528 }
6529 }
6530}
6531
6532/**
6533 * xmlParseElement:
6534 * @ctxt: an XML parser context
6535 *
6536 * parse an XML element, this is highly recursive
6537 *
6538 * [39] element ::= EmptyElemTag | STag content ETag
6539 *
6540 * [ WFC: Element Type Match ]
6541 * The Name in an element's end-tag must match the element type in the
6542 * start-tag.
6543 *
6544 * [ VC: Element Valid ]
6545 * An element is valid if there is a declaration matching elementdecl
6546 * where the Name matches the element type and one of the following holds:
6547 * - The declaration matches EMPTY and the element has no content.
6548 * - The declaration matches children and the sequence of child elements
6549 * belongs to the language generated by the regular expression in the
6550 * content model, with optional white space (characters matching the
6551 * nonterminal S) between each pair of child elements.
6552 * - The declaration matches Mixed and the content consists of character
6553 * data and child elements whose types match names in the content model.
6554 * - The declaration matches ANY, and the types of any child elements have
6555 * been declared.
6556 */
6557
6558void
6559xmlParseElement(xmlParserCtxtPtr ctxt) {
6560 const xmlChar *openTag = CUR_PTR;
6561 xmlChar *name;
6562 xmlChar *oldname;
6563 xmlParserNodeInfo node_info;
6564 xmlNodePtr ret;
6565
6566 /* Capture start position */
6567 if (ctxt->record_info) {
6568 node_info.begin_pos = ctxt->input->consumed +
6569 (CUR_PTR - ctxt->input->base);
6570 node_info.begin_line = ctxt->input->line;
6571 }
6572
6573 if (ctxt->spaceNr == 0)
6574 spacePush(ctxt, -1);
6575 else
6576 spacePush(ctxt, *ctxt->space);
6577
6578 name = xmlParseStartTag(ctxt);
6579 if (name == NULL) {
6580 spacePop(ctxt);
6581 return;
6582 }
6583 namePush(ctxt, name);
6584 ret = ctxt->node;
6585
6586 /*
6587 * [ VC: Root Element Type ]
6588 * The Name in the document type declaration must match the element
6589 * type of the root element.
6590 */
6591 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6592 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6593 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6594
6595 /*
6596 * Check for an Empty Element.
6597 */
6598 if ((RAW == '/') && (NXT(1) == '>')) {
6599 SKIP(2);
6600 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6601 (!ctxt->disableSAX))
6602 ctxt->sax->endElement(ctxt->userData, name);
6603 oldname = namePop(ctxt);
6604 spacePop(ctxt);
6605 if (oldname != NULL) {
6606#ifdef DEBUG_STACK
6607 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6608#endif
6609 xmlFree(oldname);
6610 }
6611 if ( ret != NULL && ctxt->record_info ) {
6612 node_info.end_pos = ctxt->input->consumed +
6613 (CUR_PTR - ctxt->input->base);
6614 node_info.end_line = ctxt->input->line;
6615 node_info.node = ret;
6616 xmlParserAddNodeInfo(ctxt, &node_info);
6617 }
6618 return;
6619 }
6620 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006621 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006622 } else {
6623 ctxt->errNo = XML_ERR_GT_REQUIRED;
6624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6625 ctxt->sax->error(ctxt->userData,
6626 "Couldn't find end of Start Tag\n%.30s\n",
6627 openTag);
6628 ctxt->wellFormed = 0;
6629 ctxt->disableSAX = 1;
6630
6631 /*
6632 * end of parsing of this node.
6633 */
6634 nodePop(ctxt);
6635 oldname = namePop(ctxt);
6636 spacePop(ctxt);
6637 if (oldname != NULL) {
6638#ifdef DEBUG_STACK
6639 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6640#endif
6641 xmlFree(oldname);
6642 }
6643
6644 /*
6645 * Capture end position and add node
6646 */
6647 if ( ret != NULL && ctxt->record_info ) {
6648 node_info.end_pos = ctxt->input->consumed +
6649 (CUR_PTR - ctxt->input->base);
6650 node_info.end_line = ctxt->input->line;
6651 node_info.node = ret;
6652 xmlParserAddNodeInfo(ctxt, &node_info);
6653 }
6654 return;
6655 }
6656
6657 /*
6658 * Parse the content of the element:
6659 */
6660 xmlParseContent(ctxt);
6661 if (!IS_CHAR(RAW)) {
6662 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6664 ctxt->sax->error(ctxt->userData,
6665 "Premature end of data in tag %.30s\n", openTag);
6666 ctxt->wellFormed = 0;
6667 ctxt->disableSAX = 1;
6668
6669 /*
6670 * end of parsing of this node.
6671 */
6672 nodePop(ctxt);
6673 oldname = namePop(ctxt);
6674 spacePop(ctxt);
6675 if (oldname != NULL) {
6676#ifdef DEBUG_STACK
6677 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6678#endif
6679 xmlFree(oldname);
6680 }
6681 return;
6682 }
6683
6684 /*
6685 * parse the end of tag: '</' should be here.
6686 */
6687 xmlParseEndTag(ctxt);
6688
6689 /*
6690 * Capture end position and add node
6691 */
6692 if ( ret != NULL && ctxt->record_info ) {
6693 node_info.end_pos = ctxt->input->consumed +
6694 (CUR_PTR - ctxt->input->base);
6695 node_info.end_line = ctxt->input->line;
6696 node_info.node = ret;
6697 xmlParserAddNodeInfo(ctxt, &node_info);
6698 }
6699}
6700
6701/**
6702 * xmlParseVersionNum:
6703 * @ctxt: an XML parser context
6704 *
6705 * parse the XML version value.
6706 *
6707 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6708 *
6709 * Returns the string giving the XML version number, or NULL
6710 */
6711xmlChar *
6712xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6713 xmlChar *buf = NULL;
6714 int len = 0;
6715 int size = 10;
6716 xmlChar cur;
6717
6718 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6719 if (buf == NULL) {
6720 xmlGenericError(xmlGenericErrorContext,
6721 "malloc of %d byte failed\n", size);
6722 return(NULL);
6723 }
6724 cur = CUR;
6725 while (((cur >= 'a') && (cur <= 'z')) ||
6726 ((cur >= 'A') && (cur <= 'Z')) ||
6727 ((cur >= '0') && (cur <= '9')) ||
6728 (cur == '_') || (cur == '.') ||
6729 (cur == ':') || (cur == '-')) {
6730 if (len + 1 >= size) {
6731 size *= 2;
6732 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6733 if (buf == NULL) {
6734 xmlGenericError(xmlGenericErrorContext,
6735 "realloc of %d byte failed\n", size);
6736 return(NULL);
6737 }
6738 }
6739 buf[len++] = cur;
6740 NEXT;
6741 cur=CUR;
6742 }
6743 buf[len] = 0;
6744 return(buf);
6745}
6746
6747/**
6748 * xmlParseVersionInfo:
6749 * @ctxt: an XML parser context
6750 *
6751 * parse the XML version.
6752 *
6753 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6754 *
6755 * [25] Eq ::= S? '=' S?
6756 *
6757 * Returns the version string, e.g. "1.0"
6758 */
6759
6760xmlChar *
6761xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6762 xmlChar *version = NULL;
6763 const xmlChar *q;
6764
6765 if ((RAW == 'v') && (NXT(1) == 'e') &&
6766 (NXT(2) == 'r') && (NXT(3) == 's') &&
6767 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6768 (NXT(6) == 'n')) {
6769 SKIP(7);
6770 SKIP_BLANKS;
6771 if (RAW != '=') {
6772 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6774 ctxt->sax->error(ctxt->userData,
6775 "xmlParseVersionInfo : expected '='\n");
6776 ctxt->wellFormed = 0;
6777 ctxt->disableSAX = 1;
6778 return(NULL);
6779 }
6780 NEXT;
6781 SKIP_BLANKS;
6782 if (RAW == '"') {
6783 NEXT;
6784 q = CUR_PTR;
6785 version = xmlParseVersionNum(ctxt);
6786 if (RAW != '"') {
6787 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6789 ctxt->sax->error(ctxt->userData,
6790 "String not closed\n%.50s\n", q);
6791 ctxt->wellFormed = 0;
6792 ctxt->disableSAX = 1;
6793 } else
6794 NEXT;
6795 } else if (RAW == '\''){
6796 NEXT;
6797 q = CUR_PTR;
6798 version = xmlParseVersionNum(ctxt);
6799 if (RAW != '\'') {
6800 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6802 ctxt->sax->error(ctxt->userData,
6803 "String not closed\n%.50s\n", q);
6804 ctxt->wellFormed = 0;
6805 ctxt->disableSAX = 1;
6806 } else
6807 NEXT;
6808 } else {
6809 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6811 ctxt->sax->error(ctxt->userData,
6812 "xmlParseVersionInfo : expected ' or \"\n");
6813 ctxt->wellFormed = 0;
6814 ctxt->disableSAX = 1;
6815 }
6816 }
6817 return(version);
6818}
6819
6820/**
6821 * xmlParseEncName:
6822 * @ctxt: an XML parser context
6823 *
6824 * parse the XML encoding name
6825 *
6826 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6827 *
6828 * Returns the encoding name value or NULL
6829 */
6830xmlChar *
6831xmlParseEncName(xmlParserCtxtPtr ctxt) {
6832 xmlChar *buf = NULL;
6833 int len = 0;
6834 int size = 10;
6835 xmlChar cur;
6836
6837 cur = CUR;
6838 if (((cur >= 'a') && (cur <= 'z')) ||
6839 ((cur >= 'A') && (cur <= 'Z'))) {
6840 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6841 if (buf == NULL) {
6842 xmlGenericError(xmlGenericErrorContext,
6843 "malloc of %d byte failed\n", size);
6844 return(NULL);
6845 }
6846
6847 buf[len++] = cur;
6848 NEXT;
6849 cur = CUR;
6850 while (((cur >= 'a') && (cur <= 'z')) ||
6851 ((cur >= 'A') && (cur <= 'Z')) ||
6852 ((cur >= '0') && (cur <= '9')) ||
6853 (cur == '.') || (cur == '_') ||
6854 (cur == '-')) {
6855 if (len + 1 >= size) {
6856 size *= 2;
6857 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6858 if (buf == NULL) {
6859 xmlGenericError(xmlGenericErrorContext,
6860 "realloc of %d byte failed\n", size);
6861 return(NULL);
6862 }
6863 }
6864 buf[len++] = cur;
6865 NEXT;
6866 cur = CUR;
6867 if (cur == 0) {
6868 SHRINK;
6869 GROW;
6870 cur = CUR;
6871 }
6872 }
6873 buf[len] = 0;
6874 } else {
6875 ctxt->errNo = XML_ERR_ENCODING_NAME;
6876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6877 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6878 ctxt->wellFormed = 0;
6879 ctxt->disableSAX = 1;
6880 }
6881 return(buf);
6882}
6883
6884/**
6885 * xmlParseEncodingDecl:
6886 * @ctxt: an XML parser context
6887 *
6888 * parse the XML encoding declaration
6889 *
6890 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6891 *
6892 * this setups the conversion filters.
6893 *
6894 * Returns the encoding value or NULL
6895 */
6896
6897xmlChar *
6898xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6899 xmlChar *encoding = NULL;
6900 const xmlChar *q;
6901
6902 SKIP_BLANKS;
6903 if ((RAW == 'e') && (NXT(1) == 'n') &&
6904 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6905 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6906 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6907 SKIP(8);
6908 SKIP_BLANKS;
6909 if (RAW != '=') {
6910 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6912 ctxt->sax->error(ctxt->userData,
6913 "xmlParseEncodingDecl : expected '='\n");
6914 ctxt->wellFormed = 0;
6915 ctxt->disableSAX = 1;
6916 return(NULL);
6917 }
6918 NEXT;
6919 SKIP_BLANKS;
6920 if (RAW == '"') {
6921 NEXT;
6922 q = CUR_PTR;
6923 encoding = xmlParseEncName(ctxt);
6924 if (RAW != '"') {
6925 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6927 ctxt->sax->error(ctxt->userData,
6928 "String not closed\n%.50s\n", q);
6929 ctxt->wellFormed = 0;
6930 ctxt->disableSAX = 1;
6931 } else
6932 NEXT;
6933 } else if (RAW == '\''){
6934 NEXT;
6935 q = CUR_PTR;
6936 encoding = xmlParseEncName(ctxt);
6937 if (RAW != '\'') {
6938 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6940 ctxt->sax->error(ctxt->userData,
6941 "String not closed\n%.50s\n", q);
6942 ctxt->wellFormed = 0;
6943 ctxt->disableSAX = 1;
6944 } else
6945 NEXT;
6946 } else if (RAW == '"'){
6947 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6949 ctxt->sax->error(ctxt->userData,
6950 "xmlParseEncodingDecl : expected ' or \"\n");
6951 ctxt->wellFormed = 0;
6952 ctxt->disableSAX = 1;
6953 }
6954 if (encoding != NULL) {
6955 xmlCharEncoding enc;
6956 xmlCharEncodingHandlerPtr handler;
6957
6958 if (ctxt->input->encoding != NULL)
6959 xmlFree((xmlChar *) ctxt->input->encoding);
6960 ctxt->input->encoding = encoding;
6961
6962 enc = xmlParseCharEncoding((const char *) encoding);
6963 /*
6964 * registered set of known encodings
6965 */
6966 if (enc != XML_CHAR_ENCODING_ERROR) {
6967 xmlSwitchEncoding(ctxt, enc);
6968 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6969 xmlFree(encoding);
6970 return(NULL);
6971 }
6972 } else {
6973 /*
6974 * fallback for unknown encodings
6975 */
6976 handler = xmlFindCharEncodingHandler((const char *) encoding);
6977 if (handler != NULL) {
6978 xmlSwitchToEncoding(ctxt, handler);
6979 } else {
6980 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6982 ctxt->sax->error(ctxt->userData,
6983 "Unsupported encoding %s\n", encoding);
6984 return(NULL);
6985 }
6986 }
6987 }
6988 }
6989 return(encoding);
6990}
6991
6992/**
6993 * xmlParseSDDecl:
6994 * @ctxt: an XML parser context
6995 *
6996 * parse the XML standalone declaration
6997 *
6998 * [32] SDDecl ::= S 'standalone' Eq
6999 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7000 *
7001 * [ VC: Standalone Document Declaration ]
7002 * TODO The standalone document declaration must have the value "no"
7003 * if any external markup declarations contain declarations of:
7004 * - attributes with default values, if elements to which these
7005 * attributes apply appear in the document without specifications
7006 * of values for these attributes, or
7007 * - entities (other than amp, lt, gt, apos, quot), if references
7008 * to those entities appear in the document, or
7009 * - attributes with values subject to normalization, where the
7010 * attribute appears in the document with a value which will change
7011 * as a result of normalization, or
7012 * - element types with element content, if white space occurs directly
7013 * within any instance of those types.
7014 *
7015 * Returns 1 if standalone, 0 otherwise
7016 */
7017
7018int
7019xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7020 int standalone = -1;
7021
7022 SKIP_BLANKS;
7023 if ((RAW == 's') && (NXT(1) == 't') &&
7024 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7025 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7026 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7027 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7028 SKIP(10);
7029 SKIP_BLANKS;
7030 if (RAW != '=') {
7031 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7033 ctxt->sax->error(ctxt->userData,
7034 "XML standalone declaration : expected '='\n");
7035 ctxt->wellFormed = 0;
7036 ctxt->disableSAX = 1;
7037 return(standalone);
7038 }
7039 NEXT;
7040 SKIP_BLANKS;
7041 if (RAW == '\''){
7042 NEXT;
7043 if ((RAW == 'n') && (NXT(1) == 'o')) {
7044 standalone = 0;
7045 SKIP(2);
7046 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7047 (NXT(2) == 's')) {
7048 standalone = 1;
7049 SKIP(3);
7050 } else {
7051 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7053 ctxt->sax->error(ctxt->userData,
7054 "standalone accepts only 'yes' or 'no'\n");
7055 ctxt->wellFormed = 0;
7056 ctxt->disableSAX = 1;
7057 }
7058 if (RAW != '\'') {
7059 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7061 ctxt->sax->error(ctxt->userData, "String not closed\n");
7062 ctxt->wellFormed = 0;
7063 ctxt->disableSAX = 1;
7064 } else
7065 NEXT;
7066 } else if (RAW == '"'){
7067 NEXT;
7068 if ((RAW == 'n') && (NXT(1) == 'o')) {
7069 standalone = 0;
7070 SKIP(2);
7071 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7072 (NXT(2) == 's')) {
7073 standalone = 1;
7074 SKIP(3);
7075 } else {
7076 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7078 ctxt->sax->error(ctxt->userData,
7079 "standalone accepts only 'yes' or 'no'\n");
7080 ctxt->wellFormed = 0;
7081 ctxt->disableSAX = 1;
7082 }
7083 if (RAW != '"') {
7084 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7086 ctxt->sax->error(ctxt->userData, "String not closed\n");
7087 ctxt->wellFormed = 0;
7088 ctxt->disableSAX = 1;
7089 } else
7090 NEXT;
7091 } else {
7092 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7094 ctxt->sax->error(ctxt->userData,
7095 "Standalone value not found\n");
7096 ctxt->wellFormed = 0;
7097 ctxt->disableSAX = 1;
7098 }
7099 }
7100 return(standalone);
7101}
7102
7103/**
7104 * xmlParseXMLDecl:
7105 * @ctxt: an XML parser context
7106 *
7107 * parse an XML declaration header
7108 *
7109 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7110 */
7111
7112void
7113xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7114 xmlChar *version;
7115
7116 /*
7117 * We know that '<?xml' is here.
7118 */
7119 SKIP(5);
7120
7121 if (!IS_BLANK(RAW)) {
7122 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7124 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7125 ctxt->wellFormed = 0;
7126 ctxt->disableSAX = 1;
7127 }
7128 SKIP_BLANKS;
7129
7130 /*
7131 * We should have the VersionInfo here.
7132 */
7133 version = xmlParseVersionInfo(ctxt);
7134 if (version == NULL)
7135 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7136 ctxt->version = xmlStrdup(version);
7137 xmlFree(version);
7138
7139 /*
7140 * We may have the encoding declaration
7141 */
7142 if (!IS_BLANK(RAW)) {
7143 if ((RAW == '?') && (NXT(1) == '>')) {
7144 SKIP(2);
7145 return;
7146 }
7147 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7149 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7150 ctxt->wellFormed = 0;
7151 ctxt->disableSAX = 1;
7152 }
7153 xmlParseEncodingDecl(ctxt);
7154 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7155 /*
7156 * The XML REC instructs us to stop parsing right here
7157 */
7158 return;
7159 }
7160
7161 /*
7162 * We may have the standalone status.
7163 */
7164 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7165 if ((RAW == '?') && (NXT(1) == '>')) {
7166 SKIP(2);
7167 return;
7168 }
7169 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7171 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7172 ctxt->wellFormed = 0;
7173 ctxt->disableSAX = 1;
7174 }
7175 SKIP_BLANKS;
7176 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7177
7178 SKIP_BLANKS;
7179 if ((RAW == '?') && (NXT(1) == '>')) {
7180 SKIP(2);
7181 } else if (RAW == '>') {
7182 /* Deprecated old WD ... */
7183 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7185 ctxt->sax->error(ctxt->userData,
7186 "XML declaration must end-up with '?>'\n");
7187 ctxt->wellFormed = 0;
7188 ctxt->disableSAX = 1;
7189 NEXT;
7190 } else {
7191 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7193 ctxt->sax->error(ctxt->userData,
7194 "parsing XML declaration: '?>' expected\n");
7195 ctxt->wellFormed = 0;
7196 ctxt->disableSAX = 1;
7197 MOVETO_ENDTAG(CUR_PTR);
7198 NEXT;
7199 }
7200}
7201
7202/**
7203 * xmlParseMisc:
7204 * @ctxt: an XML parser context
7205 *
7206 * parse an XML Misc* optionnal field.
7207 *
7208 * [27] Misc ::= Comment | PI | S
7209 */
7210
7211void
7212xmlParseMisc(xmlParserCtxtPtr ctxt) {
7213 while (((RAW == '<') && (NXT(1) == '?')) ||
7214 ((RAW == '<') && (NXT(1) == '!') &&
7215 (NXT(2) == '-') && (NXT(3) == '-')) ||
7216 IS_BLANK(CUR)) {
7217 if ((RAW == '<') && (NXT(1) == '?')) {
7218 xmlParsePI(ctxt);
7219 } else if (IS_BLANK(CUR)) {
7220 NEXT;
7221 } else
7222 xmlParseComment(ctxt);
7223 }
7224}
7225
7226/**
7227 * xmlParseDocument:
7228 * @ctxt: an XML parser context
7229 *
7230 * parse an XML document (and build a tree if using the standard SAX
7231 * interface).
7232 *
7233 * [1] document ::= prolog element Misc*
7234 *
7235 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7236 *
7237 * Returns 0, -1 in case of error. the parser context is augmented
7238 * as a result of the parsing.
7239 */
7240
7241int
7242xmlParseDocument(xmlParserCtxtPtr ctxt) {
7243 xmlChar start[4];
7244 xmlCharEncoding enc;
7245
7246 xmlInitParser();
7247
7248 GROW;
7249
7250 /*
7251 * SAX: beginning of the document processing.
7252 */
7253 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7254 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7255
7256 /*
7257 * Get the 4 first bytes and decode the charset
7258 * if enc != XML_CHAR_ENCODING_NONE
7259 * plug some encoding conversion routines.
7260 */
7261 start[0] = RAW;
7262 start[1] = NXT(1);
7263 start[2] = NXT(2);
7264 start[3] = NXT(3);
7265 enc = xmlDetectCharEncoding(start, 4);
7266 if (enc != XML_CHAR_ENCODING_NONE) {
7267 xmlSwitchEncoding(ctxt, enc);
7268 }
7269
7270
7271 if (CUR == 0) {
7272 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7274 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7275 ctxt->wellFormed = 0;
7276 ctxt->disableSAX = 1;
7277 }
7278
7279 /*
7280 * Check for the XMLDecl in the Prolog.
7281 */
7282 GROW;
7283 if ((RAW == '<') && (NXT(1) == '?') &&
7284 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7285 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7286
7287 /*
7288 * Note that we will switch encoding on the fly.
7289 */
7290 xmlParseXMLDecl(ctxt);
7291 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7292 /*
7293 * The XML REC instructs us to stop parsing right here
7294 */
7295 return(-1);
7296 }
7297 ctxt->standalone = ctxt->input->standalone;
7298 SKIP_BLANKS;
7299 } else {
7300 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7301 }
7302 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7303 ctxt->sax->startDocument(ctxt->userData);
7304
7305 /*
7306 * The Misc part of the Prolog
7307 */
7308 GROW;
7309 xmlParseMisc(ctxt);
7310
7311 /*
7312 * Then possibly doc type declaration(s) and more Misc
7313 * (doctypedecl Misc*)?
7314 */
7315 GROW;
7316 if ((RAW == '<') && (NXT(1) == '!') &&
7317 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7318 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7319 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7320 (NXT(8) == 'E')) {
7321
7322 ctxt->inSubset = 1;
7323 xmlParseDocTypeDecl(ctxt);
7324 if (RAW == '[') {
7325 ctxt->instate = XML_PARSER_DTD;
7326 xmlParseInternalSubset(ctxt);
7327 }
7328
7329 /*
7330 * Create and update the external subset.
7331 */
7332 ctxt->inSubset = 2;
7333 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7334 (!ctxt->disableSAX))
7335 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7336 ctxt->extSubSystem, ctxt->extSubURI);
7337 ctxt->inSubset = 0;
7338
7339
7340 ctxt->instate = XML_PARSER_PROLOG;
7341 xmlParseMisc(ctxt);
7342 }
7343
7344 /*
7345 * Time to start parsing the tree itself
7346 */
7347 GROW;
7348 if (RAW != '<') {
7349 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7351 ctxt->sax->error(ctxt->userData,
7352 "Start tag expected, '<' not found\n");
7353 ctxt->wellFormed = 0;
7354 ctxt->disableSAX = 1;
7355 ctxt->instate = XML_PARSER_EOF;
7356 } else {
7357 ctxt->instate = XML_PARSER_CONTENT;
7358 xmlParseElement(ctxt);
7359 ctxt->instate = XML_PARSER_EPILOG;
7360
7361
7362 /*
7363 * The Misc part at the end
7364 */
7365 xmlParseMisc(ctxt);
7366
7367 if (RAW != 0) {
7368 ctxt->errNo = XML_ERR_DOCUMENT_END;
7369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7370 ctxt->sax->error(ctxt->userData,
7371 "Extra content at the end of the document\n");
7372 ctxt->wellFormed = 0;
7373 ctxt->disableSAX = 1;
7374 }
7375 ctxt->instate = XML_PARSER_EOF;
7376 }
7377
7378 /*
7379 * SAX: end of the document processing.
7380 */
7381 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7382 (!ctxt->disableSAX))
7383 ctxt->sax->endDocument(ctxt->userData);
7384
7385 if (! ctxt->wellFormed) return(-1);
7386 return(0);
7387}
7388
7389/**
7390 * xmlParseExtParsedEnt:
7391 * @ctxt: an XML parser context
7392 *
7393 * parse a general parsed entity
7394 * An external general parsed entity is well-formed if it matches the
7395 * production labeled extParsedEnt.
7396 *
7397 * [78] extParsedEnt ::= TextDecl? content
7398 *
7399 * Returns 0, -1 in case of error. the parser context is augmented
7400 * as a result of the parsing.
7401 */
7402
7403int
7404xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7405 xmlChar start[4];
7406 xmlCharEncoding enc;
7407
7408 xmlDefaultSAXHandlerInit();
7409
7410 GROW;
7411
7412 /*
7413 * SAX: beginning of the document processing.
7414 */
7415 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7416 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7417
7418 /*
7419 * Get the 4 first bytes and decode the charset
7420 * if enc != XML_CHAR_ENCODING_NONE
7421 * plug some encoding conversion routines.
7422 */
7423 start[0] = RAW;
7424 start[1] = NXT(1);
7425 start[2] = NXT(2);
7426 start[3] = NXT(3);
7427 enc = xmlDetectCharEncoding(start, 4);
7428 if (enc != XML_CHAR_ENCODING_NONE) {
7429 xmlSwitchEncoding(ctxt, enc);
7430 }
7431
7432
7433 if (CUR == 0) {
7434 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7436 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7437 ctxt->wellFormed = 0;
7438 ctxt->disableSAX = 1;
7439 }
7440
7441 /*
7442 * Check for the XMLDecl in the Prolog.
7443 */
7444 GROW;
7445 if ((RAW == '<') && (NXT(1) == '?') &&
7446 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7447 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7448
7449 /*
7450 * Note that we will switch encoding on the fly.
7451 */
7452 xmlParseXMLDecl(ctxt);
7453 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7454 /*
7455 * The XML REC instructs us to stop parsing right here
7456 */
7457 return(-1);
7458 }
7459 SKIP_BLANKS;
7460 } else {
7461 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7462 }
7463 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7464 ctxt->sax->startDocument(ctxt->userData);
7465
7466 /*
7467 * Doing validity checking on chunk doesn't make sense
7468 */
7469 ctxt->instate = XML_PARSER_CONTENT;
7470 ctxt->validate = 0;
7471 ctxt->loadsubset = 0;
7472 ctxt->depth = 0;
7473
7474 xmlParseContent(ctxt);
7475
7476 if ((RAW == '<') && (NXT(1) == '/')) {
7477 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7479 ctxt->sax->error(ctxt->userData,
7480 "chunk is not well balanced\n");
7481 ctxt->wellFormed = 0;
7482 ctxt->disableSAX = 1;
7483 } else if (RAW != 0) {
7484 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7486 ctxt->sax->error(ctxt->userData,
7487 "extra content at the end of well balanced chunk\n");
7488 ctxt->wellFormed = 0;
7489 ctxt->disableSAX = 1;
7490 }
7491
7492 /*
7493 * SAX: end of the document processing.
7494 */
7495 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7496 (!ctxt->disableSAX))
7497 ctxt->sax->endDocument(ctxt->userData);
7498
7499 if (! ctxt->wellFormed) return(-1);
7500 return(0);
7501}
7502
7503/************************************************************************
7504 * *
7505 * Progressive parsing interfaces *
7506 * *
7507 ************************************************************************/
7508
7509/**
7510 * xmlParseLookupSequence:
7511 * @ctxt: an XML parser context
7512 * @first: the first char to lookup
7513 * @next: the next char to lookup or zero
7514 * @third: the next char to lookup or zero
7515 *
7516 * Try to find if a sequence (first, next, third) or just (first next) or
7517 * (first) is available in the input stream.
7518 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7519 * to avoid rescanning sequences of bytes, it DOES change the state of the
7520 * parser, do not use liberally.
7521 *
7522 * Returns the index to the current parsing point if the full sequence
7523 * is available, -1 otherwise.
7524 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007525static int
Owen Taylor3473f882001-02-23 17:55:21 +00007526xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7527 xmlChar next, xmlChar third) {
7528 int base, len;
7529 xmlParserInputPtr in;
7530 const xmlChar *buf;
7531
7532 in = ctxt->input;
7533 if (in == NULL) return(-1);
7534 base = in->cur - in->base;
7535 if (base < 0) return(-1);
7536 if (ctxt->checkIndex > base)
7537 base = ctxt->checkIndex;
7538 if (in->buf == NULL) {
7539 buf = in->base;
7540 len = in->length;
7541 } else {
7542 buf = in->buf->buffer->content;
7543 len = in->buf->buffer->use;
7544 }
7545 /* take into account the sequence length */
7546 if (third) len -= 2;
7547 else if (next) len --;
7548 for (;base < len;base++) {
7549 if (buf[base] == first) {
7550 if (third != 0) {
7551 if ((buf[base + 1] != next) ||
7552 (buf[base + 2] != third)) continue;
7553 } else if (next != 0) {
7554 if (buf[base + 1] != next) continue;
7555 }
7556 ctxt->checkIndex = 0;
7557#ifdef DEBUG_PUSH
7558 if (next == 0)
7559 xmlGenericError(xmlGenericErrorContext,
7560 "PP: lookup '%c' found at %d\n",
7561 first, base);
7562 else if (third == 0)
7563 xmlGenericError(xmlGenericErrorContext,
7564 "PP: lookup '%c%c' found at %d\n",
7565 first, next, base);
7566 else
7567 xmlGenericError(xmlGenericErrorContext,
7568 "PP: lookup '%c%c%c' found at %d\n",
7569 first, next, third, base);
7570#endif
7571 return(base - (in->cur - in->base));
7572 }
7573 }
7574 ctxt->checkIndex = base;
7575#ifdef DEBUG_PUSH
7576 if (next == 0)
7577 xmlGenericError(xmlGenericErrorContext,
7578 "PP: lookup '%c' failed\n", first);
7579 else if (third == 0)
7580 xmlGenericError(xmlGenericErrorContext,
7581 "PP: lookup '%c%c' failed\n", first, next);
7582 else
7583 xmlGenericError(xmlGenericErrorContext,
7584 "PP: lookup '%c%c%c' failed\n", first, next, third);
7585#endif
7586 return(-1);
7587}
7588
7589/**
7590 * xmlParseTryOrFinish:
7591 * @ctxt: an XML parser context
7592 * @terminate: last chunk indicator
7593 *
7594 * Try to progress on parsing
7595 *
7596 * Returns zero if no parsing was possible
7597 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007598static int
Owen Taylor3473f882001-02-23 17:55:21 +00007599xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7600 int ret = 0;
7601 int avail;
7602 xmlChar cur, next;
7603
7604#ifdef DEBUG_PUSH
7605 switch (ctxt->instate) {
7606 case XML_PARSER_EOF:
7607 xmlGenericError(xmlGenericErrorContext,
7608 "PP: try EOF\n"); break;
7609 case XML_PARSER_START:
7610 xmlGenericError(xmlGenericErrorContext,
7611 "PP: try START\n"); break;
7612 case XML_PARSER_MISC:
7613 xmlGenericError(xmlGenericErrorContext,
7614 "PP: try MISC\n");break;
7615 case XML_PARSER_COMMENT:
7616 xmlGenericError(xmlGenericErrorContext,
7617 "PP: try COMMENT\n");break;
7618 case XML_PARSER_PROLOG:
7619 xmlGenericError(xmlGenericErrorContext,
7620 "PP: try PROLOG\n");break;
7621 case XML_PARSER_START_TAG:
7622 xmlGenericError(xmlGenericErrorContext,
7623 "PP: try START_TAG\n");break;
7624 case XML_PARSER_CONTENT:
7625 xmlGenericError(xmlGenericErrorContext,
7626 "PP: try CONTENT\n");break;
7627 case XML_PARSER_CDATA_SECTION:
7628 xmlGenericError(xmlGenericErrorContext,
7629 "PP: try CDATA_SECTION\n");break;
7630 case XML_PARSER_END_TAG:
7631 xmlGenericError(xmlGenericErrorContext,
7632 "PP: try END_TAG\n");break;
7633 case XML_PARSER_ENTITY_DECL:
7634 xmlGenericError(xmlGenericErrorContext,
7635 "PP: try ENTITY_DECL\n");break;
7636 case XML_PARSER_ENTITY_VALUE:
7637 xmlGenericError(xmlGenericErrorContext,
7638 "PP: try ENTITY_VALUE\n");break;
7639 case XML_PARSER_ATTRIBUTE_VALUE:
7640 xmlGenericError(xmlGenericErrorContext,
7641 "PP: try ATTRIBUTE_VALUE\n");break;
7642 case XML_PARSER_DTD:
7643 xmlGenericError(xmlGenericErrorContext,
7644 "PP: try DTD\n");break;
7645 case XML_PARSER_EPILOG:
7646 xmlGenericError(xmlGenericErrorContext,
7647 "PP: try EPILOG\n");break;
7648 case XML_PARSER_PI:
7649 xmlGenericError(xmlGenericErrorContext,
7650 "PP: try PI\n");break;
7651 case XML_PARSER_IGNORE:
7652 xmlGenericError(xmlGenericErrorContext,
7653 "PP: try IGNORE\n");break;
7654 }
7655#endif
7656
7657 while (1) {
7658 /*
7659 * Pop-up of finished entities.
7660 */
7661 while ((RAW == 0) && (ctxt->inputNr > 1))
7662 xmlPopInput(ctxt);
7663
7664 if (ctxt->input ==NULL) break;
7665 if (ctxt->input->buf == NULL)
7666 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7667 else
7668 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7669 if (avail < 1)
7670 goto done;
7671 switch (ctxt->instate) {
7672 case XML_PARSER_EOF:
7673 /*
7674 * Document parsing is done !
7675 */
7676 goto done;
7677 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007678 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7679 xmlChar start[4];
7680 xmlCharEncoding enc;
7681
7682 /*
7683 * Very first chars read from the document flow.
7684 */
7685 if (avail < 4)
7686 goto done;
7687
7688 /*
7689 * Get the 4 first bytes and decode the charset
7690 * if enc != XML_CHAR_ENCODING_NONE
7691 * plug some encoding conversion routines.
7692 */
7693 start[0] = RAW;
7694 start[1] = NXT(1);
7695 start[2] = NXT(2);
7696 start[3] = NXT(3);
7697 enc = xmlDetectCharEncoding(start, 4);
7698 if (enc != XML_CHAR_ENCODING_NONE) {
7699 xmlSwitchEncoding(ctxt, enc);
7700 }
7701 break;
7702 }
Owen Taylor3473f882001-02-23 17:55:21 +00007703
7704 cur = ctxt->input->cur[0];
7705 next = ctxt->input->cur[1];
7706 if (cur == 0) {
7707 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7708 ctxt->sax->setDocumentLocator(ctxt->userData,
7709 &xmlDefaultSAXLocator);
7710 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7712 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7713 ctxt->wellFormed = 0;
7714 ctxt->disableSAX = 1;
7715 ctxt->instate = XML_PARSER_EOF;
7716#ifdef DEBUG_PUSH
7717 xmlGenericError(xmlGenericErrorContext,
7718 "PP: entering EOF\n");
7719#endif
7720 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7721 ctxt->sax->endDocument(ctxt->userData);
7722 goto done;
7723 }
7724 if ((cur == '<') && (next == '?')) {
7725 /* PI or XML decl */
7726 if (avail < 5) return(ret);
7727 if ((!terminate) &&
7728 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7729 return(ret);
7730 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7731 ctxt->sax->setDocumentLocator(ctxt->userData,
7732 &xmlDefaultSAXLocator);
7733 if ((ctxt->input->cur[2] == 'x') &&
7734 (ctxt->input->cur[3] == 'm') &&
7735 (ctxt->input->cur[4] == 'l') &&
7736 (IS_BLANK(ctxt->input->cur[5]))) {
7737 ret += 5;
7738#ifdef DEBUG_PUSH
7739 xmlGenericError(xmlGenericErrorContext,
7740 "PP: Parsing XML Decl\n");
7741#endif
7742 xmlParseXMLDecl(ctxt);
7743 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7744 /*
7745 * The XML REC instructs us to stop parsing right
7746 * here
7747 */
7748 ctxt->instate = XML_PARSER_EOF;
7749 return(0);
7750 }
7751 ctxt->standalone = ctxt->input->standalone;
7752 if ((ctxt->encoding == NULL) &&
7753 (ctxt->input->encoding != NULL))
7754 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7755 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7756 (!ctxt->disableSAX))
7757 ctxt->sax->startDocument(ctxt->userData);
7758 ctxt->instate = XML_PARSER_MISC;
7759#ifdef DEBUG_PUSH
7760 xmlGenericError(xmlGenericErrorContext,
7761 "PP: entering MISC\n");
7762#endif
7763 } else {
7764 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7765 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7766 (!ctxt->disableSAX))
7767 ctxt->sax->startDocument(ctxt->userData);
7768 ctxt->instate = XML_PARSER_MISC;
7769#ifdef DEBUG_PUSH
7770 xmlGenericError(xmlGenericErrorContext,
7771 "PP: entering MISC\n");
7772#endif
7773 }
7774 } else {
7775 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7776 ctxt->sax->setDocumentLocator(ctxt->userData,
7777 &xmlDefaultSAXLocator);
7778 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7779 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7780 (!ctxt->disableSAX))
7781 ctxt->sax->startDocument(ctxt->userData);
7782 ctxt->instate = XML_PARSER_MISC;
7783#ifdef DEBUG_PUSH
7784 xmlGenericError(xmlGenericErrorContext,
7785 "PP: entering MISC\n");
7786#endif
7787 }
7788 break;
7789 case XML_PARSER_MISC:
7790 SKIP_BLANKS;
7791 if (ctxt->input->buf == NULL)
7792 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7793 else
7794 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7795 if (avail < 2)
7796 goto done;
7797 cur = ctxt->input->cur[0];
7798 next = ctxt->input->cur[1];
7799 if ((cur == '<') && (next == '?')) {
7800 if ((!terminate) &&
7801 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7802 goto done;
7803#ifdef DEBUG_PUSH
7804 xmlGenericError(xmlGenericErrorContext,
7805 "PP: Parsing PI\n");
7806#endif
7807 xmlParsePI(ctxt);
7808 } else if ((cur == '<') && (next == '!') &&
7809 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7810 if ((!terminate) &&
7811 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7812 goto done;
7813#ifdef DEBUG_PUSH
7814 xmlGenericError(xmlGenericErrorContext,
7815 "PP: Parsing Comment\n");
7816#endif
7817 xmlParseComment(ctxt);
7818 ctxt->instate = XML_PARSER_MISC;
7819 } else if ((cur == '<') && (next == '!') &&
7820 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7821 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7822 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7823 (ctxt->input->cur[8] == 'E')) {
7824 if ((!terminate) &&
7825 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7826 goto done;
7827#ifdef DEBUG_PUSH
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: Parsing internal subset\n");
7830#endif
7831 ctxt->inSubset = 1;
7832 xmlParseDocTypeDecl(ctxt);
7833 if (RAW == '[') {
7834 ctxt->instate = XML_PARSER_DTD;
7835#ifdef DEBUG_PUSH
7836 xmlGenericError(xmlGenericErrorContext,
7837 "PP: entering DTD\n");
7838#endif
7839 } else {
7840 /*
7841 * Create and update the external subset.
7842 */
7843 ctxt->inSubset = 2;
7844 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7845 (ctxt->sax->externalSubset != NULL))
7846 ctxt->sax->externalSubset(ctxt->userData,
7847 ctxt->intSubName, ctxt->extSubSystem,
7848 ctxt->extSubURI);
7849 ctxt->inSubset = 0;
7850 ctxt->instate = XML_PARSER_PROLOG;
7851#ifdef DEBUG_PUSH
7852 xmlGenericError(xmlGenericErrorContext,
7853 "PP: entering PROLOG\n");
7854#endif
7855 }
7856 } else if ((cur == '<') && (next == '!') &&
7857 (avail < 9)) {
7858 goto done;
7859 } else {
7860 ctxt->instate = XML_PARSER_START_TAG;
7861#ifdef DEBUG_PUSH
7862 xmlGenericError(xmlGenericErrorContext,
7863 "PP: entering START_TAG\n");
7864#endif
7865 }
7866 break;
7867 case XML_PARSER_IGNORE:
7868 xmlGenericError(xmlGenericErrorContext,
7869 "PP: internal error, state == IGNORE");
7870 ctxt->instate = XML_PARSER_DTD;
7871#ifdef DEBUG_PUSH
7872 xmlGenericError(xmlGenericErrorContext,
7873 "PP: entering DTD\n");
7874#endif
7875 break;
7876 case XML_PARSER_PROLOG:
7877 SKIP_BLANKS;
7878 if (ctxt->input->buf == NULL)
7879 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7880 else
7881 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7882 if (avail < 2)
7883 goto done;
7884 cur = ctxt->input->cur[0];
7885 next = ctxt->input->cur[1];
7886 if ((cur == '<') && (next == '?')) {
7887 if ((!terminate) &&
7888 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7889 goto done;
7890#ifdef DEBUG_PUSH
7891 xmlGenericError(xmlGenericErrorContext,
7892 "PP: Parsing PI\n");
7893#endif
7894 xmlParsePI(ctxt);
7895 } else if ((cur == '<') && (next == '!') &&
7896 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7897 if ((!terminate) &&
7898 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7899 goto done;
7900#ifdef DEBUG_PUSH
7901 xmlGenericError(xmlGenericErrorContext,
7902 "PP: Parsing Comment\n");
7903#endif
7904 xmlParseComment(ctxt);
7905 ctxt->instate = XML_PARSER_PROLOG;
7906 } else if ((cur == '<') && (next == '!') &&
7907 (avail < 4)) {
7908 goto done;
7909 } else {
7910 ctxt->instate = XML_PARSER_START_TAG;
7911#ifdef DEBUG_PUSH
7912 xmlGenericError(xmlGenericErrorContext,
7913 "PP: entering START_TAG\n");
7914#endif
7915 }
7916 break;
7917 case XML_PARSER_EPILOG:
7918 SKIP_BLANKS;
7919 if (ctxt->input->buf == NULL)
7920 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7921 else
7922 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7923 if (avail < 2)
7924 goto done;
7925 cur = ctxt->input->cur[0];
7926 next = ctxt->input->cur[1];
7927 if ((cur == '<') && (next == '?')) {
7928 if ((!terminate) &&
7929 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7930 goto done;
7931#ifdef DEBUG_PUSH
7932 xmlGenericError(xmlGenericErrorContext,
7933 "PP: Parsing PI\n");
7934#endif
7935 xmlParsePI(ctxt);
7936 ctxt->instate = XML_PARSER_EPILOG;
7937 } else if ((cur == '<') && (next == '!') &&
7938 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7939 if ((!terminate) &&
7940 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7941 goto done;
7942#ifdef DEBUG_PUSH
7943 xmlGenericError(xmlGenericErrorContext,
7944 "PP: Parsing Comment\n");
7945#endif
7946 xmlParseComment(ctxt);
7947 ctxt->instate = XML_PARSER_EPILOG;
7948 } else if ((cur == '<') && (next == '!') &&
7949 (avail < 4)) {
7950 goto done;
7951 } else {
7952 ctxt->errNo = XML_ERR_DOCUMENT_END;
7953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7954 ctxt->sax->error(ctxt->userData,
7955 "Extra content at the end of the document\n");
7956 ctxt->wellFormed = 0;
7957 ctxt->disableSAX = 1;
7958 ctxt->instate = XML_PARSER_EOF;
7959#ifdef DEBUG_PUSH
7960 xmlGenericError(xmlGenericErrorContext,
7961 "PP: entering EOF\n");
7962#endif
7963 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7964 (!ctxt->disableSAX))
7965 ctxt->sax->endDocument(ctxt->userData);
7966 goto done;
7967 }
7968 break;
7969 case XML_PARSER_START_TAG: {
7970 xmlChar *name, *oldname;
7971
7972 if ((avail < 2) && (ctxt->inputNr == 1))
7973 goto done;
7974 cur = ctxt->input->cur[0];
7975 if (cur != '<') {
7976 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7978 ctxt->sax->error(ctxt->userData,
7979 "Start tag expect, '<' not found\n");
7980 ctxt->wellFormed = 0;
7981 ctxt->disableSAX = 1;
7982 ctxt->instate = XML_PARSER_EOF;
7983#ifdef DEBUG_PUSH
7984 xmlGenericError(xmlGenericErrorContext,
7985 "PP: entering EOF\n");
7986#endif
7987 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7988 (!ctxt->disableSAX))
7989 ctxt->sax->endDocument(ctxt->userData);
7990 goto done;
7991 }
7992 if ((!terminate) &&
7993 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7994 goto done;
7995 if (ctxt->spaceNr == 0)
7996 spacePush(ctxt, -1);
7997 else
7998 spacePush(ctxt, *ctxt->space);
7999 name = xmlParseStartTag(ctxt);
8000 if (name == NULL) {
8001 spacePop(ctxt);
8002 ctxt->instate = XML_PARSER_EOF;
8003#ifdef DEBUG_PUSH
8004 xmlGenericError(xmlGenericErrorContext,
8005 "PP: entering EOF\n");
8006#endif
8007 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8008 (!ctxt->disableSAX))
8009 ctxt->sax->endDocument(ctxt->userData);
8010 goto done;
8011 }
8012 namePush(ctxt, xmlStrdup(name));
8013
8014 /*
8015 * [ VC: Root Element Type ]
8016 * The Name in the document type declaration must match
8017 * the element type of the root element.
8018 */
8019 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8020 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8021 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8022
8023 /*
8024 * Check for an Empty Element.
8025 */
8026 if ((RAW == '/') && (NXT(1) == '>')) {
8027 SKIP(2);
8028 if ((ctxt->sax != NULL) &&
8029 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8030 ctxt->sax->endElement(ctxt->userData, name);
8031 xmlFree(name);
8032 oldname = namePop(ctxt);
8033 spacePop(ctxt);
8034 if (oldname != NULL) {
8035#ifdef DEBUG_STACK
8036 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8037#endif
8038 xmlFree(oldname);
8039 }
8040 if (ctxt->name == NULL) {
8041 ctxt->instate = XML_PARSER_EPILOG;
8042#ifdef DEBUG_PUSH
8043 xmlGenericError(xmlGenericErrorContext,
8044 "PP: entering EPILOG\n");
8045#endif
8046 } else {
8047 ctxt->instate = XML_PARSER_CONTENT;
8048#ifdef DEBUG_PUSH
8049 xmlGenericError(xmlGenericErrorContext,
8050 "PP: entering CONTENT\n");
8051#endif
8052 }
8053 break;
8054 }
8055 if (RAW == '>') {
8056 NEXT;
8057 } else {
8058 ctxt->errNo = XML_ERR_GT_REQUIRED;
8059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8060 ctxt->sax->error(ctxt->userData,
8061 "Couldn't find end of Start Tag %s\n",
8062 name);
8063 ctxt->wellFormed = 0;
8064 ctxt->disableSAX = 1;
8065
8066 /*
8067 * end of parsing of this node.
8068 */
8069 nodePop(ctxt);
8070 oldname = namePop(ctxt);
8071 spacePop(ctxt);
8072 if (oldname != NULL) {
8073#ifdef DEBUG_STACK
8074 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8075#endif
8076 xmlFree(oldname);
8077 }
8078 }
8079 xmlFree(name);
8080 ctxt->instate = XML_PARSER_CONTENT;
8081#ifdef DEBUG_PUSH
8082 xmlGenericError(xmlGenericErrorContext,
8083 "PP: entering CONTENT\n");
8084#endif
8085 break;
8086 }
8087 case XML_PARSER_CONTENT: {
8088 const xmlChar *test;
8089 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008090 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008091
8092 /*
8093 * Handle preparsed entities and charRef
8094 */
8095 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008096 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008097
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008098 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008099 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8100 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008101 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008102 ctxt->token = 0;
8103 }
8104 if ((avail < 2) && (ctxt->inputNr == 1))
8105 goto done;
8106 cur = ctxt->input->cur[0];
8107 next = ctxt->input->cur[1];
8108
8109 test = CUR_PTR;
8110 cons = ctxt->input->consumed;
8111 tok = ctxt->token;
8112 if ((cur == '<') && (next == '?')) {
8113 if ((!terminate) &&
8114 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8115 goto done;
8116#ifdef DEBUG_PUSH
8117 xmlGenericError(xmlGenericErrorContext,
8118 "PP: Parsing PI\n");
8119#endif
8120 xmlParsePI(ctxt);
8121 } else if ((cur == '<') && (next == '!') &&
8122 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8123 if ((!terminate) &&
8124 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8125 goto done;
8126#ifdef DEBUG_PUSH
8127 xmlGenericError(xmlGenericErrorContext,
8128 "PP: Parsing Comment\n");
8129#endif
8130 xmlParseComment(ctxt);
8131 ctxt->instate = XML_PARSER_CONTENT;
8132 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8133 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8134 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8135 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8136 (ctxt->input->cur[8] == '[')) {
8137 SKIP(9);
8138 ctxt->instate = XML_PARSER_CDATA_SECTION;
8139#ifdef DEBUG_PUSH
8140 xmlGenericError(xmlGenericErrorContext,
8141 "PP: entering CDATA_SECTION\n");
8142#endif
8143 break;
8144 } else if ((cur == '<') && (next == '!') &&
8145 (avail < 9)) {
8146 goto done;
8147 } else if ((cur == '<') && (next == '/')) {
8148 ctxt->instate = XML_PARSER_END_TAG;
8149#ifdef DEBUG_PUSH
8150 xmlGenericError(xmlGenericErrorContext,
8151 "PP: entering END_TAG\n");
8152#endif
8153 break;
8154 } else if (cur == '<') {
8155 ctxt->instate = XML_PARSER_START_TAG;
8156#ifdef DEBUG_PUSH
8157 xmlGenericError(xmlGenericErrorContext,
8158 "PP: entering START_TAG\n");
8159#endif
8160 break;
8161 } else if (cur == '&') {
8162 if ((!terminate) &&
8163 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8164 goto done;
8165#ifdef DEBUG_PUSH
8166 xmlGenericError(xmlGenericErrorContext,
8167 "PP: Parsing Reference\n");
8168#endif
8169 xmlParseReference(ctxt);
8170 } else {
8171 /* TODO Avoid the extra copy, handle directly !!! */
8172 /*
8173 * Goal of the following test is:
8174 * - minimize calls to the SAX 'character' callback
8175 * when they are mergeable
8176 * - handle an problem for isBlank when we only parse
8177 * a sequence of blank chars and the next one is
8178 * not available to check against '<' presence.
8179 * - tries to homogenize the differences in SAX
8180 * callbacks beween the push and pull versions
8181 * of the parser.
8182 */
8183 if ((ctxt->inputNr == 1) &&
8184 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8185 if ((!terminate) &&
8186 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8187 goto done;
8188 }
8189 ctxt->checkIndex = 0;
8190#ifdef DEBUG_PUSH
8191 xmlGenericError(xmlGenericErrorContext,
8192 "PP: Parsing char data\n");
8193#endif
8194 xmlParseCharData(ctxt, 0);
8195 }
8196 /*
8197 * Pop-up of finished entities.
8198 */
8199 while ((RAW == 0) && (ctxt->inputNr > 1))
8200 xmlPopInput(ctxt);
8201 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8202 (tok == ctxt->token)) {
8203 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8205 ctxt->sax->error(ctxt->userData,
8206 "detected an error in element content\n");
8207 ctxt->wellFormed = 0;
8208 ctxt->disableSAX = 1;
8209 ctxt->instate = XML_PARSER_EOF;
8210 break;
8211 }
8212 break;
8213 }
8214 case XML_PARSER_CDATA_SECTION: {
8215 /*
8216 * The Push mode need to have the SAX callback for
8217 * cdataBlock merge back contiguous callbacks.
8218 */
8219 int base;
8220
8221 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8222 if (base < 0) {
8223 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8224 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8225 if (ctxt->sax->cdataBlock != NULL)
8226 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8227 XML_PARSER_BIG_BUFFER_SIZE);
8228 }
8229 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8230 ctxt->checkIndex = 0;
8231 }
8232 goto done;
8233 } else {
8234 if ((ctxt->sax != NULL) && (base > 0) &&
8235 (!ctxt->disableSAX)) {
8236 if (ctxt->sax->cdataBlock != NULL)
8237 ctxt->sax->cdataBlock(ctxt->userData,
8238 ctxt->input->cur, base);
8239 }
8240 SKIP(base + 3);
8241 ctxt->checkIndex = 0;
8242 ctxt->instate = XML_PARSER_CONTENT;
8243#ifdef DEBUG_PUSH
8244 xmlGenericError(xmlGenericErrorContext,
8245 "PP: entering CONTENT\n");
8246#endif
8247 }
8248 break;
8249 }
8250 case XML_PARSER_END_TAG:
8251 if (avail < 2)
8252 goto done;
8253 if ((!terminate) &&
8254 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8255 goto done;
8256 xmlParseEndTag(ctxt);
8257 if (ctxt->name == NULL) {
8258 ctxt->instate = XML_PARSER_EPILOG;
8259#ifdef DEBUG_PUSH
8260 xmlGenericError(xmlGenericErrorContext,
8261 "PP: entering EPILOG\n");
8262#endif
8263 } else {
8264 ctxt->instate = XML_PARSER_CONTENT;
8265#ifdef DEBUG_PUSH
8266 xmlGenericError(xmlGenericErrorContext,
8267 "PP: entering CONTENT\n");
8268#endif
8269 }
8270 break;
8271 case XML_PARSER_DTD: {
8272 /*
8273 * Sorry but progressive parsing of the internal subset
8274 * is not expected to be supported. We first check that
8275 * the full content of the internal subset is available and
8276 * the parsing is launched only at that point.
8277 * Internal subset ends up with "']' S? '>'" in an unescaped
8278 * section and not in a ']]>' sequence which are conditional
8279 * sections (whoever argued to keep that crap in XML deserve
8280 * a place in hell !).
8281 */
8282 int base, i;
8283 xmlChar *buf;
8284 xmlChar quote = 0;
8285
8286 base = ctxt->input->cur - ctxt->input->base;
8287 if (base < 0) return(0);
8288 if (ctxt->checkIndex > base)
8289 base = ctxt->checkIndex;
8290 buf = ctxt->input->buf->buffer->content;
8291 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8292 base++) {
8293 if (quote != 0) {
8294 if (buf[base] == quote)
8295 quote = 0;
8296 continue;
8297 }
8298 if (buf[base] == '"') {
8299 quote = '"';
8300 continue;
8301 }
8302 if (buf[base] == '\'') {
8303 quote = '\'';
8304 continue;
8305 }
8306 if (buf[base] == ']') {
8307 if ((unsigned int) base +1 >=
8308 ctxt->input->buf->buffer->use)
8309 break;
8310 if (buf[base + 1] == ']') {
8311 /* conditional crap, skip both ']' ! */
8312 base++;
8313 continue;
8314 }
8315 for (i = 0;
8316 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8317 i++) {
8318 if (buf[base + i] == '>')
8319 goto found_end_int_subset;
8320 }
8321 break;
8322 }
8323 }
8324 /*
8325 * We didn't found the end of the Internal subset
8326 */
8327 if (quote == 0)
8328 ctxt->checkIndex = base;
8329#ifdef DEBUG_PUSH
8330 if (next == 0)
8331 xmlGenericError(xmlGenericErrorContext,
8332 "PP: lookup of int subset end filed\n");
8333#endif
8334 goto done;
8335
8336found_end_int_subset:
8337 xmlParseInternalSubset(ctxt);
8338 ctxt->inSubset = 2;
8339 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8340 (ctxt->sax->externalSubset != NULL))
8341 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8342 ctxt->extSubSystem, ctxt->extSubURI);
8343 ctxt->inSubset = 0;
8344 ctxt->instate = XML_PARSER_PROLOG;
8345 ctxt->checkIndex = 0;
8346#ifdef DEBUG_PUSH
8347 xmlGenericError(xmlGenericErrorContext,
8348 "PP: entering PROLOG\n");
8349#endif
8350 break;
8351 }
8352 case XML_PARSER_COMMENT:
8353 xmlGenericError(xmlGenericErrorContext,
8354 "PP: internal error, state == COMMENT\n");
8355 ctxt->instate = XML_PARSER_CONTENT;
8356#ifdef DEBUG_PUSH
8357 xmlGenericError(xmlGenericErrorContext,
8358 "PP: entering CONTENT\n");
8359#endif
8360 break;
8361 case XML_PARSER_PI:
8362 xmlGenericError(xmlGenericErrorContext,
8363 "PP: internal error, state == PI\n");
8364 ctxt->instate = XML_PARSER_CONTENT;
8365#ifdef DEBUG_PUSH
8366 xmlGenericError(xmlGenericErrorContext,
8367 "PP: entering CONTENT\n");
8368#endif
8369 break;
8370 case XML_PARSER_ENTITY_DECL:
8371 xmlGenericError(xmlGenericErrorContext,
8372 "PP: internal error, state == ENTITY_DECL\n");
8373 ctxt->instate = XML_PARSER_DTD;
8374#ifdef DEBUG_PUSH
8375 xmlGenericError(xmlGenericErrorContext,
8376 "PP: entering DTD\n");
8377#endif
8378 break;
8379 case XML_PARSER_ENTITY_VALUE:
8380 xmlGenericError(xmlGenericErrorContext,
8381 "PP: internal error, state == ENTITY_VALUE\n");
8382 ctxt->instate = XML_PARSER_CONTENT;
8383#ifdef DEBUG_PUSH
8384 xmlGenericError(xmlGenericErrorContext,
8385 "PP: entering DTD\n");
8386#endif
8387 break;
8388 case XML_PARSER_ATTRIBUTE_VALUE:
8389 xmlGenericError(xmlGenericErrorContext,
8390 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8391 ctxt->instate = XML_PARSER_START_TAG;
8392#ifdef DEBUG_PUSH
8393 xmlGenericError(xmlGenericErrorContext,
8394 "PP: entering START_TAG\n");
8395#endif
8396 break;
8397 case XML_PARSER_SYSTEM_LITERAL:
8398 xmlGenericError(xmlGenericErrorContext,
8399 "PP: internal error, state == SYSTEM_LITERAL\n");
8400 ctxt->instate = XML_PARSER_START_TAG;
8401#ifdef DEBUG_PUSH
8402 xmlGenericError(xmlGenericErrorContext,
8403 "PP: entering START_TAG\n");
8404#endif
8405 break;
8406 }
8407 }
8408done:
8409#ifdef DEBUG_PUSH
8410 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8411#endif
8412 return(ret);
8413}
8414
8415/**
Owen Taylor3473f882001-02-23 17:55:21 +00008416 * xmlParseChunk:
8417 * @ctxt: an XML parser context
8418 * @chunk: an char array
8419 * @size: the size in byte of the chunk
8420 * @terminate: last chunk indicator
8421 *
8422 * Parse a Chunk of memory
8423 *
8424 * Returns zero if no error, the xmlParserErrors otherwise.
8425 */
8426int
8427xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8428 int terminate) {
8429 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8430 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8431 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8432 int cur = ctxt->input->cur - ctxt->input->base;
8433
8434 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8435 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8436 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008437 ctxt->input->end =
8438 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008439#ifdef DEBUG_PUSH
8440 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8441#endif
8442
8443 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8444 xmlParseTryOrFinish(ctxt, terminate);
8445 } else if (ctxt->instate != XML_PARSER_EOF) {
8446 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8447 xmlParserInputBufferPtr in = ctxt->input->buf;
8448 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8449 (in->raw != NULL)) {
8450 int nbchars;
8451
8452 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8453 if (nbchars < 0) {
8454 xmlGenericError(xmlGenericErrorContext,
8455 "xmlParseChunk: encoder error\n");
8456 return(XML_ERR_INVALID_ENCODING);
8457 }
8458 }
8459 }
8460 }
8461 xmlParseTryOrFinish(ctxt, terminate);
8462 if (terminate) {
8463 /*
8464 * Check for termination
8465 */
8466 if ((ctxt->instate != XML_PARSER_EOF) &&
8467 (ctxt->instate != XML_PARSER_EPILOG)) {
8468 ctxt->errNo = XML_ERR_DOCUMENT_END;
8469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8470 ctxt->sax->error(ctxt->userData,
8471 "Extra content at the end of the document\n");
8472 ctxt->wellFormed = 0;
8473 ctxt->disableSAX = 1;
8474 }
8475 if (ctxt->instate != XML_PARSER_EOF) {
8476 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8477 (!ctxt->disableSAX))
8478 ctxt->sax->endDocument(ctxt->userData);
8479 }
8480 ctxt->instate = XML_PARSER_EOF;
8481 }
8482 return((xmlParserErrors) ctxt->errNo);
8483}
8484
8485/************************************************************************
8486 * *
8487 * I/O front end functions to the parser *
8488 * *
8489 ************************************************************************/
8490
8491/**
8492 * xmlStopParser:
8493 * @ctxt: an XML parser context
8494 *
8495 * Blocks further parser processing
8496 */
8497void
8498xmlStopParser(xmlParserCtxtPtr ctxt) {
8499 ctxt->instate = XML_PARSER_EOF;
8500 if (ctxt->input != NULL)
8501 ctxt->input->cur = BAD_CAST"";
8502}
8503
8504/**
8505 * xmlCreatePushParserCtxt:
8506 * @sax: a SAX handler
8507 * @user_data: The user data returned on SAX callbacks
8508 * @chunk: a pointer to an array of chars
8509 * @size: number of chars in the array
8510 * @filename: an optional file name or URI
8511 *
8512 * Create a parser context for using the XML parser in push mode
8513 * To allow content encoding detection, @size should be >= 4
8514 * The value of @filename is used for fetching external entities
8515 * and error/warning reports.
8516 *
8517 * Returns the new parser context or NULL
8518 */
8519xmlParserCtxtPtr
8520xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8521 const char *chunk, int size, const char *filename) {
8522 xmlParserCtxtPtr ctxt;
8523 xmlParserInputPtr inputStream;
8524 xmlParserInputBufferPtr buf;
8525 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8526
8527 /*
8528 * plug some encoding conversion routines
8529 */
8530 if ((chunk != NULL) && (size >= 4))
8531 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8532
8533 buf = xmlAllocParserInputBuffer(enc);
8534 if (buf == NULL) return(NULL);
8535
8536 ctxt = xmlNewParserCtxt();
8537 if (ctxt == NULL) {
8538 xmlFree(buf);
8539 return(NULL);
8540 }
8541 if (sax != NULL) {
8542 if (ctxt->sax != &xmlDefaultSAXHandler)
8543 xmlFree(ctxt->sax);
8544 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8545 if (ctxt->sax == NULL) {
8546 xmlFree(buf);
8547 xmlFree(ctxt);
8548 return(NULL);
8549 }
8550 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8551 if (user_data != NULL)
8552 ctxt->userData = user_data;
8553 }
8554 if (filename == NULL) {
8555 ctxt->directory = NULL;
8556 } else {
8557 ctxt->directory = xmlParserGetDirectory(filename);
8558 }
8559
8560 inputStream = xmlNewInputStream(ctxt);
8561 if (inputStream == NULL) {
8562 xmlFreeParserCtxt(ctxt);
8563 return(NULL);
8564 }
8565
8566 if (filename == NULL)
8567 inputStream->filename = NULL;
8568 else
8569 inputStream->filename = xmlMemStrdup(filename);
8570 inputStream->buf = buf;
8571 inputStream->base = inputStream->buf->buffer->content;
8572 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008573 inputStream->end =
8574 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008575
8576 inputPush(ctxt, inputStream);
8577
8578 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8579 (ctxt->input->buf != NULL)) {
8580 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8581#ifdef DEBUG_PUSH
8582 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8583#endif
8584 }
8585
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008586 if (enc != XML_CHAR_ENCODING_NONE) {
8587 xmlSwitchEncoding(ctxt, enc);
8588 }
8589
Owen Taylor3473f882001-02-23 17:55:21 +00008590 return(ctxt);
8591}
8592
8593/**
8594 * xmlCreateIOParserCtxt:
8595 * @sax: a SAX handler
8596 * @user_data: The user data returned on SAX callbacks
8597 * @ioread: an I/O read function
8598 * @ioclose: an I/O close function
8599 * @ioctx: an I/O handler
8600 * @enc: the charset encoding if known
8601 *
8602 * Create a parser context for using the XML parser with an existing
8603 * I/O stream
8604 *
8605 * Returns the new parser context or NULL
8606 */
8607xmlParserCtxtPtr
8608xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8609 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8610 void *ioctx, xmlCharEncoding enc) {
8611 xmlParserCtxtPtr ctxt;
8612 xmlParserInputPtr inputStream;
8613 xmlParserInputBufferPtr buf;
8614
8615 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8616 if (buf == NULL) return(NULL);
8617
8618 ctxt = xmlNewParserCtxt();
8619 if (ctxt == NULL) {
8620 xmlFree(buf);
8621 return(NULL);
8622 }
8623 if (sax != NULL) {
8624 if (ctxt->sax != &xmlDefaultSAXHandler)
8625 xmlFree(ctxt->sax);
8626 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8627 if (ctxt->sax == NULL) {
8628 xmlFree(buf);
8629 xmlFree(ctxt);
8630 return(NULL);
8631 }
8632 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8633 if (user_data != NULL)
8634 ctxt->userData = user_data;
8635 }
8636
8637 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8638 if (inputStream == NULL) {
8639 xmlFreeParserCtxt(ctxt);
8640 return(NULL);
8641 }
8642 inputPush(ctxt, inputStream);
8643
8644 return(ctxt);
8645}
8646
8647/************************************************************************
8648 * *
8649 * Front ends when parsing a Dtd *
8650 * *
8651 ************************************************************************/
8652
8653/**
8654 * xmlIOParseDTD:
8655 * @sax: the SAX handler block or NULL
8656 * @input: an Input Buffer
8657 * @enc: the charset encoding if known
8658 *
8659 * Load and parse a DTD
8660 *
8661 * Returns the resulting xmlDtdPtr or NULL in case of error.
8662 * @input will be freed at parsing end.
8663 */
8664
8665xmlDtdPtr
8666xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8667 xmlCharEncoding enc) {
8668 xmlDtdPtr ret = NULL;
8669 xmlParserCtxtPtr ctxt;
8670 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008671 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008672
8673 if (input == NULL)
8674 return(NULL);
8675
8676 ctxt = xmlNewParserCtxt();
8677 if (ctxt == NULL) {
8678 return(NULL);
8679 }
8680
8681 /*
8682 * Set-up the SAX context
8683 */
8684 if (sax != NULL) {
8685 if (ctxt->sax != NULL)
8686 xmlFree(ctxt->sax);
8687 ctxt->sax = sax;
8688 ctxt->userData = NULL;
8689 }
8690
8691 /*
8692 * generate a parser input from the I/O handler
8693 */
8694
8695 pinput = xmlNewIOInputStream(ctxt, input, enc);
8696 if (pinput == NULL) {
8697 if (sax != NULL) ctxt->sax = NULL;
8698 xmlFreeParserCtxt(ctxt);
8699 return(NULL);
8700 }
8701
8702 /*
8703 * plug some encoding conversion routines here.
8704 */
8705 xmlPushInput(ctxt, pinput);
8706
8707 pinput->filename = NULL;
8708 pinput->line = 1;
8709 pinput->col = 1;
8710 pinput->base = ctxt->input->cur;
8711 pinput->cur = ctxt->input->cur;
8712 pinput->free = NULL;
8713
8714 /*
8715 * let's parse that entity knowing it's an external subset.
8716 */
8717 ctxt->inSubset = 2;
8718 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8719 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8720 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008721
8722 if (enc == XML_CHAR_ENCODING_NONE) {
8723 /*
8724 * Get the 4 first bytes and decode the charset
8725 * if enc != XML_CHAR_ENCODING_NONE
8726 * plug some encoding conversion routines.
8727 */
8728 start[0] = RAW;
8729 start[1] = NXT(1);
8730 start[2] = NXT(2);
8731 start[3] = NXT(3);
8732 enc = xmlDetectCharEncoding(start, 4);
8733 if (enc != XML_CHAR_ENCODING_NONE) {
8734 xmlSwitchEncoding(ctxt, enc);
8735 }
8736 }
8737
Owen Taylor3473f882001-02-23 17:55:21 +00008738 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8739
8740 if (ctxt->myDoc != NULL) {
8741 if (ctxt->wellFormed) {
8742 ret = ctxt->myDoc->extSubset;
8743 ctxt->myDoc->extSubset = NULL;
8744 } else {
8745 ret = NULL;
8746 }
8747 xmlFreeDoc(ctxt->myDoc);
8748 ctxt->myDoc = NULL;
8749 }
8750 if (sax != NULL) ctxt->sax = NULL;
8751 xmlFreeParserCtxt(ctxt);
8752
8753 return(ret);
8754}
8755
8756/**
8757 * xmlSAXParseDTD:
8758 * @sax: the SAX handler block
8759 * @ExternalID: a NAME* containing the External ID of the DTD
8760 * @SystemID: a NAME* containing the URL to the DTD
8761 *
8762 * Load and parse an external subset.
8763 *
8764 * Returns the resulting xmlDtdPtr or NULL in case of error.
8765 */
8766
8767xmlDtdPtr
8768xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8769 const xmlChar *SystemID) {
8770 xmlDtdPtr ret = NULL;
8771 xmlParserCtxtPtr ctxt;
8772 xmlParserInputPtr input = NULL;
8773 xmlCharEncoding enc;
8774
8775 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8776
8777 ctxt = xmlNewParserCtxt();
8778 if (ctxt == NULL) {
8779 return(NULL);
8780 }
8781
8782 /*
8783 * Set-up the SAX context
8784 */
8785 if (sax != NULL) {
8786 if (ctxt->sax != NULL)
8787 xmlFree(ctxt->sax);
8788 ctxt->sax = sax;
8789 ctxt->userData = NULL;
8790 }
8791
8792 /*
8793 * Ask the Entity resolver to load the damn thing
8794 */
8795
8796 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8797 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8798 if (input == NULL) {
8799 if (sax != NULL) ctxt->sax = NULL;
8800 xmlFreeParserCtxt(ctxt);
8801 return(NULL);
8802 }
8803
8804 /*
8805 * plug some encoding conversion routines here.
8806 */
8807 xmlPushInput(ctxt, input);
8808 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8809 xmlSwitchEncoding(ctxt, enc);
8810
8811 if (input->filename == NULL)
8812 input->filename = (char *) xmlStrdup(SystemID);
8813 input->line = 1;
8814 input->col = 1;
8815 input->base = ctxt->input->cur;
8816 input->cur = ctxt->input->cur;
8817 input->free = NULL;
8818
8819 /*
8820 * let's parse that entity knowing it's an external subset.
8821 */
8822 ctxt->inSubset = 2;
8823 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8824 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8825 ExternalID, SystemID);
8826 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8827
8828 if (ctxt->myDoc != NULL) {
8829 if (ctxt->wellFormed) {
8830 ret = ctxt->myDoc->extSubset;
8831 ctxt->myDoc->extSubset = NULL;
8832 } else {
8833 ret = NULL;
8834 }
8835 xmlFreeDoc(ctxt->myDoc);
8836 ctxt->myDoc = NULL;
8837 }
8838 if (sax != NULL) ctxt->sax = NULL;
8839 xmlFreeParserCtxt(ctxt);
8840
8841 return(ret);
8842}
8843
8844/**
8845 * xmlParseDTD:
8846 * @ExternalID: a NAME* containing the External ID of the DTD
8847 * @SystemID: a NAME* containing the URL to the DTD
8848 *
8849 * Load and parse an external subset.
8850 *
8851 * Returns the resulting xmlDtdPtr or NULL in case of error.
8852 */
8853
8854xmlDtdPtr
8855xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8856 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8857}
8858
8859/************************************************************************
8860 * *
8861 * Front ends when parsing an Entity *
8862 * *
8863 ************************************************************************/
8864
8865/**
Owen Taylor3473f882001-02-23 17:55:21 +00008866 * xmlParseCtxtExternalEntity:
8867 * @ctx: the existing parsing context
8868 * @URL: the URL for the entity to load
8869 * @ID: the System ID for the entity to load
8870 * @list: the return value for the set of parsed nodes
8871 *
8872 * Parse an external general entity within an existing parsing context
8873 * An external general parsed entity is well-formed if it matches the
8874 * production labeled extParsedEnt.
8875 *
8876 * [78] extParsedEnt ::= TextDecl? content
8877 *
8878 * Returns 0 if the entity is well formed, -1 in case of args problem and
8879 * the parser error code otherwise
8880 */
8881
8882int
8883xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8884 const xmlChar *ID, xmlNodePtr *list) {
8885 xmlParserCtxtPtr ctxt;
8886 xmlDocPtr newDoc;
8887 xmlSAXHandlerPtr oldsax = NULL;
8888 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008889 xmlChar start[4];
8890 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00008891
8892 if (ctx->depth > 40) {
8893 return(XML_ERR_ENTITY_LOOP);
8894 }
8895
8896 if (list != NULL)
8897 *list = NULL;
8898 if ((URL == NULL) && (ID == NULL))
8899 return(-1);
8900 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8901 return(-1);
8902
8903
8904 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8905 if (ctxt == NULL) return(-1);
8906 ctxt->userData = ctxt;
8907 oldsax = ctxt->sax;
8908 ctxt->sax = ctx->sax;
8909 newDoc = xmlNewDoc(BAD_CAST "1.0");
8910 if (newDoc == NULL) {
8911 xmlFreeParserCtxt(ctxt);
8912 return(-1);
8913 }
8914 if (ctx->myDoc != NULL) {
8915 newDoc->intSubset = ctx->myDoc->intSubset;
8916 newDoc->extSubset = ctx->myDoc->extSubset;
8917 }
8918 if (ctx->myDoc->URL != NULL) {
8919 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8920 }
8921 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8922 if (newDoc->children == NULL) {
8923 ctxt->sax = oldsax;
8924 xmlFreeParserCtxt(ctxt);
8925 newDoc->intSubset = NULL;
8926 newDoc->extSubset = NULL;
8927 xmlFreeDoc(newDoc);
8928 return(-1);
8929 }
8930 nodePush(ctxt, newDoc->children);
8931 if (ctx->myDoc == NULL) {
8932 ctxt->myDoc = newDoc;
8933 } else {
8934 ctxt->myDoc = ctx->myDoc;
8935 newDoc->children->doc = ctx->myDoc;
8936 }
8937
Daniel Veillard87a764e2001-06-20 17:41:10 +00008938 /*
8939 * Get the 4 first bytes and decode the charset
8940 * if enc != XML_CHAR_ENCODING_NONE
8941 * plug some encoding conversion routines.
8942 */
8943 GROW
8944 start[0] = RAW;
8945 start[1] = NXT(1);
8946 start[2] = NXT(2);
8947 start[3] = NXT(3);
8948 enc = xmlDetectCharEncoding(start, 4);
8949 if (enc != XML_CHAR_ENCODING_NONE) {
8950 xmlSwitchEncoding(ctxt, enc);
8951 }
8952
Owen Taylor3473f882001-02-23 17:55:21 +00008953 /*
8954 * Parse a possible text declaration first
8955 */
Owen Taylor3473f882001-02-23 17:55:21 +00008956 if ((RAW == '<') && (NXT(1) == '?') &&
8957 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8958 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8959 xmlParseTextDecl(ctxt);
8960 }
8961
8962 /*
8963 * Doing validity checking on chunk doesn't make sense
8964 */
8965 ctxt->instate = XML_PARSER_CONTENT;
8966 ctxt->validate = ctx->validate;
8967 ctxt->loadsubset = ctx->loadsubset;
8968 ctxt->depth = ctx->depth + 1;
8969 ctxt->replaceEntities = ctx->replaceEntities;
8970 if (ctxt->validate) {
8971 ctxt->vctxt.error = ctx->vctxt.error;
8972 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00008973 } else {
8974 ctxt->vctxt.error = NULL;
8975 ctxt->vctxt.warning = NULL;
8976 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00008977 ctxt->vctxt.nodeTab = NULL;
8978 ctxt->vctxt.nodeNr = 0;
8979 ctxt->vctxt.nodeMax = 0;
8980 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008981
8982 xmlParseContent(ctxt);
8983
8984 if ((RAW == '<') && (NXT(1) == '/')) {
8985 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8987 ctxt->sax->error(ctxt->userData,
8988 "chunk is not well balanced\n");
8989 ctxt->wellFormed = 0;
8990 ctxt->disableSAX = 1;
8991 } else if (RAW != 0) {
8992 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8994 ctxt->sax->error(ctxt->userData,
8995 "extra content at the end of well balanced chunk\n");
8996 ctxt->wellFormed = 0;
8997 ctxt->disableSAX = 1;
8998 }
8999 if (ctxt->node != newDoc->children) {
9000 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9002 ctxt->sax->error(ctxt->userData,
9003 "chunk is not well balanced\n");
9004 ctxt->wellFormed = 0;
9005 ctxt->disableSAX = 1;
9006 }
9007
9008 if (!ctxt->wellFormed) {
9009 if (ctxt->errNo == 0)
9010 ret = 1;
9011 else
9012 ret = ctxt->errNo;
9013 } else {
9014 if (list != NULL) {
9015 xmlNodePtr cur;
9016
9017 /*
9018 * Return the newly created nodeset after unlinking it from
9019 * they pseudo parent.
9020 */
9021 cur = newDoc->children->children;
9022 *list = cur;
9023 while (cur != NULL) {
9024 cur->parent = NULL;
9025 cur = cur->next;
9026 }
9027 newDoc->children->children = NULL;
9028 }
9029 ret = 0;
9030 }
9031 ctxt->sax = oldsax;
9032 xmlFreeParserCtxt(ctxt);
9033 newDoc->intSubset = NULL;
9034 newDoc->extSubset = NULL;
9035 xmlFreeDoc(newDoc);
9036
9037 return(ret);
9038}
9039
9040/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009041 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009042 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009043 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009044 * @sax: the SAX handler bloc (possibly NULL)
9045 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9046 * @depth: Used for loop detection, use 0
9047 * @URL: the URL for the entity to load
9048 * @ID: the System ID for the entity to load
9049 * @list: the return value for the set of parsed nodes
9050 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009051 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009052 *
9053 * Returns 0 if the entity is well formed, -1 in case of args problem and
9054 * the parser error code otherwise
9055 */
9056
Daniel Veillard257d9102001-05-08 10:41:44 +00009057static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009058xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9059 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009060 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009061 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009062 xmlParserCtxtPtr ctxt;
9063 xmlDocPtr newDoc;
9064 xmlSAXHandlerPtr oldsax = NULL;
9065 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009066 xmlChar start[4];
9067 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009068
9069 if (depth > 40) {
9070 return(XML_ERR_ENTITY_LOOP);
9071 }
9072
9073
9074
9075 if (list != NULL)
9076 *list = NULL;
9077 if ((URL == NULL) && (ID == NULL))
9078 return(-1);
9079 if (doc == NULL) /* @@ relax but check for dereferences */
9080 return(-1);
9081
9082
9083 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9084 if (ctxt == NULL) return(-1);
9085 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009086 if (oldctxt != NULL) {
9087 ctxt->_private = oldctxt->_private;
9088 ctxt->loadsubset = oldctxt->loadsubset;
9089 ctxt->validate = oldctxt->validate;
9090 ctxt->external = oldctxt->external;
9091 } else {
9092 /*
9093 * Doing validity checking on chunk without context
9094 * doesn't make sense
9095 */
9096 ctxt->_private = NULL;
9097 ctxt->validate = 0;
9098 ctxt->external = 2;
9099 ctxt->loadsubset = 0;
9100 }
Owen Taylor3473f882001-02-23 17:55:21 +00009101 if (sax != NULL) {
9102 oldsax = ctxt->sax;
9103 ctxt->sax = sax;
9104 if (user_data != NULL)
9105 ctxt->userData = user_data;
9106 }
9107 newDoc = xmlNewDoc(BAD_CAST "1.0");
9108 if (newDoc == NULL) {
9109 xmlFreeParserCtxt(ctxt);
9110 return(-1);
9111 }
9112 if (doc != NULL) {
9113 newDoc->intSubset = doc->intSubset;
9114 newDoc->extSubset = doc->extSubset;
9115 }
9116 if (doc->URL != NULL) {
9117 newDoc->URL = xmlStrdup(doc->URL);
9118 }
9119 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9120 if (newDoc->children == NULL) {
9121 if (sax != NULL)
9122 ctxt->sax = oldsax;
9123 xmlFreeParserCtxt(ctxt);
9124 newDoc->intSubset = NULL;
9125 newDoc->extSubset = NULL;
9126 xmlFreeDoc(newDoc);
9127 return(-1);
9128 }
9129 nodePush(ctxt, newDoc->children);
9130 if (doc == NULL) {
9131 ctxt->myDoc = newDoc;
9132 } else {
9133 ctxt->myDoc = doc;
9134 newDoc->children->doc = doc;
9135 }
9136
Daniel Veillard87a764e2001-06-20 17:41:10 +00009137 /*
9138 * Get the 4 first bytes and decode the charset
9139 * if enc != XML_CHAR_ENCODING_NONE
9140 * plug some encoding conversion routines.
9141 */
9142 GROW;
9143 start[0] = RAW;
9144 start[1] = NXT(1);
9145 start[2] = NXT(2);
9146 start[3] = NXT(3);
9147 enc = xmlDetectCharEncoding(start, 4);
9148 if (enc != XML_CHAR_ENCODING_NONE) {
9149 xmlSwitchEncoding(ctxt, enc);
9150 }
9151
Owen Taylor3473f882001-02-23 17:55:21 +00009152 /*
9153 * Parse a possible text declaration first
9154 */
Owen Taylor3473f882001-02-23 17:55:21 +00009155 if ((RAW == '<') && (NXT(1) == '?') &&
9156 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9157 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9158 xmlParseTextDecl(ctxt);
9159 }
9160
Owen Taylor3473f882001-02-23 17:55:21 +00009161 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009162 ctxt->depth = depth;
9163
9164 xmlParseContent(ctxt);
9165
9166 if ((RAW == '<') && (NXT(1) == '/')) {
9167 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9169 ctxt->sax->error(ctxt->userData,
9170 "chunk is not well balanced\n");
9171 ctxt->wellFormed = 0;
9172 ctxt->disableSAX = 1;
9173 } else if (RAW != 0) {
9174 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9176 ctxt->sax->error(ctxt->userData,
9177 "extra content at the end of well balanced chunk\n");
9178 ctxt->wellFormed = 0;
9179 ctxt->disableSAX = 1;
9180 }
9181 if (ctxt->node != newDoc->children) {
9182 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9184 ctxt->sax->error(ctxt->userData,
9185 "chunk is not well balanced\n");
9186 ctxt->wellFormed = 0;
9187 ctxt->disableSAX = 1;
9188 }
9189
9190 if (!ctxt->wellFormed) {
9191 if (ctxt->errNo == 0)
9192 ret = 1;
9193 else
9194 ret = ctxt->errNo;
9195 } else {
9196 if (list != NULL) {
9197 xmlNodePtr cur;
9198
9199 /*
9200 * Return the newly created nodeset after unlinking it from
9201 * they pseudo parent.
9202 */
9203 cur = newDoc->children->children;
9204 *list = cur;
9205 while (cur != NULL) {
9206 cur->parent = NULL;
9207 cur = cur->next;
9208 }
9209 newDoc->children->children = NULL;
9210 }
9211 ret = 0;
9212 }
9213 if (sax != NULL)
9214 ctxt->sax = oldsax;
9215 xmlFreeParserCtxt(ctxt);
9216 newDoc->intSubset = NULL;
9217 newDoc->extSubset = NULL;
9218 xmlFreeDoc(newDoc);
9219
9220 return(ret);
9221}
9222
9223/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009224 * xmlParseExternalEntity:
9225 * @doc: the document the chunk pertains to
9226 * @sax: the SAX handler bloc (possibly NULL)
9227 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9228 * @depth: Used for loop detection, use 0
9229 * @URL: the URL for the entity to load
9230 * @ID: the System ID for the entity to load
9231 * @list: the return value for the set of parsed nodes
9232 *
9233 * Parse an external general entity
9234 * An external general parsed entity is well-formed if it matches the
9235 * production labeled extParsedEnt.
9236 *
9237 * [78] extParsedEnt ::= TextDecl? content
9238 *
9239 * Returns 0 if the entity is well formed, -1 in case of args problem and
9240 * the parser error code otherwise
9241 */
9242
9243int
9244xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9245 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009246 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9247 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009248}
9249
9250/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009251 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009252 * @doc: the document the chunk pertains to
9253 * @sax: the SAX handler bloc (possibly NULL)
9254 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9255 * @depth: Used for loop detection, use 0
9256 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9257 * @list: the return value for the set of parsed nodes
9258 *
9259 * Parse a well-balanced chunk of an XML document
9260 * called by the parser
9261 * The allowed sequence for the Well Balanced Chunk is the one defined by
9262 * the content production in the XML grammar:
9263 *
9264 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9265 *
9266 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9267 * the parser error code otherwise
9268 */
9269
9270int
9271xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9272 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9273 xmlParserCtxtPtr ctxt;
9274 xmlDocPtr newDoc;
9275 xmlSAXHandlerPtr oldsax = NULL;
9276 int size;
9277 int ret = 0;
9278
9279 if (depth > 40) {
9280 return(XML_ERR_ENTITY_LOOP);
9281 }
9282
9283
9284 if (list != NULL)
9285 *list = NULL;
9286 if (string == NULL)
9287 return(-1);
9288
9289 size = xmlStrlen(string);
9290
9291 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9292 if (ctxt == NULL) return(-1);
9293 ctxt->userData = ctxt;
9294 if (sax != NULL) {
9295 oldsax = ctxt->sax;
9296 ctxt->sax = sax;
9297 if (user_data != NULL)
9298 ctxt->userData = user_data;
9299 }
9300 newDoc = xmlNewDoc(BAD_CAST "1.0");
9301 if (newDoc == NULL) {
9302 xmlFreeParserCtxt(ctxt);
9303 return(-1);
9304 }
9305 if (doc != NULL) {
9306 newDoc->intSubset = doc->intSubset;
9307 newDoc->extSubset = doc->extSubset;
9308 }
9309 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9310 if (newDoc->children == NULL) {
9311 if (sax != NULL)
9312 ctxt->sax = oldsax;
9313 xmlFreeParserCtxt(ctxt);
9314 newDoc->intSubset = NULL;
9315 newDoc->extSubset = NULL;
9316 xmlFreeDoc(newDoc);
9317 return(-1);
9318 }
9319 nodePush(ctxt, newDoc->children);
9320 if (doc == NULL) {
9321 ctxt->myDoc = newDoc;
9322 } else {
9323 ctxt->myDoc = doc;
9324 newDoc->children->doc = doc;
9325 }
9326 ctxt->instate = XML_PARSER_CONTENT;
9327 ctxt->depth = depth;
9328
9329 /*
9330 * Doing validity checking on chunk doesn't make sense
9331 */
9332 ctxt->validate = 0;
9333 ctxt->loadsubset = 0;
9334
9335 xmlParseContent(ctxt);
9336
9337 if ((RAW == '<') && (NXT(1) == '/')) {
9338 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9340 ctxt->sax->error(ctxt->userData,
9341 "chunk is not well balanced\n");
9342 ctxt->wellFormed = 0;
9343 ctxt->disableSAX = 1;
9344 } else if (RAW != 0) {
9345 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9347 ctxt->sax->error(ctxt->userData,
9348 "extra content at the end of well balanced chunk\n");
9349 ctxt->wellFormed = 0;
9350 ctxt->disableSAX = 1;
9351 }
9352 if (ctxt->node != newDoc->children) {
9353 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9355 ctxt->sax->error(ctxt->userData,
9356 "chunk is not well balanced\n");
9357 ctxt->wellFormed = 0;
9358 ctxt->disableSAX = 1;
9359 }
9360
9361 if (!ctxt->wellFormed) {
9362 if (ctxt->errNo == 0)
9363 ret = 1;
9364 else
9365 ret = ctxt->errNo;
9366 } else {
9367 if (list != NULL) {
9368 xmlNodePtr cur;
9369
9370 /*
9371 * Return the newly created nodeset after unlinking it from
9372 * they pseudo parent.
9373 */
9374 cur = newDoc->children->children;
9375 *list = cur;
9376 while (cur != NULL) {
9377 cur->parent = NULL;
9378 cur = cur->next;
9379 }
9380 newDoc->children->children = NULL;
9381 }
9382 ret = 0;
9383 }
9384 if (sax != NULL)
9385 ctxt->sax = oldsax;
9386 xmlFreeParserCtxt(ctxt);
9387 newDoc->intSubset = NULL;
9388 newDoc->extSubset = NULL;
9389 xmlFreeDoc(newDoc);
9390
9391 return(ret);
9392}
9393
9394/**
9395 * xmlSAXParseEntity:
9396 * @sax: the SAX handler block
9397 * @filename: the filename
9398 *
9399 * parse an XML external entity out of context and build a tree.
9400 * It use the given SAX function block to handle the parsing callback.
9401 * If sax is NULL, fallback to the default DOM tree building routines.
9402 *
9403 * [78] extParsedEnt ::= TextDecl? content
9404 *
9405 * This correspond to a "Well Balanced" chunk
9406 *
9407 * Returns the resulting document tree
9408 */
9409
9410xmlDocPtr
9411xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9412 xmlDocPtr ret;
9413 xmlParserCtxtPtr ctxt;
9414 char *directory = NULL;
9415
9416 ctxt = xmlCreateFileParserCtxt(filename);
9417 if (ctxt == NULL) {
9418 return(NULL);
9419 }
9420 if (sax != NULL) {
9421 if (ctxt->sax != NULL)
9422 xmlFree(ctxt->sax);
9423 ctxt->sax = sax;
9424 ctxt->userData = NULL;
9425 }
9426
9427 if ((ctxt->directory == NULL) && (directory == NULL))
9428 directory = xmlParserGetDirectory(filename);
9429
9430 xmlParseExtParsedEnt(ctxt);
9431
9432 if (ctxt->wellFormed)
9433 ret = ctxt->myDoc;
9434 else {
9435 ret = NULL;
9436 xmlFreeDoc(ctxt->myDoc);
9437 ctxt->myDoc = NULL;
9438 }
9439 if (sax != NULL)
9440 ctxt->sax = NULL;
9441 xmlFreeParserCtxt(ctxt);
9442
9443 return(ret);
9444}
9445
9446/**
9447 * xmlParseEntity:
9448 * @filename: the filename
9449 *
9450 * parse an XML external entity out of context and build a tree.
9451 *
9452 * [78] extParsedEnt ::= TextDecl? content
9453 *
9454 * This correspond to a "Well Balanced" chunk
9455 *
9456 * Returns the resulting document tree
9457 */
9458
9459xmlDocPtr
9460xmlParseEntity(const char *filename) {
9461 return(xmlSAXParseEntity(NULL, filename));
9462}
9463
9464/**
9465 * xmlCreateEntityParserCtxt:
9466 * @URL: the entity URL
9467 * @ID: the entity PUBLIC ID
9468 * @base: a posible base for the target URI
9469 *
9470 * Create a parser context for an external entity
9471 * Automatic support for ZLIB/Compress compressed document is provided
9472 * by default if found at compile-time.
9473 *
9474 * Returns the new parser context or NULL
9475 */
9476xmlParserCtxtPtr
9477xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9478 const xmlChar *base) {
9479 xmlParserCtxtPtr ctxt;
9480 xmlParserInputPtr inputStream;
9481 char *directory = NULL;
9482 xmlChar *uri;
9483
9484 ctxt = xmlNewParserCtxt();
9485 if (ctxt == NULL) {
9486 return(NULL);
9487 }
9488
9489 uri = xmlBuildURI(URL, base);
9490
9491 if (uri == NULL) {
9492 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9493 if (inputStream == NULL) {
9494 xmlFreeParserCtxt(ctxt);
9495 return(NULL);
9496 }
9497
9498 inputPush(ctxt, inputStream);
9499
9500 if ((ctxt->directory == NULL) && (directory == NULL))
9501 directory = xmlParserGetDirectory((char *)URL);
9502 if ((ctxt->directory == NULL) && (directory != NULL))
9503 ctxt->directory = directory;
9504 } else {
9505 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9506 if (inputStream == NULL) {
9507 xmlFree(uri);
9508 xmlFreeParserCtxt(ctxt);
9509 return(NULL);
9510 }
9511
9512 inputPush(ctxt, inputStream);
9513
9514 if ((ctxt->directory == NULL) && (directory == NULL))
9515 directory = xmlParserGetDirectory((char *)uri);
9516 if ((ctxt->directory == NULL) && (directory != NULL))
9517 ctxt->directory = directory;
9518 xmlFree(uri);
9519 }
9520
9521 return(ctxt);
9522}
9523
9524/************************************************************************
9525 * *
9526 * Front ends when parsing from a file *
9527 * *
9528 ************************************************************************/
9529
9530/**
9531 * xmlCreateFileParserCtxt:
9532 * @filename: the filename
9533 *
9534 * Create a parser context for a file content.
9535 * Automatic support for ZLIB/Compress compressed document is provided
9536 * by default if found at compile-time.
9537 *
9538 * Returns the new parser context or NULL
9539 */
9540xmlParserCtxtPtr
9541xmlCreateFileParserCtxt(const char *filename)
9542{
9543 xmlParserCtxtPtr ctxt;
9544 xmlParserInputPtr inputStream;
9545 xmlParserInputBufferPtr buf;
9546 char *directory = NULL;
9547
9548 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9549 if (buf == NULL) {
9550 return(NULL);
9551 }
9552
9553 ctxt = xmlNewParserCtxt();
9554 if (ctxt == NULL) {
9555 if (xmlDefaultSAXHandler.error != NULL) {
9556 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9557 }
9558 return(NULL);
9559 }
9560
9561 inputStream = xmlNewInputStream(ctxt);
9562 if (inputStream == NULL) {
9563 xmlFreeParserCtxt(ctxt);
9564 return(NULL);
9565 }
9566
9567 inputStream->filename = xmlMemStrdup(filename);
9568 inputStream->buf = buf;
9569 inputStream->base = inputStream->buf->buffer->content;
9570 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009571 inputStream->end =
9572 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009573
9574 inputPush(ctxt, inputStream);
9575 if ((ctxt->directory == NULL) && (directory == NULL))
9576 directory = xmlParserGetDirectory(filename);
9577 if ((ctxt->directory == NULL) && (directory != NULL))
9578 ctxt->directory = directory;
9579
9580 return(ctxt);
9581}
9582
9583/**
9584 * xmlSAXParseFile:
9585 * @sax: the SAX handler block
9586 * @filename: the filename
9587 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9588 * documents
9589 *
9590 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9591 * compressed document is provided by default if found at compile-time.
9592 * It use the given SAX function block to handle the parsing callback.
9593 * If sax is NULL, fallback to the default DOM tree building routines.
9594 *
9595 * Returns the resulting document tree
9596 */
9597
9598xmlDocPtr
9599xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9600 int recovery) {
9601 xmlDocPtr ret;
9602 xmlParserCtxtPtr ctxt;
9603 char *directory = NULL;
9604
9605 ctxt = xmlCreateFileParserCtxt(filename);
9606 if (ctxt == NULL) {
9607 return(NULL);
9608 }
9609 if (sax != NULL) {
9610 if (ctxt->sax != NULL)
9611 xmlFree(ctxt->sax);
9612 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009613 }
9614
9615 if ((ctxt->directory == NULL) && (directory == NULL))
9616 directory = xmlParserGetDirectory(filename);
9617 if ((ctxt->directory == NULL) && (directory != NULL))
9618 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9619
9620 xmlParseDocument(ctxt);
9621
9622 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9623 else {
9624 ret = NULL;
9625 xmlFreeDoc(ctxt->myDoc);
9626 ctxt->myDoc = NULL;
9627 }
9628 if (sax != NULL)
9629 ctxt->sax = NULL;
9630 xmlFreeParserCtxt(ctxt);
9631
9632 return(ret);
9633}
9634
9635/**
9636 * xmlRecoverDoc:
9637 * @cur: a pointer to an array of xmlChar
9638 *
9639 * parse an XML in-memory document and build a tree.
9640 * In the case the document is not Well Formed, a tree is built anyway
9641 *
9642 * Returns the resulting document tree
9643 */
9644
9645xmlDocPtr
9646xmlRecoverDoc(xmlChar *cur) {
9647 return(xmlSAXParseDoc(NULL, cur, 1));
9648}
9649
9650/**
9651 * xmlParseFile:
9652 * @filename: the filename
9653 *
9654 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9655 * compressed document is provided by default if found at compile-time.
9656 *
9657 * Returns the resulting document tree
9658 */
9659
9660xmlDocPtr
9661xmlParseFile(const char *filename) {
9662 return(xmlSAXParseFile(NULL, filename, 0));
9663}
9664
9665/**
9666 * xmlRecoverFile:
9667 * @filename: the filename
9668 *
9669 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9670 * compressed document is provided by default if found at compile-time.
9671 * In the case the document is not Well Formed, a tree is built anyway
9672 *
9673 * Returns the resulting document tree
9674 */
9675
9676xmlDocPtr
9677xmlRecoverFile(const char *filename) {
9678 return(xmlSAXParseFile(NULL, filename, 1));
9679}
9680
9681
9682/**
9683 * xmlSetupParserForBuffer:
9684 * @ctxt: an XML parser context
9685 * @buffer: a xmlChar * buffer
9686 * @filename: a file name
9687 *
9688 * Setup the parser context to parse a new buffer; Clears any prior
9689 * contents from the parser context. The buffer parameter must not be
9690 * NULL, but the filename parameter can be
9691 */
9692void
9693xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9694 const char* filename)
9695{
9696 xmlParserInputPtr input;
9697
9698 input = xmlNewInputStream(ctxt);
9699 if (input == NULL) {
9700 perror("malloc");
9701 xmlFree(ctxt);
9702 return;
9703 }
9704
9705 xmlClearParserCtxt(ctxt);
9706 if (filename != NULL)
9707 input->filename = xmlMemStrdup(filename);
9708 input->base = buffer;
9709 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009710 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009711 inputPush(ctxt, input);
9712}
9713
9714/**
9715 * xmlSAXUserParseFile:
9716 * @sax: a SAX handler
9717 * @user_data: The user data returned on SAX callbacks
9718 * @filename: a file name
9719 *
9720 * parse an XML file and call the given SAX handler routines.
9721 * Automatic support for ZLIB/Compress compressed document is provided
9722 *
9723 * Returns 0 in case of success or a error number otherwise
9724 */
9725int
9726xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9727 const char *filename) {
9728 int ret = 0;
9729 xmlParserCtxtPtr ctxt;
9730
9731 ctxt = xmlCreateFileParserCtxt(filename);
9732 if (ctxt == NULL) return -1;
9733 if (ctxt->sax != &xmlDefaultSAXHandler)
9734 xmlFree(ctxt->sax);
9735 ctxt->sax = sax;
9736 if (user_data != NULL)
9737 ctxt->userData = user_data;
9738
9739 xmlParseDocument(ctxt);
9740
9741 if (ctxt->wellFormed)
9742 ret = 0;
9743 else {
9744 if (ctxt->errNo != 0)
9745 ret = ctxt->errNo;
9746 else
9747 ret = -1;
9748 }
9749 if (sax != NULL)
9750 ctxt->sax = NULL;
9751 xmlFreeParserCtxt(ctxt);
9752
9753 return ret;
9754}
9755
9756/************************************************************************
9757 * *
9758 * Front ends when parsing from memory *
9759 * *
9760 ************************************************************************/
9761
9762/**
9763 * xmlCreateMemoryParserCtxt:
9764 * @buffer: a pointer to a char array
9765 * @size: the size of the array
9766 *
9767 * Create a parser context for an XML in-memory document.
9768 *
9769 * Returns the new parser context or NULL
9770 */
9771xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009772xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009773 xmlParserCtxtPtr ctxt;
9774 xmlParserInputPtr input;
9775 xmlParserInputBufferPtr buf;
9776
9777 if (buffer == NULL)
9778 return(NULL);
9779 if (size <= 0)
9780 return(NULL);
9781
9782 ctxt = xmlNewParserCtxt();
9783 if (ctxt == NULL)
9784 return(NULL);
9785
9786 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9787 if (buf == NULL) return(NULL);
9788
9789 input = xmlNewInputStream(ctxt);
9790 if (input == NULL) {
9791 xmlFreeParserCtxt(ctxt);
9792 return(NULL);
9793 }
9794
9795 input->filename = NULL;
9796 input->buf = buf;
9797 input->base = input->buf->buffer->content;
9798 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009799 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009800
9801 inputPush(ctxt, input);
9802 return(ctxt);
9803}
9804
9805/**
9806 * xmlSAXParseMemory:
9807 * @sax: the SAX handler block
9808 * @buffer: an pointer to a char array
9809 * @size: the size of the array
9810 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9811 * documents
9812 *
9813 * parse an XML in-memory block and use the given SAX function block
9814 * to handle the parsing callback. If sax is NULL, fallback to the default
9815 * DOM tree building routines.
9816 *
9817 * Returns the resulting document tree
9818 */
9819xmlDocPtr
9820xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9821 xmlDocPtr ret;
9822 xmlParserCtxtPtr ctxt;
9823
9824 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9825 if (ctxt == NULL) return(NULL);
9826 if (sax != NULL) {
9827 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009828 }
9829
9830 xmlParseDocument(ctxt);
9831
9832 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9833 else {
9834 ret = NULL;
9835 xmlFreeDoc(ctxt->myDoc);
9836 ctxt->myDoc = NULL;
9837 }
9838 if (sax != NULL)
9839 ctxt->sax = NULL;
9840 xmlFreeParserCtxt(ctxt);
9841
9842 return(ret);
9843}
9844
9845/**
9846 * xmlParseMemory:
9847 * @buffer: an pointer to a char array
9848 * @size: the size of the array
9849 *
9850 * parse an XML in-memory block and build a tree.
9851 *
9852 * Returns the resulting document tree
9853 */
9854
9855xmlDocPtr xmlParseMemory(char *buffer, int size) {
9856 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9857}
9858
9859/**
9860 * xmlRecoverMemory:
9861 * @buffer: an pointer to a char array
9862 * @size: the size of the array
9863 *
9864 * parse an XML in-memory block and build a tree.
9865 * In the case the document is not Well Formed, a tree is built anyway
9866 *
9867 * Returns the resulting document tree
9868 */
9869
9870xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9871 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9872}
9873
9874/**
9875 * xmlSAXUserParseMemory:
9876 * @sax: a SAX handler
9877 * @user_data: The user data returned on SAX callbacks
9878 * @buffer: an in-memory XML document input
9879 * @size: the length of the XML document in bytes
9880 *
9881 * A better SAX parsing routine.
9882 * parse an XML in-memory buffer and call the given SAX handler routines.
9883 *
9884 * Returns 0 in case of success or a error number otherwise
9885 */
9886int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009887 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009888 int ret = 0;
9889 xmlParserCtxtPtr ctxt;
9890 xmlSAXHandlerPtr oldsax = NULL;
9891
9892 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9893 if (ctxt == NULL) return -1;
9894 if (sax != NULL) {
9895 oldsax = ctxt->sax;
9896 ctxt->sax = sax;
9897 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009898 if (user_data != NULL)
9899 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009900
9901 xmlParseDocument(ctxt);
9902
9903 if (ctxt->wellFormed)
9904 ret = 0;
9905 else {
9906 if (ctxt->errNo != 0)
9907 ret = ctxt->errNo;
9908 else
9909 ret = -1;
9910 }
9911 if (sax != NULL) {
9912 ctxt->sax = oldsax;
9913 }
9914 xmlFreeParserCtxt(ctxt);
9915
9916 return ret;
9917}
9918
9919/**
9920 * xmlCreateDocParserCtxt:
9921 * @cur: a pointer to an array of xmlChar
9922 *
9923 * Creates a parser context for an XML in-memory document.
9924 *
9925 * Returns the new parser context or NULL
9926 */
9927xmlParserCtxtPtr
9928xmlCreateDocParserCtxt(xmlChar *cur) {
9929 int len;
9930
9931 if (cur == NULL)
9932 return(NULL);
9933 len = xmlStrlen(cur);
9934 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9935}
9936
9937/**
9938 * xmlSAXParseDoc:
9939 * @sax: the SAX handler block
9940 * @cur: a pointer to an array of xmlChar
9941 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9942 * documents
9943 *
9944 * parse an XML in-memory document and build a tree.
9945 * It use the given SAX function block to handle the parsing callback.
9946 * If sax is NULL, fallback to the default DOM tree building routines.
9947 *
9948 * Returns the resulting document tree
9949 */
9950
9951xmlDocPtr
9952xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9953 xmlDocPtr ret;
9954 xmlParserCtxtPtr ctxt;
9955
9956 if (cur == NULL) return(NULL);
9957
9958
9959 ctxt = xmlCreateDocParserCtxt(cur);
9960 if (ctxt == NULL) return(NULL);
9961 if (sax != NULL) {
9962 ctxt->sax = sax;
9963 ctxt->userData = NULL;
9964 }
9965
9966 xmlParseDocument(ctxt);
9967 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9968 else {
9969 ret = NULL;
9970 xmlFreeDoc(ctxt->myDoc);
9971 ctxt->myDoc = NULL;
9972 }
9973 if (sax != NULL)
9974 ctxt->sax = NULL;
9975 xmlFreeParserCtxt(ctxt);
9976
9977 return(ret);
9978}
9979
9980/**
9981 * xmlParseDoc:
9982 * @cur: a pointer to an array of xmlChar
9983 *
9984 * parse an XML in-memory document and build a tree.
9985 *
9986 * Returns the resulting document tree
9987 */
9988
9989xmlDocPtr
9990xmlParseDoc(xmlChar *cur) {
9991 return(xmlSAXParseDoc(NULL, cur, 0));
9992}
9993
9994
9995/************************************************************************
9996 * *
9997 * Miscellaneous *
9998 * *
9999 ************************************************************************/
10000
10001#ifdef LIBXML_XPATH_ENABLED
10002#include <libxml/xpath.h>
10003#endif
10004
/* Set to 1 by xmlInitParser() once it has run, reset by xmlCleanupParser() */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 * Calling it again while the parser is still initialized does nothing.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
10033
10034/**
10035 * xmlCleanupParser:
10036 *
10037 * Cleanup function for the XML parser. It tries to reclaim all
10038 * parsing related global memory allocated for the parser processing.
10039 * It doesn't deallocate any document related memory. Calling this
10040 * function should not prevent reusing the parser.
10041 */
10042
10043void
10044xmlCleanupParser(void) {
10045 xmlParserInitialized = 0;
10046 xmlCleanupCharEncodingHandlers();
10047 xmlCleanupPredefinedEntities();
10048}
10049
10050/**
10051 * xmlPedanticParserDefault:
10052 * @val: int 0 or 1
10053 *
10054 * Set and return the previous value for enabling pedantic warnings.
10055 *
10056 * Returns the last value for 0 for no substitution, 1 for substitution.
10057 */
10058
10059int
10060xmlPedanticParserDefault(int val) {
10061 int old = xmlPedanticParserDefaultValue;
10062
10063 xmlPedanticParserDefaultValue = val;
10064 return(old);
10065}
10066
10067/**
10068 * xmlSubstituteEntitiesDefault:
10069 * @val: int 0 or 1
10070 *
10071 * Set and return the previous value for default entity support.
10072 * Initially the parser always keep entity references instead of substituting
10073 * entity values in the output. This function has to be used to change the
10074 * default parser behaviour
10075 * SAX::subtituteEntities() has to be used for changing that on a file by
10076 * file basis.
10077 *
10078 * Returns the last value for 0 for no substitution, 1 for substitution.
10079 */
10080
10081int
10082xmlSubstituteEntitiesDefault(int val) {
10083 int old = xmlSubstituteEntitiesDefaultValue;
10084
10085 xmlSubstituteEntitiesDefaultValue = val;
10086 return(old);
10087}
10088
10089/**
10090 * xmlKeepBlanksDefault:
10091 * @val: int 0 or 1
10092 *
10093 * Set and return the previous value for default blanks text nodes support.
10094 * The 1.x version of the parser used an heuristic to try to detect
10095 * ignorable white spaces. As a result the SAX callback was generating
10096 * ignorableWhitespace() callbacks instead of characters() one, and when
10097 * using the DOM output text nodes containing those blanks were not generated.
10098 * The 2.x and later version will switch to the XML standard way and
10099 * ignorableWhitespace() are only generated when running the parser in
10100 * validating mode and when the current element doesn't allow CDATA or
10101 * mixed content.
10102 * This function is provided as a way to force the standard behaviour
10103 * on 1.X libs and to switch back to the old mode for compatibility when
10104 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10105 * by using xmlIsBlankNode() commodity function to detect the "empty"
10106 * nodes generated.
10107 * This value also affect autogeneration of indentation when saving code
10108 * if blanks sections are kept, indentation is not generated.
10109 *
10110 * Returns the last value for 0 for no substitution, 1 for substitution.
10111 */
10112
10113int
10114xmlKeepBlanksDefault(int val) {
10115 int old = xmlKeepBlanksDefaultValue;
10116
10117 xmlKeepBlanksDefaultValue = val;
10118 xmlIndentTreeOutput = !val;
10119 return(old);
10120}
10121