blob: 60d0e227ca9793b0a1bef077fbb4f3359db85504 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * Daniel.Veillard@w3.org
31 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
Owen Taylor3473f882001-02-23 17:55:21 +000038#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '\\'
40#else
Owen Taylor3473f882001-02-23 17:55:21 +000041#define XML_DIR_SEP '/'
42#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
Daniel Veillard21a0f912001-02-25 19:54:14 +000077#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000078#define XML_PARSER_BUFFER_SIZE 100
79
80/*
81 * Various global defaults for parsing
82 */
/*
 * Global integer defaults, each defined once here with its initial value.
 * NOTE(review): the names suggest these are copied into newly created
 * parser contexts; that code is not visible here - confirm.
 *
 * When VMS is defined, the two longest identifiers are defined under a
 * shortened name and a #define maps the full name onto the shortened one,
 * so code in this file can still use the full name.
 */
83int xmlGetWarningsDefaultValue = 1;
84int xmlParserDebugEntities = 0;
85#ifdef VMS
86int xmlSubstituteEntitiesDefaultVal = 0;
87#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
88int xmlDoValidityCheckingDefaultVal = 0;
89#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
90#else
91int xmlSubstituteEntitiesDefaultValue = 0;
92int xmlDoValidityCheckingDefaultValue = 0;
93#endif
94int xmlLoadExtDtdDefaultValue = 0;
95int xmlPedanticParserDefaultValue = 0;
96int xmlKeepBlanksDefaultValue = 1;
97
98/*
99 * List of XML prefixed PI allowed by W3C specs
100 */
101
/*
 * NULL-terminated array of C strings; its only entry is "xml-stylesheet".
 * NOTE(review): presumably consulted when a processing instruction target
 * starts with "xml" - the consumer is not visible here, confirm.
 */
102const char *xmlW3CPIs[] = {
103 "xml-stylesheet",
104 NULL
105};
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
112
113/************************************************************************
114 * *
115 * Parser stacks related functions and macros *
116 * *
117 ************************************************************************/
118
119xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
120 const xmlChar ** str);
121
122/*
123 * Generic function for accessing stacks in the Parser Context
124 */
125
126#define PUSH_AND_POP(scope, type, name) \
127scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
128 if (ctxt->name##Nr >= ctxt->name##Max) { \
129 ctxt->name##Max *= 2; \
130 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
131 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
132 if (ctxt->name##Tab == NULL) { \
133 xmlGenericError(xmlGenericErrorContext, \
134 "realloc failed !\n"); \
135 return(0); \
136 } \
137 } \
138 ctxt->name##Tab[ctxt->name##Nr] = value; \
139 ctxt->name = value; \
140 return(ctxt->name##Nr++); \
141} \
142scope type name##Pop(xmlParserCtxtPtr ctxt) { \
143 type ret; \
144 if (ctxt->name##Nr <= 0) return(0); \
145 ctxt->name##Nr--; \
146 if (ctxt->name##Nr > 0) \
147 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
148 else \
149 ctxt->name = NULL; \
150 ret = ctxt->name##Tab[ctxt->name##Nr]; \
151 ctxt->name##Tab[ctxt->name##Nr] = 0; \
152 return(ret); \
153} \
154
155/*
156 * Those macros actually generate the functions
157 */
158PUSH_AND_POP(extern, xmlParserInputPtr, input)
159PUSH_AND_POP(extern, xmlNodePtr, node)
160PUSH_AND_POP(extern, xmlChar*, name)
161
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000162static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000163 if (ctxt->spaceNr >= ctxt->spaceMax) {
164 ctxt->spaceMax *= 2;
165 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
166 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
167 if (ctxt->spaceTab == NULL) {
168 xmlGenericError(xmlGenericErrorContext,
169 "realloc failed !\n");
170 return(0);
171 }
172 }
173 ctxt->spaceTab[ctxt->spaceNr] = val;
174 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
175 return(ctxt->spaceNr++);
176}
177
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000178static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000179 int ret;
180 if (ctxt->spaceNr <= 0) return(0);
181 ctxt->spaceNr--;
182 if (ctxt->spaceNr > 0)
183 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
184 else
185 ctxt->space = NULL;
186 ret = ctxt->spaceTab[ctxt->spaceNr];
187 ctxt->spaceTab[ctxt->spaceNr] = -1;
188 return(ret);
189}
190
191/*
192 * Macros for accessing the content. Those should be used only by the parser,
193 * and not exported.
194 *
195 * Dirty macros, i.e. one often need to make assumption on the context to
196 * use them
197 *
198 * CUR_PTR return the current pointer to the xmlChar to be parsed.
199 * To be used with extreme caution since operations consuming
200 * characters may move the input buffer to a different location !
201 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
202 * This should be used internally by the parser
203 * only to compare to ASCII values otherwise it would break when
204 * running with UTF-8 encoding.
205 * RAW same as CUR but in the input buffer, bypass any token
206 * extraction that may have been done
207 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
208 * to compare on ASCII based substring.
209 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
210 * strings within the parser.
211 *
212 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
213 *
214 * NEXT Skip to the next character, this does the proper decoding
215 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
216 * NEXTL(l) Skip l xmlChars in the input buffer
217 * CUR_CHAR(l) returns the current unicode character (int), set l
218 * to the number of xmlChars used for the encoding [0-5].
219 * CUR_SCHAR same but operate on a string instead of the context
220 * COPY_BUF copy the current unicode char to the target buffer, increment
221 * the index
222 * GROW, SHRINK handling of input buffers
223 */
224
/*
 * All macros below expect a variable named `ctxt` (the parser context) to
 * be in scope at the point of use.
 *
 * RAW yields -1 while ctxt->token is non-zero, otherwise the byte at
 * ctxt->input->cur.  CUR yields ctxt->token while it is non-zero,
 * otherwise that same byte.
 * NOTE(review): the expansions of NXT and CUR_PTR are not wrapped in
 * parentheses as a whole.
 */
225#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
226#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
227#define NXT(val) ctxt->input->cur[(val)]
228#define CUR_PTR ctxt->input->cur
229
/*
 * SKIP(val): advance ctxt->nbChars and the input cursor by val, hand over
 * to xmlParserHandlePEReference() when the cursor lands on '%', and pop
 * the current input when the cursor sits on a 0 byte and
 * xmlParserInputGrow() could not provide more data.
 */
230#define SKIP(val) do { \
231 ctxt->nbChars += (val),ctxt->input->cur += (val); \
232 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +0000233 if ((*ctxt->input->cur == 0) && \
234 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
235 xmlPopInput(ctxt); \
236 } while (0)
237
/*
 * SHRINK: once more than INPUT_CHUNK bytes lie between input->base and
 * the cursor, call xmlParserInputShrink(); afterwards pop the input if
 * the cursor is on a 0 byte and growing the input fails.
 * NOTE(review): SHRINK and GROW expand to a bare `if (...) { ... }`
 * rather than do { } while (0), so `if (x) GROW; else ...` would not
 * parse as intended - keep uses inside their own braces.
 */
Daniel Veillard48b2f892001-02-25 16:11:03 +0000238#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
Owen Taylor3473f882001-02-23 17:55:21 +0000239 xmlParserInputShrink(ctxt->input); \
240 if ((*ctxt->input->cur == 0) && \
241 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
242 xmlPopInput(ctxt); \
Daniel Veillard48b2f892001-02-25 16:11:03 +0000243 }
Owen Taylor3473f882001-02-23 17:55:21 +0000244
/*
 * GROW: when fewer than INPUT_CHUNK bytes remain between the cursor and
 * input->end, call xmlParserInputGrow(); afterwards pop the input if the
 * cursor is on a 0 byte and a second grow attempt fails.
 */
Daniel Veillard48b2f892001-02-25 16:11:03 +0000245#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \
Owen Taylor3473f882001-02-23 17:55:21 +0000246 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
247 if ((*ctxt->input->cur == 0) && \
248 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
249 xmlPopInput(ctxt); \
Daniel Veillard48b2f892001-02-25 16:11:03 +0000250 }
Owen Taylor3473f882001-02-23 17:55:21 +0000251
/* SKIP_BLANKS and NEXT simply forward to the functions of the same purpose. */
252#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
253
254#define NEXT xmlNextChar(ctxt)
255
/*
 * NEXT1: advance the cursor and ctxt->nbChars by exactly one and try to
 * grow the input when the cursor reaches a 0 byte.  Unlike SKIP it never
 * pops the input nor looks for '%', and it does not touch line/col.
 * NOTE(review): expands to a bare `{ ... }` block, so a trailing `;`
 * after it is an extra empty statement.
 */
Daniel Veillard21a0f912001-02-25 19:54:14 +0000256#define NEXT1 { \
257 ctxt->input->cur++; \
258 ctxt->nbChars++; \
259 if (*ctxt->input->cur == 0) \
260 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
261 }
262
/*
 * NEXTL(l): update input->line / input->col from the byte currently under
 * the cursor ('\n' starts a new line at column 1, anything else adds one
 * column), clear ctxt->token, advance the cursor by l and hand over to
 * xmlParserHandlePEReference() when it lands on '%'.
 * NOTE(review): the argument l is used unparenthesized.
 */
Owen Taylor3473f882001-02-23 17:55:21 +0000263#define NEXTL(l) do { \
264 if (*(ctxt->input->cur) == '\n') { \
265 ctxt->input->line++; ctxt->input->col = 1; \
266 } else ctxt->input->col++; \
267 ctxt->token = 0; ctxt->input->cur += l; \
268 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +0000269 } while (0)
270
/* Both pass the address of l so the callee can store the encoded length. */
271#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
272#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
273
/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i.  When l is 1 the value is stored as a single xmlChar,
 * otherwise xmlCopyCharMultiByte() writes it and its return value is
 * added to i.  No capacity check is made here.
 * NOTE(review): expands to an unbraced if/else without a trailing `;`,
 * so it must be used as a full statement inside its own braces.
 */
274#define COPY_BUF(l,b,i,v) \
275 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000276 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000277
278/**
279 * xmlSkipBlankChars:
280 * @ctxt: the XML parser context
281 *
282 * skip all blanks character found at that point in the input streams.
283 * It pops up finished entities in the process if allowable at that point.
284 *
285 * Returns the number of space chars skipped
286 */
287
288int
289xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
290 int cur, res = 0;
291
292 /*
293 * It's Okay to use CUR/NEXT here since all the blanks are on
294 * the ASCII range.
295 */
296 do {
297 cur = CUR;
298 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
299 NEXT;
300 cur = CUR;
301 res++;
302 }
303 while ((cur == 0) && (ctxt->inputNr > 1) &&
304 (ctxt->instate != XML_PARSER_COMMENT)) {
305 xmlPopInput(ctxt);
306 cur = CUR;
307 }
308 /*
309 * Need to handle support of entities branching here
310 */
311 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
312 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
313 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
314 return(res);
315}
316
317/************************************************************************
318 * *
319 * Commodity functions to handle entities *
320 * *
321 ************************************************************************/
322
323/**
324 * xmlPopInput:
325 * @ctxt: an XML parser context
326 *
327 * xmlPopInput: the current input pointed by ctxt->input came to an end
328 * pop it and return the next char.
329 *
330 * Returns the current xmlChar in the parser context
331 */
332xmlChar
333xmlPopInput(xmlParserCtxtPtr ctxt) {
334 if (ctxt->inputNr == 1) return(0); /* End of main Input */
335 if (xmlParserDebugEntities)
336 xmlGenericError(xmlGenericErrorContext,
337 "Popping input %d\n", ctxt->inputNr);
338 xmlFreeInputStream(inputPop(ctxt));
339 if ((*ctxt->input->cur == 0) &&
340 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
341 return(xmlPopInput(ctxt));
342 return(CUR);
343}
344
345/**
346 * xmlPushInput:
347 * @ctxt: an XML parser context
348 * @input: an XML parser input fragment (entity, XML fragment ...).
349 *
350 * xmlPushInput: switch to a new input stream which is stacked on top
351 * of the previous one(s).
352 */
353void
354xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
355 if (input == NULL) return;
356
357 if (xmlParserDebugEntities) {
358 if ((ctxt->input != NULL) && (ctxt->input->filename))
359 xmlGenericError(xmlGenericErrorContext,
360 "%s(%d): ", ctxt->input->filename,
361 ctxt->input->line);
362 xmlGenericError(xmlGenericErrorContext,
363 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
364 }
365 inputPush(ctxt, input);
366 GROW;
367}
368
369/**
370 * xmlParseCharRef:
371 * @ctxt: an XML parser context
372 *
373 * parse Reference declarations
374 *
375 * [66] CharRef ::= '&#' [0-9]+ ';' |
376 * '&#x' [0-9a-fA-F]+ ';'
377 *
378 * [ WFC: Legal Character ]
379 * Characters referred to using character references must match the
380 * production for Char.
381 *
382 * Returns the value parsed (as an int), 0 in case of error
383 */
384int
385xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000386 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000387 int count = 0;
388
389 if (ctxt->token != 0) {
390 val = ctxt->token;
391 ctxt->token = 0;
392 return(val);
393 }
394 /*
395 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
396 */
397 if ((RAW == '&') && (NXT(1) == '#') &&
398 (NXT(2) == 'x')) {
399 SKIP(3);
400 GROW;
401 while (RAW != ';') { /* loop blocked by count */
402 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
403 val = val * 16 + (CUR - '0');
404 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
405 val = val * 16 + (CUR - 'a') + 10;
406 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
407 val = val * 16 + (CUR - 'A') + 10;
408 else {
409 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
411 ctxt->sax->error(ctxt->userData,
412 "xmlParseCharRef: invalid hexadecimal value\n");
413 ctxt->wellFormed = 0;
414 ctxt->disableSAX = 1;
415 val = 0;
416 break;
417 }
418 NEXT;
419 count++;
420 }
421 if (RAW == ';') {
422 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
423 ctxt->nbChars ++;
424 ctxt->input->cur++;
425 }
426 } else if ((RAW == '&') && (NXT(1) == '#')) {
427 SKIP(2);
428 GROW;
429 while (RAW != ';') { /* loop blocked by count */
430 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
431 val = val * 10 + (CUR - '0');
432 else {
433 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
435 ctxt->sax->error(ctxt->userData,
436 "xmlParseCharRef: invalid decimal value\n");
437 ctxt->wellFormed = 0;
438 ctxt->disableSAX = 1;
439 val = 0;
440 break;
441 }
442 NEXT;
443 count++;
444 }
445 if (RAW == ';') {
446 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
447 ctxt->nbChars ++;
448 ctxt->input->cur++;
449 }
450 } else {
451 ctxt->errNo = XML_ERR_INVALID_CHARREF;
452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
453 ctxt->sax->error(ctxt->userData,
454 "xmlParseCharRef: invalid value\n");
455 ctxt->wellFormed = 0;
456 ctxt->disableSAX = 1;
457 }
458
459 /*
460 * [ WFC: Legal Character ]
461 * Characters referred to using character references must match the
462 * production for Char.
463 */
464 if (IS_CHAR(val)) {
465 return(val);
466 } else {
467 ctxt->errNo = XML_ERR_INVALID_CHAR;
468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
469 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
470 val);
471 ctxt->wellFormed = 0;
472 ctxt->disableSAX = 1;
473 }
474 return(0);
475}
476
477/**
478 * xmlParseStringCharRef:
479 * @ctxt: an XML parser context
480 * @str: a pointer to an index in the string
481 *
482 * parse Reference declarations, variant parsing from a string rather
483 * than an an input flow.
484 *
485 * [66] CharRef ::= '&#' [0-9]+ ';' |
486 * '&#x' [0-9a-fA-F]+ ';'
487 *
488 * [ WFC: Legal Character ]
489 * Characters referred to using character references must match the
490 * production for Char.
491 *
492 * Returns the value parsed (as an int), 0 in case of error, str will be
493 * updated to the current value of the index
494 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000495static int
Owen Taylor3473f882001-02-23 17:55:21 +0000496xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
497 const xmlChar *ptr;
498 xmlChar cur;
499 int val = 0;
500
501 if ((str == NULL) || (*str == NULL)) return(0);
502 ptr = *str;
503 cur = *ptr;
504 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
505 ptr += 3;
506 cur = *ptr;
507 while (cur != ';') { /* Non input consuming loop */
508 if ((cur >= '0') && (cur <= '9'))
509 val = val * 16 + (cur - '0');
510 else if ((cur >= 'a') && (cur <= 'f'))
511 val = val * 16 + (cur - 'a') + 10;
512 else if ((cur >= 'A') && (cur <= 'F'))
513 val = val * 16 + (cur - 'A') + 10;
514 else {
515 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
517 ctxt->sax->error(ctxt->userData,
518 "xmlParseStringCharRef: invalid hexadecimal value\n");
519 ctxt->wellFormed = 0;
520 ctxt->disableSAX = 1;
521 val = 0;
522 break;
523 }
524 ptr++;
525 cur = *ptr;
526 }
527 if (cur == ';')
528 ptr++;
529 } else if ((cur == '&') && (ptr[1] == '#')){
530 ptr += 2;
531 cur = *ptr;
532 while (cur != ';') { /* Non input consuming loops */
533 if ((cur >= '0') && (cur <= '9'))
534 val = val * 10 + (cur - '0');
535 else {
536 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
538 ctxt->sax->error(ctxt->userData,
539 "xmlParseStringCharRef: invalid decimal value\n");
540 ctxt->wellFormed = 0;
541 ctxt->disableSAX = 1;
542 val = 0;
543 break;
544 }
545 ptr++;
546 cur = *ptr;
547 }
548 if (cur == ';')
549 ptr++;
550 } else {
551 ctxt->errNo = XML_ERR_INVALID_CHARREF;
552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
553 ctxt->sax->error(ctxt->userData,
554 "xmlParseCharRef: invalid value\n");
555 ctxt->wellFormed = 0;
556 ctxt->disableSAX = 1;
557 return(0);
558 }
559 *str = ptr;
560
561 /*
562 * [ WFC: Legal Character ]
563 * Characters referred to using character references must match the
564 * production for Char.
565 */
566 if (IS_CHAR(val)) {
567 return(val);
568 } else {
569 ctxt->errNo = XML_ERR_INVALID_CHAR;
570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
571 ctxt->sax->error(ctxt->userData,
572 "CharRef: invalid xmlChar value %d\n", val);
573 ctxt->wellFormed = 0;
574 ctxt->disableSAX = 1;
575 }
576 return(0);
577}
578
579/**
580 * xmlParserHandlePEReference:
581 * @ctxt: the parser context
582 *
583 * [69] PEReference ::= '%' Name ';'
584 *
585 * [ WFC: No Recursion ]
586 * A parsed entity must not contain a recursive
587 * reference to itself, either directly or indirectly.
588 *
589 * [ WFC: Entity Declared ]
590 * In a document without any DTD, a document with only an internal DTD
591 * subset which contains no parameter entity references, or a document
592 * with "standalone='yes'", ... ... The declaration of a parameter
593 * entity must precede any reference to it...
594 *
595 * [ VC: Entity Declared ]
596 * In a document with an external subset or external parameter entities
597 * with "standalone='no'", ... ... The declaration of a parameter entity
598 * must precede any reference to it...
599 *
600 * [ WFC: In DTD ]
601 * Parameter-entity references may only appear in the DTD.
602 * NOTE: misleading but this is handled.
603 *
604 * A PEReference may have been detected in the current input stream
605 * the handling is done accordingly to
606 * http://www.w3.org/TR/REC-xml#entproc
607 * i.e.
608 * - Included in literal in entity values
609 * - Included as Paraemeter Entity reference within DTDs
610 */
611void
612xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
613 xmlChar *name;
614 xmlEntityPtr entity = NULL;
615 xmlParserInputPtr input;
616
617 if (ctxt->token != 0) {
618 return;
619 }
620 if (RAW != '%') return;
621 switch(ctxt->instate) {
622 case XML_PARSER_CDATA_SECTION:
623 return;
624 case XML_PARSER_COMMENT:
625 return;
626 case XML_PARSER_START_TAG:
627 return;
628 case XML_PARSER_END_TAG:
629 return;
630 case XML_PARSER_EOF:
631 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
633 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 return;
637 case XML_PARSER_PROLOG:
638 case XML_PARSER_START:
639 case XML_PARSER_MISC:
640 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
642 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
643 ctxt->wellFormed = 0;
644 ctxt->disableSAX = 1;
645 return;
646 case XML_PARSER_ENTITY_DECL:
647 case XML_PARSER_CONTENT:
648 case XML_PARSER_ATTRIBUTE_VALUE:
649 case XML_PARSER_PI:
650 case XML_PARSER_SYSTEM_LITERAL:
651 /* we just ignore it there */
652 return;
653 case XML_PARSER_EPILOG:
654 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
656 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
657 ctxt->wellFormed = 0;
658 ctxt->disableSAX = 1;
659 return;
660 case XML_PARSER_ENTITY_VALUE:
661 /*
662 * NOTE: in the case of entity values, we don't do the
663 * substitution here since we need the literal
664 * entity value to be able to save the internal
665 * subset of the document.
666 * This will be handled by xmlStringDecodeEntities
667 */
668 return;
669 case XML_PARSER_DTD:
670 /*
671 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
672 * In the internal DTD subset, parameter-entity references
673 * can occur only where markup declarations can occur, not
674 * within markup declarations.
675 * In that case this is handled in xmlParseMarkupDecl
676 */
677 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
678 return;
679 break;
680 case XML_PARSER_IGNORE:
681 return;
682 }
683
684 NEXT;
685 name = xmlParseName(ctxt);
686 if (xmlParserDebugEntities)
687 xmlGenericError(xmlGenericErrorContext,
688 "PE Reference: %s\n", name);
689 if (name == NULL) {
690 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
692 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
693 ctxt->wellFormed = 0;
694 ctxt->disableSAX = 1;
695 } else {
696 if (RAW == ';') {
697 NEXT;
698 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
699 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
700 if (entity == NULL) {
701
702 /*
703 * [ WFC: Entity Declared ]
704 * In a document without any DTD, a document with only an
705 * internal DTD subset which contains no parameter entity
706 * references, or a document with "standalone='yes'", ...
707 * ... The declaration of a parameter entity must precede
708 * any reference to it...
709 */
710 if ((ctxt->standalone == 1) ||
711 ((ctxt->hasExternalSubset == 0) &&
712 (ctxt->hasPErefs == 0))) {
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
715 "PEReference: %%%s; not found\n", name);
716 ctxt->wellFormed = 0;
717 ctxt->disableSAX = 1;
718 } else {
719 /*
720 * [ VC: Entity Declared ]
721 * In a document with an external subset or external
722 * parameter entities with "standalone='no'", ...
723 * ... The declaration of a parameter entity must precede
724 * any reference to it...
725 */
726 if ((!ctxt->disableSAX) &&
727 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
728 ctxt->vctxt.error(ctxt->vctxt.userData,
729 "PEReference: %%%s; not found\n", name);
730 } else if ((!ctxt->disableSAX) &&
731 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
732 ctxt->sax->warning(ctxt->userData,
733 "PEReference: %%%s; not found\n", name);
734 ctxt->valid = 0;
735 }
736 } else {
737 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
738 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
739 /*
740 * handle the extra spaces added before and after
741 * c.f. http://www.w3.org/TR/REC-xml#as-PE
742 * this is done independantly.
743 */
744 input = xmlNewEntityInputStream(ctxt, entity);
745 xmlPushInput(ctxt, input);
746 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
747 (RAW == '<') && (NXT(1) == '?') &&
748 (NXT(2) == 'x') && (NXT(3) == 'm') &&
749 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
750 xmlParseTextDecl(ctxt);
751 }
752 if (ctxt->token == 0)
753 ctxt->token = ' ';
754 } else {
755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
756 ctxt->sax->error(ctxt->userData,
757 "xmlHandlePEReference: %s is not a parameter entity\n",
758 name);
759 ctxt->wellFormed = 0;
760 ctxt->disableSAX = 1;
761 }
762 }
763 } else {
764 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
766 ctxt->sax->error(ctxt->userData,
767 "xmlHandlePEReference: expecting ';'\n");
768 ctxt->wellFormed = 0;
769 ctxt->disableSAX = 1;
770 }
771 xmlFree(name);
772 }
773}
774
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles buffer##_size and reallocates buffer to that
 * many xmlChars.  It must be used inside a function returning a pointer:
 * when the reallocation fails the old buffer is released (it used to be
 * leaked because its only reference was overwritten with NULL) and the
 * enclosing function returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
    growBuffer_tmp = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (growBuffer_tmp == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = growBuffer_tmp;                                            \
}
787
788/**
789 * xmlStringDecodeEntities:
790 * @ctxt: the parser context
791 * @str: the input string
792 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
793 * @end: an end marker xmlChar, 0 if none
794 * @end2: an end marker xmlChar, 0 if none
795 * @end3: an end marker xmlChar, 0 if none
796 *
797 * Takes a entity string content and process to do the adequate subtitutions.
798 *
799 * [67] Reference ::= EntityRef | CharRef
800 *
801 * [69] PEReference ::= '%' Name ';'
802 *
803 * Returns A newly allocated string with the substitution done. The caller
804 * must deallocate it !
805 */
806xmlChar *
807xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
808 xmlChar end, xmlChar end2, xmlChar end3) {
809 xmlChar *buffer = NULL;
810 int buffer_size = 0;
811
812 xmlChar *current = NULL;
813 xmlEntityPtr ent;
814 int c,l;
815 int nbchars = 0;
816
817 if (str == NULL)
818 return(NULL);
819
820 if (ctxt->depth > 40) {
821 ctxt->errNo = XML_ERR_ENTITY_LOOP;
822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
823 ctxt->sax->error(ctxt->userData,
824 "Detected entity reference loop\n");
825 ctxt->wellFormed = 0;
826 ctxt->disableSAX = 1;
827 return(NULL);
828 }
829
830 /*
831 * allocate a translation buffer.
832 */
833 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
834 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
835 if (buffer == NULL) {
836 perror("xmlDecodeEntities: malloc failed");
837 return(NULL);
838 }
839
840 /*
841 * Ok loop until we reach one of the ending char or a size limit.
842 * we are operating on already parsed values.
843 */
844 c = CUR_SCHAR(str, l);
845 while ((c != 0) && (c != end) && /* non input consuming loop */
846 (c != end2) && (c != end3)) {
847
848 if (c == 0) break;
849 if ((c == '&') && (str[1] == '#')) {
850 int val = xmlParseStringCharRef(ctxt, &str);
851 if (val != 0) {
852 COPY_BUF(0,buffer,nbchars,val);
853 }
854 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
855 if (xmlParserDebugEntities)
856 xmlGenericError(xmlGenericErrorContext,
857 "String decoding Entity Reference: %.30s\n",
858 str);
859 ent = xmlParseStringEntityRef(ctxt, &str);
860 if ((ent != NULL) &&
861 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
862 if (ent->content != NULL) {
863 COPY_BUF(0,buffer,nbchars,ent->content[0]);
864 } else {
865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
866 ctxt->sax->error(ctxt->userData,
867 "internal error entity has no content\n");
868 }
869 } else if ((ent != NULL) && (ent->content != NULL)) {
870 xmlChar *rep;
871
872 ctxt->depth++;
873 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
874 0, 0, 0);
875 ctxt->depth--;
876 if (rep != NULL) {
877 current = rep;
878 while (*current != 0) { /* non input consuming loop */
879 buffer[nbchars++] = *current++;
880 if (nbchars >
881 buffer_size - XML_PARSER_BUFFER_SIZE) {
882 growBuffer(buffer);
883 }
884 }
885 xmlFree(rep);
886 }
887 } else if (ent != NULL) {
888 int i = xmlStrlen(ent->name);
889 const xmlChar *cur = ent->name;
890
891 buffer[nbchars++] = '&';
892 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
893 growBuffer(buffer);
894 }
895 for (;i > 0;i--)
896 buffer[nbchars++] = *cur++;
897 buffer[nbchars++] = ';';
898 }
899 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
900 if (xmlParserDebugEntities)
901 xmlGenericError(xmlGenericErrorContext,
902 "String decoding PE Reference: %.30s\n", str);
903 ent = xmlParseStringPEReference(ctxt, &str);
904 if (ent != NULL) {
905 xmlChar *rep;
906
907 ctxt->depth++;
908 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
909 0, 0, 0);
910 ctxt->depth--;
911 if (rep != NULL) {
912 current = rep;
913 while (*current != 0) { /* non input consuming loop */
914 buffer[nbchars++] = *current++;
915 if (nbchars >
916 buffer_size - XML_PARSER_BUFFER_SIZE) {
917 growBuffer(buffer);
918 }
919 }
920 xmlFree(rep);
921 }
922 }
923 } else {
924 COPY_BUF(l,buffer,nbchars,c);
925 str += l;
926 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
927 growBuffer(buffer);
928 }
929 }
930 c = CUR_SCHAR(str, l);
931 }
932 buffer[nbchars++] = 0;
933 return(buffer);
934}
935
936
937/************************************************************************
938 * *
939 * Commodity functions to handle xmlChars *
940 * *
941 ************************************************************************/
942
943/**
944 * xmlStrndup:
945 * @cur: the input xmlChar *
946 * @len: the len of @cur
947 *
948 * a strndup for array of xmlChar's
949 *
950 * Returns a new xmlChar * or NULL
951 */
952xmlChar *
953xmlStrndup(const xmlChar *cur, int len) {
954 xmlChar *ret;
955
956 if ((cur == NULL) || (len < 0)) return(NULL);
957 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
958 if (ret == NULL) {
959 xmlGenericError(xmlGenericErrorContext,
960 "malloc of %ld byte failed\n",
961 (len + 1) * (long)sizeof(xmlChar));
962 return(NULL);
963 }
964 memcpy(ret, cur, len * sizeof(xmlChar));
965 ret[len] = 0;
966 return(ret);
967}
968
969/**
970 * xmlStrdup:
971 * @cur: the input xmlChar *
972 *
973 * a strdup for array of xmlChar's. Since they are supposed to be
974 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
975 * a termination mark of '0'.
976 *
977 * Returns a new xmlChar * or NULL
978 */
979xmlChar *
980xmlStrdup(const xmlChar *cur) {
981 const xmlChar *p = cur;
982
983 if (cur == NULL) return(NULL);
984 while (*p != 0) p++; /* non input consuming */
985 return(xmlStrndup(cur, p - cur));
986}
987
988/**
989 * xmlCharStrndup:
990 * @cur: the input char *
991 * @len: the len of @cur
992 *
993 * a strndup for char's to xmlChar's
994 *
995 * Returns a new xmlChar * or NULL
996 */
997
998xmlChar *
999xmlCharStrndup(const char *cur, int len) {
1000 int i;
1001 xmlChar *ret;
1002
1003 if ((cur == NULL) || (len < 0)) return(NULL);
1004 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1005 if (ret == NULL) {
1006 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1007 (len + 1) * (long)sizeof(xmlChar));
1008 return(NULL);
1009 }
1010 for (i = 0;i < len;i++)
1011 ret[i] = (xmlChar) cur[i];
1012 ret[len] = 0;
1013 return(ret);
1014}
1015
1016/**
1017 * xmlCharStrdup:
1018 * @cur: the input char *
1019 * @len: the len of @cur
1020 *
1021 * a strdup for char's to xmlChar's
1022 *
1023 * Returns a new xmlChar * or NULL
1024 */
1025
1026xmlChar *
1027xmlCharStrdup(const char *cur) {
1028 const char *p = cur;
1029
1030 if (cur == NULL) return(NULL);
1031 while (*p != '\0') p++; /* non input consuming */
1032 return(xmlCharStrndup(cur, p - cur));
1033}
1034
1035/**
1036 * xmlStrcmp:
1037 * @str1: the first xmlChar *
1038 * @str2: the second xmlChar *
1039 *
1040 * a strcmp for xmlChar's
1041 *
1042 * Returns the integer result of the comparison
1043 */
1044
1045int
1046xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1047 register int tmp;
1048
1049 if (str1 == str2) return(0);
1050 if (str1 == NULL) return(-1);
1051 if (str2 == NULL) return(1);
1052 do {
1053 tmp = *str1++ - *str2;
1054 if (tmp != 0) return(tmp);
1055 } while (*str2++ != 0);
1056 return 0;
1057}
1058
1059/**
1060 * xmlStrEqual:
1061 * @str1: the first xmlChar *
1062 * @str2: the second xmlChar *
1063 *
1064 * Check if both string are equal of have same content
1065 * Should be a bit more readable and faster than xmlStrEqual()
1066 *
1067 * Returns 1 if they are equal, 0 if they are different
1068 */
1069
1070int
1071xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1072 if (str1 == str2) return(1);
1073 if (str1 == NULL) return(0);
1074 if (str2 == NULL) return(0);
1075 do {
1076 if (*str1++ != *str2) return(0);
1077 } while (*str2++);
1078 return(1);
1079}
1080
1081/**
1082 * xmlStrncmp:
1083 * @str1: the first xmlChar *
1084 * @str2: the second xmlChar *
1085 * @len: the max comparison length
1086 *
1087 * a strncmp for xmlChar's
1088 *
1089 * Returns the integer result of the comparison
1090 */
1091
1092int
1093xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1094 register int tmp;
1095
1096 if (len <= 0) return(0);
1097 if (str1 == str2) return(0);
1098 if (str1 == NULL) return(-1);
1099 if (str2 == NULL) return(1);
1100 do {
1101 tmp = *str1++ - *str2;
1102 if (tmp != 0 || --len == 0) return(tmp);
1103 } while (*str2++ != 0);
1104 return 0;
1105}
1106
1107static xmlChar casemap[256] = {
1108 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1109 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1110 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1111 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1112 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1113 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1114 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1115 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1116 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1117 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1118 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1119 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1120 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1121 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1122 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1123 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1124 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1125 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1126 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1127 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1128 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1129 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1130 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1131 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1132 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1133 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1134 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1135 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1136 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1137 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1138 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1139 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1140};
1141
1142/**
1143 * xmlStrcasecmp:
1144 * @str1: the first xmlChar *
1145 * @str2: the second xmlChar *
1146 *
1147 * a strcasecmp for xmlChar's
1148 *
1149 * Returns the integer result of the comparison
1150 */
1151
1152int
1153xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1154 register int tmp;
1155
1156 if (str1 == str2) return(0);
1157 if (str1 == NULL) return(-1);
1158 if (str2 == NULL) return(1);
1159 do {
1160 tmp = casemap[*str1++] - casemap[*str2];
1161 if (tmp != 0) return(tmp);
1162 } while (*str2++ != 0);
1163 return 0;
1164}
1165
1166/**
1167 * xmlStrncasecmp:
1168 * @str1: the first xmlChar *
1169 * @str2: the second xmlChar *
1170 * @len: the max comparison length
1171 *
1172 * a strncasecmp for xmlChar's
1173 *
1174 * Returns the integer result of the comparison
1175 */
1176
1177int
1178xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1179 register int tmp;
1180
1181 if (len <= 0) return(0);
1182 if (str1 == str2) return(0);
1183 if (str1 == NULL) return(-1);
1184 if (str2 == NULL) return(1);
1185 do {
1186 tmp = casemap[*str1++] - casemap[*str2];
1187 if (tmp != 0 || --len == 0) return(tmp);
1188 } while (*str2++ != 0);
1189 return 0;
1190}
1191
1192/**
1193 * xmlStrchr:
1194 * @str: the xmlChar * array
1195 * @val: the xmlChar to search
1196 *
1197 * a strchr for xmlChar's
1198 *
1199 * Returns the xmlChar * for the first occurence or NULL.
1200 */
1201
1202const xmlChar *
1203xmlStrchr(const xmlChar *str, xmlChar val) {
1204 if (str == NULL) return(NULL);
1205 while (*str != 0) { /* non input consuming */
1206 if (*str == val) return((xmlChar *) str);
1207 str++;
1208 }
1209 return(NULL);
1210}
1211
1212/**
1213 * xmlStrstr:
1214 * @str: the xmlChar * array (haystack)
1215 * @val: the xmlChar to search (needle)
1216 *
1217 * a strstr for xmlChar's
1218 *
1219 * Returns the xmlChar * for the first occurence or NULL.
1220 */
1221
1222const xmlChar *
1223xmlStrstr(const xmlChar *str, xmlChar *val) {
1224 int n;
1225
1226 if (str == NULL) return(NULL);
1227 if (val == NULL) return(NULL);
1228 n = xmlStrlen(val);
1229
1230 if (n == 0) return(str);
1231 while (*str != 0) { /* non input consuming */
1232 if (*str == *val) {
1233 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1234 }
1235 str++;
1236 }
1237 return(NULL);
1238}
1239
1240/**
1241 * xmlStrcasestr:
1242 * @str: the xmlChar * array (haystack)
1243 * @val: the xmlChar to search (needle)
1244 *
1245 * a case-ignoring strstr for xmlChar's
1246 *
1247 * Returns the xmlChar * for the first occurence or NULL.
1248 */
1249
1250const xmlChar *
1251xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1252 int n;
1253
1254 if (str == NULL) return(NULL);
1255 if (val == NULL) return(NULL);
1256 n = xmlStrlen(val);
1257
1258 if (n == 0) return(str);
1259 while (*str != 0) { /* non input consuming */
1260 if (casemap[*str] == casemap[*val])
1261 if (!xmlStrncasecmp(str, val, n)) return(str);
1262 str++;
1263 }
1264 return(NULL);
1265}
1266
1267/**
1268 * xmlStrsub:
1269 * @str: the xmlChar * array (haystack)
1270 * @start: the index of the first char (zero based)
1271 * @len: the length of the substring
1272 *
1273 * Extract a substring of a given string
1274 *
1275 * Returns the xmlChar * for the first occurence or NULL.
1276 */
1277
1278xmlChar *
1279xmlStrsub(const xmlChar *str, int start, int len) {
1280 int i;
1281
1282 if (str == NULL) return(NULL);
1283 if (start < 0) return(NULL);
1284 if (len < 0) return(NULL);
1285
1286 for (i = 0;i < start;i++) {
1287 if (*str == 0) return(NULL);
1288 str++;
1289 }
1290 if (*str == 0) return(NULL);
1291 return(xmlStrndup(str, len));
1292}
1293
1294/**
1295 * xmlStrlen:
1296 * @str: the xmlChar * array
1297 *
1298 * length of a xmlChar's string
1299 *
1300 * Returns the number of xmlChar contained in the ARRAY.
1301 */
1302
1303int
1304xmlStrlen(const xmlChar *str) {
1305 int len = 0;
1306
1307 if (str == NULL) return(0);
1308 while (*str != 0) { /* non input consuming */
1309 str++;
1310 len++;
1311 }
1312 return(len);
1313}
1314
1315/**
1316 * xmlStrncat:
1317 * @cur: the original xmlChar * array
1318 * @add: the xmlChar * array added
1319 * @len: the length of @add
1320 *
1321 * a strncat for array of xmlChar's, it will extend cur with the len
1322 * first bytes of @add.
1323 *
1324 * Returns a new xmlChar *, the original @cur is reallocated if needed
1325 * and should not be freed
1326 */
1327
1328xmlChar *
1329xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1330 int size;
1331 xmlChar *ret;
1332
1333 if ((add == NULL) || (len == 0))
1334 return(cur);
1335 if (cur == NULL)
1336 return(xmlStrndup(add, len));
1337
1338 size = xmlStrlen(cur);
1339 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1340 if (ret == NULL) {
1341 xmlGenericError(xmlGenericErrorContext,
1342 "xmlStrncat: realloc of %ld byte failed\n",
1343 (size + len + 1) * (long)sizeof(xmlChar));
1344 return(cur);
1345 }
1346 memcpy(&ret[size], add, len * sizeof(xmlChar));
1347 ret[size + len] = 0;
1348 return(ret);
1349}
1350
1351/**
1352 * xmlStrcat:
1353 * @cur: the original xmlChar * array
1354 * @add: the xmlChar * array added
1355 *
1356 * a strcat for array of xmlChar's. Since they are supposed to be
1357 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1358 * a termination mark of '0'.
1359 *
1360 * Returns a new xmlChar * containing the concatenated string.
1361 */
1362xmlChar *
1363xmlStrcat(xmlChar *cur, const xmlChar *add) {
1364 const xmlChar *p = add;
1365
1366 if (add == NULL) return(cur);
1367 if (cur == NULL)
1368 return(xmlStrdup(add));
1369
1370 while (*p != 0) p++; /* non input consuming */
1371 return(xmlStrncat(cur, add, p - add));
1372}
1373
1374/************************************************************************
1375 * *
1376 * Commodity functions, cleanup needed ? *
1377 * *
1378 ************************************************************************/
1379
1380/**
1381 * areBlanks:
1382 * @ctxt: an XML parser context
1383 * @str: a xmlChar *
1384 * @len: the size of @str
1385 *
1386 * Is this a sequence of blank chars that one can ignore ?
1387 *
1388 * Returns 1 if ignorable 0 otherwise.
1389 */
1390
1391static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1392 int i, ret;
1393 xmlNodePtr lastChild;
1394
Daniel Veillard2f362242001-03-02 17:36:21 +00001395 if (ctxt->keepBlanks)
1396 return(0);
1397
Owen Taylor3473f882001-02-23 17:55:21 +00001398 /*
1399 * Check for xml:space value.
1400 */
1401 if (*(ctxt->space) == 1)
1402 return(0);
1403
1404 /*
1405 * Check that the string is made of blanks
1406 */
1407 for (i = 0;i < len;i++)
1408 if (!(IS_BLANK(str[i]))) return(0);
1409
1410 /*
1411 * Look if the element is mixed content in the Dtd if available
1412 */
1413 if (ctxt->myDoc != NULL) {
1414 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1415 if (ret == 0) return(1);
1416 if (ret == 1) return(0);
1417 }
1418
1419 /*
1420 * Otherwise, heuristic :-\
1421 */
Owen Taylor3473f882001-02-23 17:55:21 +00001422 if (RAW != '<') return(0);
1423 if (ctxt->node == NULL) return(0);
1424 if ((ctxt->node->children == NULL) &&
1425 (RAW == '<') && (NXT(1) == '/')) return(0);
1426
1427 lastChild = xmlGetLastChild(ctxt->node);
1428 if (lastChild == NULL) {
1429 if (ctxt->node->content != NULL) return(0);
1430 } else if (xmlNodeIsText(lastChild))
1431 return(0);
1432 else if ((ctxt->node->children != NULL) &&
1433 (xmlNodeIsText(ctxt->node->children)))
1434 return(0);
1435 return(1);
1436}
1437
1438/*
1439 * Forward declarations for recursive behaviour.
1440 */
1441void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1442void xmlParseReference(xmlParserCtxtPtr ctxt);
1443
1444/************************************************************************
1445 * *
1446 * Extra stuff for namespace support *
1447 * Relates to http://www.w3.org/TR/WD-xml-names *
1448 * *
1449 ************************************************************************/
1450
1451/**
1452 * xmlSplitQName:
1453 * @ctxt: an XML parser context
1454 * @name: an XML parser context
1455 * @prefix: a xmlChar **
1456 *
1457 * parse an UTF8 encoded XML qualified name string
1458 *
1459 * [NS 5] QName ::= (Prefix ':')? LocalPart
1460 *
1461 * [NS 6] Prefix ::= NCName
1462 *
1463 * [NS 7] LocalPart ::= NCName
1464 *
1465 * Returns the local part, and prefix is updated
1466 * to get the Prefix if any.
1467 */
1468
1469xmlChar *
1470xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1471 xmlChar buf[XML_MAX_NAMELEN + 5];
1472 xmlChar *buffer = NULL;
1473 int len = 0;
1474 int max = XML_MAX_NAMELEN;
1475 xmlChar *ret = NULL;
1476 const xmlChar *cur = name;
1477 int c;
1478
1479 *prefix = NULL;
1480
1481#ifndef XML_XML_NAMESPACE
1482 /* xml: prefix is not really a namespace */
1483 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1484 (cur[2] == 'l') && (cur[3] == ':'))
1485 return(xmlStrdup(name));
1486#endif
1487
1488 /* nasty but valid */
1489 if (cur[0] == ':')
1490 return(xmlStrdup(name));
1491
1492 c = *cur++;
1493 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1494 buf[len++] = c;
1495 c = *cur++;
1496 }
1497 if (len >= max) {
1498 /*
1499 * Okay someone managed to make a huge name, so he's ready to pay
1500 * for the processing speed.
1501 */
1502 max = len * 2;
1503
1504 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1505 if (buffer == NULL) {
1506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1507 ctxt->sax->error(ctxt->userData,
1508 "xmlSplitQName: out of memory\n");
1509 return(NULL);
1510 }
1511 memcpy(buffer, buf, len);
1512 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1513 if (len + 10 > max) {
1514 max *= 2;
1515 buffer = (xmlChar *) xmlRealloc(buffer,
1516 max * sizeof(xmlChar));
1517 if (buffer == NULL) {
1518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1519 ctxt->sax->error(ctxt->userData,
1520 "xmlSplitQName: out of memory\n");
1521 return(NULL);
1522 }
1523 }
1524 buffer[len++] = c;
1525 c = *cur++;
1526 }
1527 buffer[len] = 0;
1528 }
1529
1530 if (buffer == NULL)
1531 ret = xmlStrndup(buf, len);
1532 else {
1533 ret = buffer;
1534 buffer = NULL;
1535 max = XML_MAX_NAMELEN;
1536 }
1537
1538
1539 if (c == ':') {
1540 c = *cur++;
1541 if (c == 0) return(ret);
1542 *prefix = ret;
1543 len = 0;
1544
1545 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1546 buf[len++] = c;
1547 c = *cur++;
1548 }
1549 if (len >= max) {
1550 /*
1551 * Okay someone managed to make a huge name, so he's ready to pay
1552 * for the processing speed.
1553 */
1554 max = len * 2;
1555
1556 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1557 if (buffer == NULL) {
1558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1559 ctxt->sax->error(ctxt->userData,
1560 "xmlSplitQName: out of memory\n");
1561 return(NULL);
1562 }
1563 memcpy(buffer, buf, len);
1564 while (c != 0) { /* tested bigname2.xml */
1565 if (len + 10 > max) {
1566 max *= 2;
1567 buffer = (xmlChar *) xmlRealloc(buffer,
1568 max * sizeof(xmlChar));
1569 if (buffer == NULL) {
1570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571 ctxt->sax->error(ctxt->userData,
1572 "xmlSplitQName: out of memory\n");
1573 return(NULL);
1574 }
1575 }
1576 buffer[len++] = c;
1577 c = *cur++;
1578 }
1579 buffer[len] = 0;
1580 }
1581
1582 if (buffer == NULL)
1583 ret = xmlStrndup(buf, len);
1584 else {
1585 ret = buffer;
1586 }
1587 }
1588
1589 return(ret);
1590}
1591
1592/************************************************************************
1593 * *
1594 * The parser itself *
1595 * Relates to http://www.w3.org/TR/REC-xml *
1596 * *
1597 ************************************************************************/
1598
Daniel Veillard21a0f912001-02-25 19:54:14 +00001599xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001600/**
1601 * xmlParseName:
1602 * @ctxt: an XML parser context
1603 *
1604 * parse an XML name.
1605 *
1606 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1607 * CombiningChar | Extender
1608 *
1609 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1610 *
1611 * [6] Names ::= Name (S Name)*
1612 *
1613 * Returns the Name parsed or NULL
1614 */
1615
1616xmlChar *
1617xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001618 const xmlChar *in;
1619 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001620 int count = 0;
1621
1622 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001623
1624 /*
1625 * Accelerator for simple ASCII names
1626 */
1627 in = ctxt->input->cur;
1628 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1629 ((*in >= 0x41) && (*in <= 0x5A)) ||
1630 (*in == '_') || (*in == ':')) {
1631 in++;
1632 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1633 ((*in >= 0x41) && (*in <= 0x5A)) ||
1634 ((*in >= 0x30) && (*in <= 0x39)) ||
1635 (*in == '_') || (*in == ':'))
1636 in++;
1637 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1638 count = in - ctxt->input->cur;
1639 ret = xmlStrndup(ctxt->input->cur, count);
1640 ctxt->input->cur = in;
1641 return(ret);
1642 }
1643 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001644 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001645}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001646
Daniel Veillard21a0f912001-02-25 19:54:14 +00001647xmlChar *
1648xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1649 xmlChar buf[XML_MAX_NAMELEN + 5];
1650 int len = 0, l;
1651 int c;
1652 int count = 0;
1653
1654 /*
1655 * Handler for more complex cases
1656 */
1657 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001658 c = CUR_CHAR(l);
1659 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1660 (!IS_LETTER(c) && (c != '_') &&
1661 (c != ':'))) {
1662 return(NULL);
1663 }
1664
1665 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1666 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1667 (c == '.') || (c == '-') ||
1668 (c == '_') || (c == ':') ||
1669 (IS_COMBINING(c)) ||
1670 (IS_EXTENDER(c)))) {
1671 if (count++ > 100) {
1672 count = 0;
1673 GROW;
1674 }
1675 COPY_BUF(l,buf,len,c);
1676 NEXTL(l);
1677 c = CUR_CHAR(l);
1678 if (len >= XML_MAX_NAMELEN) {
1679 /*
1680 * Okay someone managed to make a huge name, so he's ready to pay
1681 * for the processing speed.
1682 */
1683 xmlChar *buffer;
1684 int max = len * 2;
1685
1686 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1687 if (buffer == NULL) {
1688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1689 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001690 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001691 return(NULL);
1692 }
1693 memcpy(buffer, buf, len);
1694 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1695 (c == '.') || (c == '-') ||
1696 (c == '_') || (c == ':') ||
1697 (IS_COMBINING(c)) ||
1698 (IS_EXTENDER(c))) {
1699 if (count++ > 100) {
1700 count = 0;
1701 GROW;
1702 }
1703 if (len + 10 > max) {
1704 max *= 2;
1705 buffer = (xmlChar *) xmlRealloc(buffer,
1706 max * sizeof(xmlChar));
1707 if (buffer == NULL) {
1708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001710 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001711 return(NULL);
1712 }
1713 }
1714 COPY_BUF(l,buffer,len,c);
1715 NEXTL(l);
1716 c = CUR_CHAR(l);
1717 }
1718 buffer[len] = 0;
1719 return(buffer);
1720 }
1721 }
1722 return(xmlStrndup(buf, len));
1723}
1724
1725/**
1726 * xmlParseStringName:
1727 * @ctxt: an XML parser context
1728 * @str: a pointer to the string pointer (IN/OUT)
1729 *
1730 * parse an XML name.
1731 *
1732 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1733 * CombiningChar | Extender
1734 *
1735 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1736 *
1737 * [6] Names ::= Name (S Name)*
1738 *
1739 * Returns the Name parsed or NULL. The str pointer
1740 * is updated to the current location in the string.
1741 */
1742
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001743static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001744xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1745 xmlChar buf[XML_MAX_NAMELEN + 5];
1746 const xmlChar *cur = *str;
1747 int len = 0, l;
1748 int c;
1749
1750 c = CUR_SCHAR(cur, l);
1751 if (!IS_LETTER(c) && (c != '_') &&
1752 (c != ':')) {
1753 return(NULL);
1754 }
1755
1756 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1757 (c == '.') || (c == '-') ||
1758 (c == '_') || (c == ':') ||
1759 (IS_COMBINING(c)) ||
1760 (IS_EXTENDER(c))) {
1761 COPY_BUF(l,buf,len,c);
1762 cur += l;
1763 c = CUR_SCHAR(cur, l);
1764 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1765 /*
1766 * Okay someone managed to make a huge name, so he's ready to pay
1767 * for the processing speed.
1768 */
1769 xmlChar *buffer;
1770 int max = len * 2;
1771
1772 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1773 if (buffer == NULL) {
1774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1775 ctxt->sax->error(ctxt->userData,
1776 "xmlParseStringName: out of memory\n");
1777 return(NULL);
1778 }
1779 memcpy(buffer, buf, len);
1780 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1781 (c == '.') || (c == '-') ||
1782 (c == '_') || (c == ':') ||
1783 (IS_COMBINING(c)) ||
1784 (IS_EXTENDER(c))) {
1785 if (len + 10 > max) {
1786 max *= 2;
1787 buffer = (xmlChar *) xmlRealloc(buffer,
1788 max * sizeof(xmlChar));
1789 if (buffer == NULL) {
1790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791 ctxt->sax->error(ctxt->userData,
1792 "xmlParseStringName: out of memory\n");
1793 return(NULL);
1794 }
1795 }
1796 COPY_BUF(l,buffer,len,c);
1797 cur += l;
1798 c = CUR_SCHAR(cur, l);
1799 }
1800 buffer[len] = 0;
1801 *str = cur;
1802 return(buffer);
1803 }
1804 }
1805 *str = cur;
1806 return(xmlStrndup(buf, len));
1807}
1808
1809/**
1810 * xmlParseNmtoken:
1811 * @ctxt: an XML parser context
1812 *
1813 * parse an XML Nmtoken.
1814 *
1815 * [7] Nmtoken ::= (NameChar)+
1816 *
1817 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1818 *
1819 * Returns the Nmtoken parsed or NULL
1820 */
1821
1822xmlChar *
1823xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1824 xmlChar buf[XML_MAX_NAMELEN + 5];
1825 int len = 0, l;
1826 int c;
1827 int count = 0;
1828
1829 GROW;
1830 c = CUR_CHAR(l);
1831
1832 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1833 (c == '.') || (c == '-') ||
1834 (c == '_') || (c == ':') ||
1835 (IS_COMBINING(c)) ||
1836 (IS_EXTENDER(c))) {
1837 if (count++ > 100) {
1838 count = 0;
1839 GROW;
1840 }
1841 COPY_BUF(l,buf,len,c);
1842 NEXTL(l);
1843 c = CUR_CHAR(l);
1844 if (len >= XML_MAX_NAMELEN) {
1845 /*
1846 * Okay someone managed to make a huge token, so he's ready to pay
1847 * for the processing speed.
1848 */
1849 xmlChar *buffer;
1850 int max = len * 2;
1851
1852 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1853 if (buffer == NULL) {
1854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1855 ctxt->sax->error(ctxt->userData,
1856 "xmlParseNmtoken: out of memory\n");
1857 return(NULL);
1858 }
1859 memcpy(buffer, buf, len);
1860 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1861 (c == '.') || (c == '-') ||
1862 (c == '_') || (c == ':') ||
1863 (IS_COMBINING(c)) ||
1864 (IS_EXTENDER(c))) {
1865 if (count++ > 100) {
1866 count = 0;
1867 GROW;
1868 }
1869 if (len + 10 > max) {
1870 max *= 2;
1871 buffer = (xmlChar *) xmlRealloc(buffer,
1872 max * sizeof(xmlChar));
1873 if (buffer == NULL) {
1874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1875 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001876 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001877 return(NULL);
1878 }
1879 }
1880 COPY_BUF(l,buffer,len,c);
1881 NEXTL(l);
1882 c = CUR_CHAR(l);
1883 }
1884 buffer[len] = 0;
1885 return(buffer);
1886 }
1887 }
1888 if (len == 0)
1889 return(NULL);
1890 return(xmlStrndup(buf, len));
1891}
1892
1893/**
1894 * xmlParseEntityValue:
1895 * @ctxt: an XML parser context
1896 * @orig: if non-NULL store a copy of the original entity value
1897 *
1898 * parse a value for ENTITY declarations
1899 *
1900 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1901 * "'" ([^%&'] | PEReference | Reference)* "'"
1902 *
1903 * Returns the EntityValue parsed with reference substitued or NULL
1904 */
1905
1906xmlChar *
1907xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1908 xmlChar *buf = NULL;
1909 int len = 0;
1910 int size = XML_PARSER_BUFFER_SIZE;
1911 int c, l;
1912 xmlChar stop;
1913 xmlChar *ret = NULL;
1914 const xmlChar *cur = NULL;
1915 xmlParserInputPtr input;
1916
1917 if (RAW == '"') stop = '"';
1918 else if (RAW == '\'') stop = '\'';
1919 else {
1920 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1922 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1923 ctxt->wellFormed = 0;
1924 ctxt->disableSAX = 1;
1925 return(NULL);
1926 }
1927 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1928 if (buf == NULL) {
1929 xmlGenericError(xmlGenericErrorContext,
1930 "malloc of %d byte failed\n", size);
1931 return(NULL);
1932 }
1933
1934 /*
1935 * The content of the entity definition is copied in a buffer.
1936 */
1937
1938 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1939 input = ctxt->input;
1940 GROW;
1941 NEXT;
1942 c = CUR_CHAR(l);
1943 /*
1944 * NOTE: 4.4.5 Included in Literal
1945 * When a parameter entity reference appears in a literal entity
1946 * value, ... a single or double quote character in the replacement
1947 * text is always treated as a normal data character and will not
1948 * terminate the literal.
1949 * In practice it means we stop the loop only when back at parsing
1950 * the initial entity and the quote is found
1951 */
1952 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1953 (ctxt->input != input))) {
1954 if (len + 5 >= size) {
1955 size *= 2;
1956 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1957 if (buf == NULL) {
1958 xmlGenericError(xmlGenericErrorContext,
1959 "realloc of %d byte failed\n", size);
1960 return(NULL);
1961 }
1962 }
1963 COPY_BUF(l,buf,len,c);
1964 NEXTL(l);
1965 /*
1966 * Pop-up of finished entities.
1967 */
1968 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1969 xmlPopInput(ctxt);
1970
1971 GROW;
1972 c = CUR_CHAR(l);
1973 if (c == 0) {
1974 GROW;
1975 c = CUR_CHAR(l);
1976 }
1977 }
1978 buf[len] = 0;
1979
1980 /*
1981 * Raise problem w.r.t. '&' and '%' being used in non-entities
1982 * reference constructs. Note Charref will be handled in
1983 * xmlStringDecodeEntities()
1984 */
1985 cur = buf;
1986 while (*cur != 0) { /* non input consuming */
1987 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1988 xmlChar *name;
1989 xmlChar tmp = *cur;
1990
1991 cur++;
1992 name = xmlParseStringName(ctxt, &cur);
1993 if ((name == NULL) || (*cur != ';')) {
1994 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1996 ctxt->sax->error(ctxt->userData,
1997 "EntityValue: '%c' forbidden except for entities references\n",
1998 tmp);
1999 ctxt->wellFormed = 0;
2000 ctxt->disableSAX = 1;
2001 }
2002 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2003 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005 ctxt->sax->error(ctxt->userData,
2006 "EntityValue: PEReferences forbidden in internal subset\n",
2007 tmp);
2008 ctxt->wellFormed = 0;
2009 ctxt->disableSAX = 1;
2010 }
2011 if (name != NULL)
2012 xmlFree(name);
2013 }
2014 cur++;
2015 }
2016
2017 /*
2018 * Then PEReference entities are substituted.
2019 */
2020 if (c != stop) {
2021 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2023 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2024 ctxt->wellFormed = 0;
2025 ctxt->disableSAX = 1;
2026 xmlFree(buf);
2027 } else {
2028 NEXT;
2029 /*
2030 * NOTE: 4.4.7 Bypassed
2031 * When a general entity reference appears in the EntityValue in
2032 * an entity declaration, it is bypassed and left as is.
2033 * so XML_SUBSTITUTE_REF is not set here.
2034 */
2035 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2036 0, 0, 0);
2037 if (orig != NULL)
2038 *orig = buf;
2039 else
2040 xmlFree(buf);
2041 }
2042
2043 return(ret);
2044}
2045
2046/**
2047 * xmlParseAttValue:
2048 * @ctxt: an XML parser context
2049 *
2050 * parse a value for an attribute
2051 * Note: the parser won't do substitution of entities here, this
2052 * will be handled later in xmlStringGetNodeList
2053 *
2054 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2055 * "'" ([^<&'] | Reference)* "'"
2056 *
2057 * 3.3.3 Attribute-Value Normalization:
2058 * Before the value of an attribute is passed to the application or
2059 * checked for validity, the XML processor must normalize it as follows:
2060 * - a character reference is processed by appending the referenced
2061 * character to the attribute value
2062 * - an entity reference is processed by recursively processing the
2063 * replacement text of the entity
2064 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2065 * appending #x20 to the normalized value, except that only a single
2066 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2067 * parsed entity or the literal entity value of an internal parsed entity
2068 * - other characters are processed by appending them to the normalized value
2069 * If the declared value is not CDATA, then the XML processor must further
2070 * process the normalized attribute value by discarding any leading and
2071 * trailing space (#x20) characters, and by replacing sequences of space
2072 * (#x20) characters by a single space (#x20) character.
2073 * All attributes for which no declaration has been read should be treated
2074 * by a non-validating parser as if declared CDATA.
2075 *
2076 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2077 */
2078
2079xmlChar *
2080xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2081 xmlChar limit = 0;
2082 xmlChar *buf = NULL;
2083 int len = 0;
2084 int buf_size = 0;
2085 int c, l;
2086 xmlChar *current = NULL;
2087 xmlEntityPtr ent;
2088
2089
2090 SHRINK;
2091 if (NXT(0) == '"') {
2092 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2093 limit = '"';
2094 NEXT;
2095 } else if (NXT(0) == '\'') {
2096 limit = '\'';
2097 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2098 NEXT;
2099 } else {
2100 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2102 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2103 ctxt->wellFormed = 0;
2104 ctxt->disableSAX = 1;
2105 return(NULL);
2106 }
2107
2108 /*
2109 * allocate a translation buffer.
2110 */
2111 buf_size = XML_PARSER_BUFFER_SIZE;
2112 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2113 if (buf == NULL) {
2114 perror("xmlParseAttValue: malloc failed");
2115 return(NULL);
2116 }
2117
2118 /*
2119 * Ok loop until we reach one of the ending char or a size limit.
2120 */
2121 c = CUR_CHAR(l);
2122 while (((NXT(0) != limit) && /* checked */
2123 (c != '<')) || (ctxt->token != 0)) {
2124 if (c == 0) break;
2125 if (ctxt->token == '&') {
2126 /*
2127 * The reparsing will be done in xmlStringGetNodeList()
2128 * called by the attribute() function in SAX.c
2129 */
2130 static xmlChar buffer[6] = "&#38;";
2131
2132 if (len > buf_size - 10) {
2133 growBuffer(buf);
2134 }
2135 current = &buffer[0];
2136 while (*current != 0) { /* non input consuming */
2137 buf[len++] = *current++;
2138 }
2139 ctxt->token = 0;
2140 } else if (c == '&') {
2141 if (NXT(1) == '#') {
2142 int val = xmlParseCharRef(ctxt);
2143 if (val == '&') {
2144 /*
2145 * The reparsing will be done in xmlStringGetNodeList()
2146 * called by the attribute() function in SAX.c
2147 */
2148 static xmlChar buffer[6] = "&#38;";
2149
2150 if (len > buf_size - 10) {
2151 growBuffer(buf);
2152 }
2153 current = &buffer[0];
2154 while (*current != 0) { /* non input consuming */
2155 buf[len++] = *current++;
2156 }
2157 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002158 if (len > buf_size - 10) {
2159 growBuffer(buf);
2160 }
Owen Taylor3473f882001-02-23 17:55:21 +00002161 len += xmlCopyChar(0, &buf[len], val);
2162 }
2163 } else {
2164 ent = xmlParseEntityRef(ctxt);
2165 if ((ent != NULL) &&
2166 (ctxt->replaceEntities != 0)) {
2167 xmlChar *rep;
2168
2169 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2170 rep = xmlStringDecodeEntities(ctxt, ent->content,
2171 XML_SUBSTITUTE_REF, 0, 0, 0);
2172 if (rep != NULL) {
2173 current = rep;
2174 while (*current != 0) { /* non input consuming */
2175 buf[len++] = *current++;
2176 if (len > buf_size - 10) {
2177 growBuffer(buf);
2178 }
2179 }
2180 xmlFree(rep);
2181 }
2182 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002183 if (len > buf_size - 10) {
2184 growBuffer(buf);
2185 }
Owen Taylor3473f882001-02-23 17:55:21 +00002186 if (ent->content != NULL)
2187 buf[len++] = ent->content[0];
2188 }
2189 } else if (ent != NULL) {
2190 int i = xmlStrlen(ent->name);
2191 const xmlChar *cur = ent->name;
2192
2193 /*
2194 * This may look absurd but is needed to detect
2195 * entities problems
2196 */
2197 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2198 (ent->content != NULL)) {
2199 xmlChar *rep;
2200 rep = xmlStringDecodeEntities(ctxt, ent->content,
2201 XML_SUBSTITUTE_REF, 0, 0, 0);
2202 if (rep != NULL)
2203 xmlFree(rep);
2204 }
2205
2206 /*
2207 * Just output the reference
2208 */
2209 buf[len++] = '&';
2210 if (len > buf_size - i - 10) {
2211 growBuffer(buf);
2212 }
2213 for (;i > 0;i--)
2214 buf[len++] = *cur++;
2215 buf[len++] = ';';
2216 }
2217 }
2218 } else {
2219 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2220 COPY_BUF(l,buf,len,0x20);
2221 if (len > buf_size - 10) {
2222 growBuffer(buf);
2223 }
2224 } else {
2225 COPY_BUF(l,buf,len,c);
2226 if (len > buf_size - 10) {
2227 growBuffer(buf);
2228 }
2229 }
2230 NEXTL(l);
2231 }
2232 GROW;
2233 c = CUR_CHAR(l);
2234 }
2235 buf[len++] = 0;
2236 if (RAW == '<') {
2237 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2239 ctxt->sax->error(ctxt->userData,
2240 "Unescaped '<' not allowed in attributes values\n");
2241 ctxt->wellFormed = 0;
2242 ctxt->disableSAX = 1;
2243 } else if (RAW != limit) {
2244 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2246 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2247 ctxt->wellFormed = 0;
2248 ctxt->disableSAX = 1;
2249 } else
2250 NEXT;
2251 return(buf);
2252}
2253
2254/**
2255 * xmlParseSystemLiteral:
2256 * @ctxt: an XML parser context
2257 *
2258 * parse an XML Literal
2259 *
2260 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2261 *
2262 * Returns the SystemLiteral parsed or NULL
2263 */
2264
2265xmlChar *
2266xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2267 xmlChar *buf = NULL;
2268 int len = 0;
2269 int size = XML_PARSER_BUFFER_SIZE;
2270 int cur, l;
2271 xmlChar stop;
2272 int state = ctxt->instate;
2273 int count = 0;
2274
2275 SHRINK;
2276 if (RAW == '"') {
2277 NEXT;
2278 stop = '"';
2279 } else if (RAW == '\'') {
2280 NEXT;
2281 stop = '\'';
2282 } else {
2283 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2285 ctxt->sax->error(ctxt->userData,
2286 "SystemLiteral \" or ' expected\n");
2287 ctxt->wellFormed = 0;
2288 ctxt->disableSAX = 1;
2289 return(NULL);
2290 }
2291
2292 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2293 if (buf == NULL) {
2294 xmlGenericError(xmlGenericErrorContext,
2295 "malloc of %d byte failed\n", size);
2296 return(NULL);
2297 }
2298 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2299 cur = CUR_CHAR(l);
2300 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2301 if (len + 5 >= size) {
2302 size *= 2;
2303 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2304 if (buf == NULL) {
2305 xmlGenericError(xmlGenericErrorContext,
2306 "realloc of %d byte failed\n", size);
2307 ctxt->instate = (xmlParserInputState) state;
2308 return(NULL);
2309 }
2310 }
2311 count++;
2312 if (count > 50) {
2313 GROW;
2314 count = 0;
2315 }
2316 COPY_BUF(l,buf,len,cur);
2317 NEXTL(l);
2318 cur = CUR_CHAR(l);
2319 if (cur == 0) {
2320 GROW;
2321 SHRINK;
2322 cur = CUR_CHAR(l);
2323 }
2324 }
2325 buf[len] = 0;
2326 ctxt->instate = (xmlParserInputState) state;
2327 if (!IS_CHAR(cur)) {
2328 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2330 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2331 ctxt->wellFormed = 0;
2332 ctxt->disableSAX = 1;
2333 } else {
2334 NEXT;
2335 }
2336 return(buf);
2337}
2338
2339/**
2340 * xmlParsePubidLiteral:
2341 * @ctxt: an XML parser context
2342 *
2343 * parse an XML public literal
2344 *
2345 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2346 *
2347 * Returns the PubidLiteral parsed or NULL.
2348 */
2349
2350xmlChar *
2351xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2352 xmlChar *buf = NULL;
2353 int len = 0;
2354 int size = XML_PARSER_BUFFER_SIZE;
2355 xmlChar cur;
2356 xmlChar stop;
2357 int count = 0;
2358
2359 SHRINK;
2360 if (RAW == '"') {
2361 NEXT;
2362 stop = '"';
2363 } else if (RAW == '\'') {
2364 NEXT;
2365 stop = '\'';
2366 } else {
2367 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2369 ctxt->sax->error(ctxt->userData,
2370 "SystemLiteral \" or ' expected\n");
2371 ctxt->wellFormed = 0;
2372 ctxt->disableSAX = 1;
2373 return(NULL);
2374 }
2375 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2376 if (buf == NULL) {
2377 xmlGenericError(xmlGenericErrorContext,
2378 "malloc of %d byte failed\n", size);
2379 return(NULL);
2380 }
2381 cur = CUR;
2382 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2383 if (len + 1 >= size) {
2384 size *= 2;
2385 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2386 if (buf == NULL) {
2387 xmlGenericError(xmlGenericErrorContext,
2388 "realloc of %d byte failed\n", size);
2389 return(NULL);
2390 }
2391 }
2392 buf[len++] = cur;
2393 count++;
2394 if (count > 50) {
2395 GROW;
2396 count = 0;
2397 }
2398 NEXT;
2399 cur = CUR;
2400 if (cur == 0) {
2401 GROW;
2402 SHRINK;
2403 cur = CUR;
2404 }
2405 }
2406 buf[len] = 0;
2407 if (cur != stop) {
2408 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2410 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2411 ctxt->wellFormed = 0;
2412 ctxt->disableSAX = 1;
2413 } else {
2414 NEXT;
2415 }
2416 return(buf);
2417}
2418
Daniel Veillard48b2f892001-02-25 16:11:03 +00002419void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002420/**
2421 * xmlParseCharData:
2422 * @ctxt: an XML parser context
2423 * @cdata: int indicating whether we are within a CDATA section
2424 *
2425 * parse a CharData section.
2426 * if we are within a CDATA section ']]>' marks an end of section.
2427 *
2428 * The right angle bracket (>) may be represented using the string "&gt;",
2429 * and must, for compatibility, be escaped using "&gt;" or a character
2430 * reference when it appears in the string "]]>" in content, when that
2431 * string is not marking the end of a CDATA section.
2432 *
2433 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2434 */
2435
2436void
2437xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002438 const xmlChar *in;
2439 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002440 int line = ctxt->input->line;
2441 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002442
2443 SHRINK;
2444 GROW;
2445 /*
2446 * Accelerated common case where input don't need to be
2447 * modified before passing it to the handler.
2448 */
2449 if ((ctxt->token == 0) && (!cdata)) {
2450 in = ctxt->input->cur;
2451 do {
2452 while (((*in >= 0x20) && (*in != '<') &&
2453 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2454 in++;
2455 if (*in == 0xA) {
2456 ctxt->input->line++;
2457 continue; /* while */
2458 }
2459 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002460 if (nbchar > 0) {
2461 if (IS_BLANK(*ctxt->input->cur) &&
2462 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2463 if (ctxt->sax->ignorableWhitespace != NULL)
2464 ctxt->sax->ignorableWhitespace(ctxt->userData,
2465 ctxt->input->cur, nbchar);
2466 } else {
2467 if (ctxt->sax->characters != NULL)
2468 ctxt->sax->characters(ctxt->userData,
2469 ctxt->input->cur, nbchar);
2470 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002471 }
2472 ctxt->input->cur = in;
2473 if (*in == 0xD) {
2474 in++;
2475 if (*in == 0xA) {
2476 ctxt->input->cur = in;
2477 in++;
2478 ctxt->input->line++;
2479 continue; /* while */
2480 }
2481 in--;
2482 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002483 if (*in == '<') {
2484 return;
2485 }
2486 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002487 return;
2488 }
2489 SHRINK;
2490 GROW;
2491 in = ctxt->input->cur;
2492 } while ((*in >= 0x20) && (*in <= 0x7F));
2493 nbchar = 0;
2494 }
Daniel Veillard50582112001-03-26 22:52:16 +00002495 ctxt->input->line = line;
2496 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002497 xmlParseCharDataComplex(ctxt, cdata);
2498}
2499
2500void
2501xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002502 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2503 int nbchar = 0;
2504 int cur, l;
2505 int count = 0;
2506
2507 SHRINK;
2508 GROW;
2509 cur = CUR_CHAR(l);
2510 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2511 ((cur != '&') || (ctxt->token == '&')) &&
2512 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2513 if ((cur == ']') && (NXT(1) == ']') &&
2514 (NXT(2) == '>')) {
2515 if (cdata) break;
2516 else {
2517 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2519 ctxt->sax->error(ctxt->userData,
2520 "Sequence ']]>' not allowed in content\n");
2521 /* Should this be relaxed ??? I see a "must here */
2522 ctxt->wellFormed = 0;
2523 ctxt->disableSAX = 1;
2524 }
2525 }
2526 COPY_BUF(l,buf,nbchar,cur);
2527 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2528 /*
2529 * Ok the segment is to be consumed as chars.
2530 */
2531 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2532 if (areBlanks(ctxt, buf, nbchar)) {
2533 if (ctxt->sax->ignorableWhitespace != NULL)
2534 ctxt->sax->ignorableWhitespace(ctxt->userData,
2535 buf, nbchar);
2536 } else {
2537 if (ctxt->sax->characters != NULL)
2538 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2539 }
2540 }
2541 nbchar = 0;
2542 }
2543 count++;
2544 if (count > 50) {
2545 GROW;
2546 count = 0;
2547 }
2548 NEXTL(l);
2549 cur = CUR_CHAR(l);
2550 }
2551 if (nbchar != 0) {
2552 /*
2553 * Ok the segment is to be consumed as chars.
2554 */
2555 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2556 if (areBlanks(ctxt, buf, nbchar)) {
2557 if (ctxt->sax->ignorableWhitespace != NULL)
2558 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2559 } else {
2560 if (ctxt->sax->characters != NULL)
2561 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2562 }
2563 }
2564 }
2565}
2566
2567/**
2568 * xmlParseExternalID:
2569 * @ctxt: an XML parser context
2570 * @publicID: a xmlChar** receiving PubidLiteral
2571 * @strict: indicate whether we should restrict parsing to only
2572 * production [75], see NOTE below
2573 *
2574 * Parse an External ID or a Public ID
2575 *
2576 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2577 * 'PUBLIC' S PubidLiteral S SystemLiteral
2578 *
2579 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2580 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2581 *
2582 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2583 *
2584 * Returns the function returns SystemLiteral and in the second
2585 * case publicID receives PubidLiteral, is strict is off
2586 * it is possible to return NULL and have publicID set.
2587 */
2588
2589xmlChar *
2590xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2591 xmlChar *URI = NULL;
2592
2593 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002594
2595 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002596 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2597 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2598 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2599 SKIP(6);
2600 if (!IS_BLANK(CUR)) {
2601 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2603 ctxt->sax->error(ctxt->userData,
2604 "Space required after 'SYSTEM'\n");
2605 ctxt->wellFormed = 0;
2606 ctxt->disableSAX = 1;
2607 }
2608 SKIP_BLANKS;
2609 URI = xmlParseSystemLiteral(ctxt);
2610 if (URI == NULL) {
2611 ctxt->errNo = XML_ERR_URI_REQUIRED;
2612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2613 ctxt->sax->error(ctxt->userData,
2614 "xmlParseExternalID: SYSTEM, no URI\n");
2615 ctxt->wellFormed = 0;
2616 ctxt->disableSAX = 1;
2617 }
2618 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2619 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2620 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2621 SKIP(6);
2622 if (!IS_BLANK(CUR)) {
2623 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2625 ctxt->sax->error(ctxt->userData,
2626 "Space required after 'PUBLIC'\n");
2627 ctxt->wellFormed = 0;
2628 ctxt->disableSAX = 1;
2629 }
2630 SKIP_BLANKS;
2631 *publicID = xmlParsePubidLiteral(ctxt);
2632 if (*publicID == NULL) {
2633 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2635 ctxt->sax->error(ctxt->userData,
2636 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2637 ctxt->wellFormed = 0;
2638 ctxt->disableSAX = 1;
2639 }
2640 if (strict) {
2641 /*
2642 * We don't handle [83] so "S SystemLiteral" is required.
2643 */
2644 if (!IS_BLANK(CUR)) {
2645 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2647 ctxt->sax->error(ctxt->userData,
2648 "Space required after the Public Identifier\n");
2649 ctxt->wellFormed = 0;
2650 ctxt->disableSAX = 1;
2651 }
2652 } else {
2653 /*
2654 * We handle [83] so we return immediately, if
2655 * "S SystemLiteral" is not detected. From a purely parsing
2656 * point of view that's a nice mess.
2657 */
2658 const xmlChar *ptr;
2659 GROW;
2660
2661 ptr = CUR_PTR;
2662 if (!IS_BLANK(*ptr)) return(NULL);
2663
2664 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2665 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2666 }
2667 SKIP_BLANKS;
2668 URI = xmlParseSystemLiteral(ctxt);
2669 if (URI == NULL) {
2670 ctxt->errNo = XML_ERR_URI_REQUIRED;
2671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2672 ctxt->sax->error(ctxt->userData,
2673 "xmlParseExternalID: PUBLIC, no URI\n");
2674 ctxt->wellFormed = 0;
2675 ctxt->disableSAX = 1;
2676 }
2677 }
2678 return(URI);
2679}
2680
2681/**
2682 * xmlParseComment:
2683 * @ctxt: an XML parser context
2684 *
2685 * Skip an XML (SGML) comment <!-- .... -->
2686 * The spec says that "For compatibility, the string "--" (double-hyphen)
2687 * must not occur within comments. "
2688 *
2689 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2690 */
2691void
2692xmlParseComment(xmlParserCtxtPtr ctxt) {
2693 xmlChar *buf = NULL;
2694 int len;
2695 int size = XML_PARSER_BUFFER_SIZE;
2696 int q, ql;
2697 int r, rl;
2698 int cur, l;
2699 xmlParserInputState state;
2700 xmlParserInputPtr input = ctxt->input;
2701 int count = 0;
2702
2703 /*
2704 * Check that there is a comment right here.
2705 */
2706 if ((RAW != '<') || (NXT(1) != '!') ||
2707 (NXT(2) != '-') || (NXT(3) != '-')) return;
2708
2709 state = ctxt->instate;
2710 ctxt->instate = XML_PARSER_COMMENT;
2711 SHRINK;
2712 SKIP(4);
2713 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2714 if (buf == NULL) {
2715 xmlGenericError(xmlGenericErrorContext,
2716 "malloc of %d byte failed\n", size);
2717 ctxt->instate = state;
2718 return;
2719 }
2720 q = CUR_CHAR(ql);
2721 NEXTL(ql);
2722 r = CUR_CHAR(rl);
2723 NEXTL(rl);
2724 cur = CUR_CHAR(l);
2725 len = 0;
2726 while (IS_CHAR(cur) && /* checked */
2727 ((cur != '>') ||
2728 (r != '-') || (q != '-'))) {
2729 if ((r == '-') && (q == '-') && (len > 1)) {
2730 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2732 ctxt->sax->error(ctxt->userData,
2733 "Comment must not contain '--' (double-hyphen)`\n");
2734 ctxt->wellFormed = 0;
2735 ctxt->disableSAX = 1;
2736 }
2737 if (len + 5 >= size) {
2738 size *= 2;
2739 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2740 if (buf == NULL) {
2741 xmlGenericError(xmlGenericErrorContext,
2742 "realloc of %d byte failed\n", size);
2743 ctxt->instate = state;
2744 return;
2745 }
2746 }
2747 COPY_BUF(ql,buf,len,q);
2748 q = r;
2749 ql = rl;
2750 r = cur;
2751 rl = l;
2752
2753 count++;
2754 if (count > 50) {
2755 GROW;
2756 count = 0;
2757 }
2758 NEXTL(l);
2759 cur = CUR_CHAR(l);
2760 if (cur == 0) {
2761 SHRINK;
2762 GROW;
2763 cur = CUR_CHAR(l);
2764 }
2765 }
2766 buf[len] = 0;
2767 if (!IS_CHAR(cur)) {
2768 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2770 ctxt->sax->error(ctxt->userData,
2771 "Comment not terminated \n<!--%.50s\n", buf);
2772 ctxt->wellFormed = 0;
2773 ctxt->disableSAX = 1;
2774 xmlFree(buf);
2775 } else {
2776 if (input != ctxt->input) {
2777 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2779 ctxt->sax->error(ctxt->userData,
2780"Comment doesn't start and stop in the same entity\n");
2781 ctxt->wellFormed = 0;
2782 ctxt->disableSAX = 1;
2783 }
2784 NEXT;
2785 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2786 (!ctxt->disableSAX))
2787 ctxt->sax->comment(ctxt->userData, buf);
2788 xmlFree(buf);
2789 }
2790 ctxt->instate = state;
2791}
2792
2793/**
2794 * xmlParsePITarget:
2795 * @ctxt: an XML parser context
2796 *
2797 * parse the name of a PI
2798 *
2799 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2800 *
2801 * Returns the PITarget name or NULL
2802 */
2803
2804xmlChar *
2805xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2806 xmlChar *name;
2807
2808 name = xmlParseName(ctxt);
2809 if ((name != NULL) &&
2810 ((name[0] == 'x') || (name[0] == 'X')) &&
2811 ((name[1] == 'm') || (name[1] == 'M')) &&
2812 ((name[2] == 'l') || (name[2] == 'L'))) {
2813 int i;
2814 if ((name[0] == 'x') && (name[1] == 'm') &&
2815 (name[2] == 'l') && (name[3] == 0)) {
2816 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2818 ctxt->sax->error(ctxt->userData,
2819 "XML declaration allowed only at the start of the document\n");
2820 ctxt->wellFormed = 0;
2821 ctxt->disableSAX = 1;
2822 return(name);
2823 } else if (name[3] == 0) {
2824 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2826 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2827 ctxt->wellFormed = 0;
2828 ctxt->disableSAX = 1;
2829 return(name);
2830 }
2831 for (i = 0;;i++) {
2832 if (xmlW3CPIs[i] == NULL) break;
2833 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2834 return(name);
2835 }
2836 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2837 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2838 ctxt->sax->warning(ctxt->userData,
2839 "xmlParsePItarget: invalid name prefix 'xml'\n");
2840 }
2841 }
2842 return(name);
2843}
2844
2845/**
2846 * xmlParsePI:
2847 * @ctxt: an XML parser context
2848 *
2849 * parse an XML Processing Instruction.
2850 *
2851 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2852 *
2853 * The processing is transfered to SAX once parsed.
2854 */
2855
2856void
2857xmlParsePI(xmlParserCtxtPtr ctxt) {
2858 xmlChar *buf = NULL;
2859 int len = 0;
2860 int size = XML_PARSER_BUFFER_SIZE;
2861 int cur, l;
2862 xmlChar *target;
2863 xmlParserInputState state;
2864 int count = 0;
2865
2866 if ((RAW == '<') && (NXT(1) == '?')) {
2867 xmlParserInputPtr input = ctxt->input;
2868 state = ctxt->instate;
2869 ctxt->instate = XML_PARSER_PI;
2870 /*
2871 * this is a Processing Instruction.
2872 */
2873 SKIP(2);
2874 SHRINK;
2875
2876 /*
2877 * Parse the target name and check for special support like
2878 * namespace.
2879 */
2880 target = xmlParsePITarget(ctxt);
2881 if (target != NULL) {
2882 if ((RAW == '?') && (NXT(1) == '>')) {
2883 if (input != ctxt->input) {
2884 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2886 ctxt->sax->error(ctxt->userData,
2887 "PI declaration doesn't start and stop in the same entity\n");
2888 ctxt->wellFormed = 0;
2889 ctxt->disableSAX = 1;
2890 }
2891 SKIP(2);
2892
2893 /*
2894 * SAX: PI detected.
2895 */
2896 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2897 (ctxt->sax->processingInstruction != NULL))
2898 ctxt->sax->processingInstruction(ctxt->userData,
2899 target, NULL);
2900 ctxt->instate = state;
2901 xmlFree(target);
2902 return;
2903 }
2904 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2905 if (buf == NULL) {
2906 xmlGenericError(xmlGenericErrorContext,
2907 "malloc of %d byte failed\n", size);
2908 ctxt->instate = state;
2909 return;
2910 }
2911 cur = CUR;
2912 if (!IS_BLANK(cur)) {
2913 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2915 ctxt->sax->error(ctxt->userData,
2916 "xmlParsePI: PI %s space expected\n", target);
2917 ctxt->wellFormed = 0;
2918 ctxt->disableSAX = 1;
2919 }
2920 SKIP_BLANKS;
2921 cur = CUR_CHAR(l);
2922 while (IS_CHAR(cur) && /* checked */
2923 ((cur != '?') || (NXT(1) != '>'))) {
2924 if (len + 5 >= size) {
2925 size *= 2;
2926 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2927 if (buf == NULL) {
2928 xmlGenericError(xmlGenericErrorContext,
2929 "realloc of %d byte failed\n", size);
2930 ctxt->instate = state;
2931 return;
2932 }
2933 }
2934 count++;
2935 if (count > 50) {
2936 GROW;
2937 count = 0;
2938 }
2939 COPY_BUF(l,buf,len,cur);
2940 NEXTL(l);
2941 cur = CUR_CHAR(l);
2942 if (cur == 0) {
2943 SHRINK;
2944 GROW;
2945 cur = CUR_CHAR(l);
2946 }
2947 }
2948 buf[len] = 0;
2949 if (cur != '?') {
2950 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2952 ctxt->sax->error(ctxt->userData,
2953 "xmlParsePI: PI %s never end ...\n", target);
2954 ctxt->wellFormed = 0;
2955 ctxt->disableSAX = 1;
2956 } else {
2957 if (input != ctxt->input) {
2958 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2960 ctxt->sax->error(ctxt->userData,
2961 "PI declaration doesn't start and stop in the same entity\n");
2962 ctxt->wellFormed = 0;
2963 ctxt->disableSAX = 1;
2964 }
2965 SKIP(2);
2966
2967 /*
2968 * SAX: PI detected.
2969 */
2970 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2971 (ctxt->sax->processingInstruction != NULL))
2972 ctxt->sax->processingInstruction(ctxt->userData,
2973 target, buf);
2974 }
2975 xmlFree(buf);
2976 xmlFree(target);
2977 } else {
2978 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2980 ctxt->sax->error(ctxt->userData,
2981 "xmlParsePI : no target name\n");
2982 ctxt->wellFormed = 0;
2983 ctxt->disableSAX = 1;
2984 }
2985 ctxt->instate = state;
2986 }
2987}
2988
2989/**
2990 * xmlParseNotationDecl:
2991 * @ctxt: an XML parser context
2992 *
2993 * parse a notation declaration
2994 *
2995 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2996 *
2997 * Hence there is actually 3 choices:
2998 * 'PUBLIC' S PubidLiteral
2999 * 'PUBLIC' S PubidLiteral S SystemLiteral
3000 * and 'SYSTEM' S SystemLiteral
3001 *
3002 * See the NOTE on xmlParseExternalID().
3003 */
3004
3005void
3006xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3007 xmlChar *name;
3008 xmlChar *Pubid;
3009 xmlChar *Systemid;
3010
3011 if ((RAW == '<') && (NXT(1) == '!') &&
3012 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3013 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3014 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3015 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3016 xmlParserInputPtr input = ctxt->input;
3017 SHRINK;
3018 SKIP(10);
3019 if (!IS_BLANK(CUR)) {
3020 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3022 ctxt->sax->error(ctxt->userData,
3023 "Space required after '<!NOTATION'\n");
3024 ctxt->wellFormed = 0;
3025 ctxt->disableSAX = 1;
3026 return;
3027 }
3028 SKIP_BLANKS;
3029
Daniel Veillard29631a82001-03-05 09:49:20 +00003030 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003031 if (name == NULL) {
3032 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3034 ctxt->sax->error(ctxt->userData,
3035 "NOTATION: Name expected here\n");
3036 ctxt->wellFormed = 0;
3037 ctxt->disableSAX = 1;
3038 return;
3039 }
3040 if (!IS_BLANK(CUR)) {
3041 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3043 ctxt->sax->error(ctxt->userData,
3044 "Space required after the NOTATION name'\n");
3045 ctxt->wellFormed = 0;
3046 ctxt->disableSAX = 1;
3047 return;
3048 }
3049 SKIP_BLANKS;
3050
3051 /*
3052 * Parse the IDs.
3053 */
3054 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3055 SKIP_BLANKS;
3056
3057 if (RAW == '>') {
3058 if (input != ctxt->input) {
3059 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3061 ctxt->sax->error(ctxt->userData,
3062"Notation declaration doesn't start and stop in the same entity\n");
3063 ctxt->wellFormed = 0;
3064 ctxt->disableSAX = 1;
3065 }
3066 NEXT;
3067 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3068 (ctxt->sax->notationDecl != NULL))
3069 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3070 } else {
3071 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3073 ctxt->sax->error(ctxt->userData,
3074 "'>' required to close NOTATION declaration\n");
3075 ctxt->wellFormed = 0;
3076 ctxt->disableSAX = 1;
3077 }
3078 xmlFree(name);
3079 if (Systemid != NULL) xmlFree(Systemid);
3080 if (Pubid != NULL) xmlFree(Pubid);
3081 }
3082}
3083
3084/**
3085 * xmlParseEntityDecl:
3086 * @ctxt: an XML parser context
3087 *
3088 * parse <!ENTITY declarations
3089 *
3090 * [70] EntityDecl ::= GEDecl | PEDecl
3091 *
3092 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3093 *
3094 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3095 *
3096 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3097 *
3098 * [74] PEDef ::= EntityValue | ExternalID
3099 *
3100 * [76] NDataDecl ::= S 'NDATA' S Name
3101 *
3102 * [ VC: Notation Declared ]
3103 * The Name must match the declared name of a notation.
3104 */
3105
3106void
3107xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3108 xmlChar *name = NULL;
3109 xmlChar *value = NULL;
3110 xmlChar *URI = NULL, *literal = NULL;
3111 xmlChar *ndata = NULL;
3112 int isParameter = 0;
3113 xmlChar *orig = NULL;
3114
3115 GROW;
3116 if ((RAW == '<') && (NXT(1) == '!') &&
3117 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3118 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3119 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3120 xmlParserInputPtr input = ctxt->input;
3121 ctxt->instate = XML_PARSER_ENTITY_DECL;
3122 SHRINK;
3123 SKIP(8);
3124 if (!IS_BLANK(CUR)) {
3125 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3127 ctxt->sax->error(ctxt->userData,
3128 "Space required after '<!ENTITY'\n");
3129 ctxt->wellFormed = 0;
3130 ctxt->disableSAX = 1;
3131 }
3132 SKIP_BLANKS;
3133
3134 if (RAW == '%') {
3135 NEXT;
3136 if (!IS_BLANK(CUR)) {
3137 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3139 ctxt->sax->error(ctxt->userData,
3140 "Space required after '%'\n");
3141 ctxt->wellFormed = 0;
3142 ctxt->disableSAX = 1;
3143 }
3144 SKIP_BLANKS;
3145 isParameter = 1;
3146 }
3147
Daniel Veillard29631a82001-03-05 09:49:20 +00003148 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003149 if (name == NULL) {
3150 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3152 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3153 ctxt->wellFormed = 0;
3154 ctxt->disableSAX = 1;
3155 return;
3156 }
3157 if (!IS_BLANK(CUR)) {
3158 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3160 ctxt->sax->error(ctxt->userData,
3161 "Space required after the entity name\n");
3162 ctxt->wellFormed = 0;
3163 ctxt->disableSAX = 1;
3164 }
3165 SKIP_BLANKS;
3166
3167 /*
3168 * handle the various case of definitions...
3169 */
3170 if (isParameter) {
3171 if ((RAW == '"') || (RAW == '\'')) {
3172 value = xmlParseEntityValue(ctxt, &orig);
3173 if (value) {
3174 if ((ctxt->sax != NULL) &&
3175 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3176 ctxt->sax->entityDecl(ctxt->userData, name,
3177 XML_INTERNAL_PARAMETER_ENTITY,
3178 NULL, NULL, value);
3179 }
3180 } else {
3181 URI = xmlParseExternalID(ctxt, &literal, 1);
3182 if ((URI == NULL) && (literal == NULL)) {
3183 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3185 ctxt->sax->error(ctxt->userData,
3186 "Entity value required\n");
3187 ctxt->wellFormed = 0;
3188 ctxt->disableSAX = 1;
3189 }
3190 if (URI) {
3191 xmlURIPtr uri;
3192
3193 uri = xmlParseURI((const char *) URI);
3194 if (uri == NULL) {
3195 ctxt->errNo = XML_ERR_INVALID_URI;
3196 if ((ctxt->sax != NULL) &&
3197 (!ctxt->disableSAX) &&
3198 (ctxt->sax->error != NULL))
3199 ctxt->sax->error(ctxt->userData,
3200 "Invalid URI: %s\n", URI);
3201 ctxt->wellFormed = 0;
3202 } else {
3203 if (uri->fragment != NULL) {
3204 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3205 if ((ctxt->sax != NULL) &&
3206 (!ctxt->disableSAX) &&
3207 (ctxt->sax->error != NULL))
3208 ctxt->sax->error(ctxt->userData,
3209 "Fragment not allowed: %s\n", URI);
3210 ctxt->wellFormed = 0;
3211 } else {
3212 if ((ctxt->sax != NULL) &&
3213 (!ctxt->disableSAX) &&
3214 (ctxt->sax->entityDecl != NULL))
3215 ctxt->sax->entityDecl(ctxt->userData, name,
3216 XML_EXTERNAL_PARAMETER_ENTITY,
3217 literal, URI, NULL);
3218 }
3219 xmlFreeURI(uri);
3220 }
3221 }
3222 }
3223 } else {
3224 if ((RAW == '"') || (RAW == '\'')) {
3225 value = xmlParseEntityValue(ctxt, &orig);
3226 if ((ctxt->sax != NULL) &&
3227 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3228 ctxt->sax->entityDecl(ctxt->userData, name,
3229 XML_INTERNAL_GENERAL_ENTITY,
3230 NULL, NULL, value);
3231 } else {
3232 URI = xmlParseExternalID(ctxt, &literal, 1);
3233 if ((URI == NULL) && (literal == NULL)) {
3234 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3236 ctxt->sax->error(ctxt->userData,
3237 "Entity value required\n");
3238 ctxt->wellFormed = 0;
3239 ctxt->disableSAX = 1;
3240 }
3241 if (URI) {
3242 xmlURIPtr uri;
3243
3244 uri = xmlParseURI((const char *)URI);
3245 if (uri == NULL) {
3246 ctxt->errNo = XML_ERR_INVALID_URI;
3247 if ((ctxt->sax != NULL) &&
3248 (!ctxt->disableSAX) &&
3249 (ctxt->sax->error != NULL))
3250 ctxt->sax->error(ctxt->userData,
3251 "Invalid URI: %s\n", URI);
3252 ctxt->wellFormed = 0;
3253 } else {
3254 if (uri->fragment != NULL) {
3255 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3256 if ((ctxt->sax != NULL) &&
3257 (!ctxt->disableSAX) &&
3258 (ctxt->sax->error != NULL))
3259 ctxt->sax->error(ctxt->userData,
3260 "Fragment not allowed: %s\n", URI);
3261 ctxt->wellFormed = 0;
3262 }
3263 xmlFreeURI(uri);
3264 }
3265 }
3266 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3267 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3269 ctxt->sax->error(ctxt->userData,
3270 "Space required before 'NDATA'\n");
3271 ctxt->wellFormed = 0;
3272 ctxt->disableSAX = 1;
3273 }
3274 SKIP_BLANKS;
3275 if ((RAW == 'N') && (NXT(1) == 'D') &&
3276 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3277 (NXT(4) == 'A')) {
3278 SKIP(5);
3279 if (!IS_BLANK(CUR)) {
3280 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3282 ctxt->sax->error(ctxt->userData,
3283 "Space required after 'NDATA'\n");
3284 ctxt->wellFormed = 0;
3285 ctxt->disableSAX = 1;
3286 }
3287 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003288 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003289 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3290 (ctxt->sax->unparsedEntityDecl != NULL))
3291 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3292 literal, URI, ndata);
3293 } else {
3294 if ((ctxt->sax != NULL) &&
3295 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3296 ctxt->sax->entityDecl(ctxt->userData, name,
3297 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3298 literal, URI, NULL);
3299 }
3300 }
3301 }
3302 SKIP_BLANKS;
3303 if (RAW != '>') {
3304 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3306 ctxt->sax->error(ctxt->userData,
3307 "xmlParseEntityDecl: entity %s not terminated\n", name);
3308 ctxt->wellFormed = 0;
3309 ctxt->disableSAX = 1;
3310 } else {
3311 if (input != ctxt->input) {
3312 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3314 ctxt->sax->error(ctxt->userData,
3315"Entity declaration doesn't start and stop in the same entity\n");
3316 ctxt->wellFormed = 0;
3317 ctxt->disableSAX = 1;
3318 }
3319 NEXT;
3320 }
3321 if (orig != NULL) {
3322 /*
3323 * Ugly mechanism to save the raw entity value.
3324 */
3325 xmlEntityPtr cur = NULL;
3326
3327 if (isParameter) {
3328 if ((ctxt->sax != NULL) &&
3329 (ctxt->sax->getParameterEntity != NULL))
3330 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3331 } else {
3332 if ((ctxt->sax != NULL) &&
3333 (ctxt->sax->getEntity != NULL))
3334 cur = ctxt->sax->getEntity(ctxt->userData, name);
3335 }
3336 if (cur != NULL) {
3337 if (cur->orig != NULL)
3338 xmlFree(orig);
3339 else
3340 cur->orig = orig;
3341 } else
3342 xmlFree(orig);
3343 }
3344 if (name != NULL) xmlFree(name);
3345 if (value != NULL) xmlFree(value);
3346 if (URI != NULL) xmlFree(URI);
3347 if (literal != NULL) xmlFree(literal);
3348 if (ndata != NULL) xmlFree(ndata);
3349 }
3350}
3351
3352/**
3353 * xmlParseDefaultDecl:
3354 * @ctxt: an XML parser context
3355 * @value: Receive a possible fixed default value for the attribute
3356 *
3357 * Parse an attribute default declaration
3358 *
3359 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3360 *
3361 * [ VC: Required Attribute ]
3362 * if the default declaration is the keyword #REQUIRED, then the
3363 * attribute must be specified for all elements of the type in the
3364 * attribute-list declaration.
3365 *
3366 * [ VC: Attribute Default Legal ]
3367 * The declared default value must meet the lexical constraints of
3368 * the declared attribute type c.f. xmlValidateAttributeDecl()
3369 *
3370 * [ VC: Fixed Attribute Default ]
3371 * if an attribute has a default value declared with the #FIXED
3372 * keyword, instances of that attribute must match the default value.
3373 *
3374 * [ WFC: No < in Attribute Values ]
3375 * handled in xmlParseAttValue()
3376 *
3377 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3378 * or XML_ATTRIBUTE_FIXED.
3379 */
3380
3381int
3382xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3383 int val;
3384 xmlChar *ret;
3385
3386 *value = NULL;
3387 if ((RAW == '#') && (NXT(1) == 'R') &&
3388 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3389 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3390 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3391 (NXT(8) == 'D')) {
3392 SKIP(9);
3393 return(XML_ATTRIBUTE_REQUIRED);
3394 }
3395 if ((RAW == '#') && (NXT(1) == 'I') &&
3396 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3397 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3398 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3399 SKIP(8);
3400 return(XML_ATTRIBUTE_IMPLIED);
3401 }
3402 val = XML_ATTRIBUTE_NONE;
3403 if ((RAW == '#') && (NXT(1) == 'F') &&
3404 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3405 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3406 SKIP(6);
3407 val = XML_ATTRIBUTE_FIXED;
3408 if (!IS_BLANK(CUR)) {
3409 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412 "Space required after '#FIXED'\n");
3413 ctxt->wellFormed = 0;
3414 ctxt->disableSAX = 1;
3415 }
3416 SKIP_BLANKS;
3417 }
3418 ret = xmlParseAttValue(ctxt);
3419 ctxt->instate = XML_PARSER_DTD;
3420 if (ret == NULL) {
3421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3422 ctxt->sax->error(ctxt->userData,
3423 "Attribute default value declaration error\n");
3424 ctxt->wellFormed = 0;
3425 ctxt->disableSAX = 1;
3426 } else
3427 *value = ret;
3428 return(val);
3429}
3430
3431/**
3432 * xmlParseNotationType:
3433 * @ctxt: an XML parser context
3434 *
3435 * parse an Notation attribute type.
3436 *
3437 * Note: the leading 'NOTATION' S part has already being parsed...
3438 *
3439 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3440 *
3441 * [ VC: Notation Attributes ]
3442 * Values of this type must match one of the notation names included
3443 * in the declaration; all notation names in the declaration must be declared.
3444 *
3445 * Returns: the notation attribute tree built while parsing
3446 */
3447
3448xmlEnumerationPtr
3449xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3450 xmlChar *name;
3451 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3452
3453 if (RAW != '(') {
3454 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3456 ctxt->sax->error(ctxt->userData,
3457 "'(' required to start 'NOTATION'\n");
3458 ctxt->wellFormed = 0;
3459 ctxt->disableSAX = 1;
3460 return(NULL);
3461 }
3462 SHRINK;
3463 do {
3464 NEXT;
3465 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003466 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003467 if (name == NULL) {
3468 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3470 ctxt->sax->error(ctxt->userData,
3471 "Name expected in NOTATION declaration\n");
3472 ctxt->wellFormed = 0;
3473 ctxt->disableSAX = 1;
3474 return(ret);
3475 }
3476 cur = xmlCreateEnumeration(name);
3477 xmlFree(name);
3478 if (cur == NULL) return(ret);
3479 if (last == NULL) ret = last = cur;
3480 else {
3481 last->next = cur;
3482 last = cur;
3483 }
3484 SKIP_BLANKS;
3485 } while (RAW == '|');
3486 if (RAW != ')') {
3487 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3489 ctxt->sax->error(ctxt->userData,
3490 "')' required to finish NOTATION declaration\n");
3491 ctxt->wellFormed = 0;
3492 ctxt->disableSAX = 1;
3493 if ((last != NULL) && (last != ret))
3494 xmlFreeEnumeration(last);
3495 return(ret);
3496 }
3497 NEXT;
3498 return(ret);
3499}
3500
3501/**
3502 * xmlParseEnumerationType:
3503 * @ctxt: an XML parser context
3504 *
3505 * parse an Enumeration attribute type.
3506 *
3507 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3508 *
3509 * [ VC: Enumeration ]
3510 * Values of this type must match one of the Nmtoken tokens in
3511 * the declaration
3512 *
3513 * Returns: the enumeration attribute tree built while parsing
3514 */
3515
3516xmlEnumerationPtr
3517xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3518 xmlChar *name;
3519 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3520
3521 if (RAW != '(') {
3522 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3524 ctxt->sax->error(ctxt->userData,
3525 "'(' required to start ATTLIST enumeration\n");
3526 ctxt->wellFormed = 0;
3527 ctxt->disableSAX = 1;
3528 return(NULL);
3529 }
3530 SHRINK;
3531 do {
3532 NEXT;
3533 SKIP_BLANKS;
3534 name = xmlParseNmtoken(ctxt);
3535 if (name == NULL) {
3536 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3538 ctxt->sax->error(ctxt->userData,
3539 "NmToken expected in ATTLIST enumeration\n");
3540 ctxt->wellFormed = 0;
3541 ctxt->disableSAX = 1;
3542 return(ret);
3543 }
3544 cur = xmlCreateEnumeration(name);
3545 xmlFree(name);
3546 if (cur == NULL) return(ret);
3547 if (last == NULL) ret = last = cur;
3548 else {
3549 last->next = cur;
3550 last = cur;
3551 }
3552 SKIP_BLANKS;
3553 } while (RAW == '|');
3554 if (RAW != ')') {
3555 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3557 ctxt->sax->error(ctxt->userData,
3558 "')' required to finish ATTLIST enumeration\n");
3559 ctxt->wellFormed = 0;
3560 ctxt->disableSAX = 1;
3561 return(ret);
3562 }
3563 NEXT;
3564 return(ret);
3565}
3566
3567/**
3568 * xmlParseEnumeratedType:
3569 * @ctxt: an XML parser context
3570 * @tree: the enumeration tree built while parsing
3571 *
3572 * parse an Enumerated attribute type.
3573 *
3574 * [57] EnumeratedType ::= NotationType | Enumeration
3575 *
3576 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3577 *
3578 *
3579 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3580 */
3581
3582int
3583xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3584 if ((RAW == 'N') && (NXT(1) == 'O') &&
3585 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3586 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3587 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3588 SKIP(8);
3589 if (!IS_BLANK(CUR)) {
3590 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3592 ctxt->sax->error(ctxt->userData,
3593 "Space required after 'NOTATION'\n");
3594 ctxt->wellFormed = 0;
3595 ctxt->disableSAX = 1;
3596 return(0);
3597 }
3598 SKIP_BLANKS;
3599 *tree = xmlParseNotationType(ctxt);
3600 if (*tree == NULL) return(0);
3601 return(XML_ATTRIBUTE_NOTATION);
3602 }
3603 *tree = xmlParseEnumerationType(ctxt);
3604 if (*tree == NULL) return(0);
3605 return(XML_ATTRIBUTE_ENUMERATION);
3606}
3607
3608/**
3609 * xmlParseAttributeType:
3610 * @ctxt: an XML parser context
3611 * @tree: the enumeration tree built while parsing
3612 *
3613 * parse the Attribute list def for an element
3614 *
3615 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3616 *
3617 * [55] StringType ::= 'CDATA'
3618 *
3619 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3620 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3621 *
3622 * Validity constraints for attribute values syntax are checked in
3623 * xmlValidateAttributeValue()
3624 *
3625 * [ VC: ID ]
3626 * Values of type ID must match the Name production. A name must not
3627 * appear more than once in an XML document as a value of this type;
3628 * i.e., ID values must uniquely identify the elements which bear them.
3629 *
3630 * [ VC: One ID per Element Type ]
3631 * No element type may have more than one ID attribute specified.
3632 *
3633 * [ VC: ID Attribute Default ]
3634 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3635 *
3636 * [ VC: IDREF ]
3637 * Values of type IDREF must match the Name production, and values
3638 * of type IDREFS must match Names; each IDREF Name must match the value
3639 * of an ID attribute on some element in the XML document; i.e. IDREF
3640 * values must match the value of some ID attribute.
3641 *
3642 * [ VC: Entity Name ]
3643 * Values of type ENTITY must match the Name production, values
3644 * of type ENTITIES must match Names; each Entity Name must match the
3645 * name of an unparsed entity declared in the DTD.
3646 *
3647 * [ VC: Name Token ]
3648 * Values of type NMTOKEN must match the Nmtoken production; values
3649 * of type NMTOKENS must match Nmtokens.
3650 *
3651 * Returns the attribute type
3652 */
3653int
3654xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3655 SHRINK;
3656 if ((RAW == 'C') && (NXT(1) == 'D') &&
3657 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3658 (NXT(4) == 'A')) {
3659 SKIP(5);
3660 return(XML_ATTRIBUTE_CDATA);
3661 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3662 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3663 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3664 SKIP(6);
3665 return(XML_ATTRIBUTE_IDREFS);
3666 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3667 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3668 (NXT(4) == 'F')) {
3669 SKIP(5);
3670 return(XML_ATTRIBUTE_IDREF);
3671 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3672 SKIP(2);
3673 return(XML_ATTRIBUTE_ID);
3674 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3675 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3676 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3677 SKIP(6);
3678 return(XML_ATTRIBUTE_ENTITY);
3679 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3680 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3681 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3682 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3683 SKIP(8);
3684 return(XML_ATTRIBUTE_ENTITIES);
3685 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3686 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3687 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3688 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3689 SKIP(8);
3690 return(XML_ATTRIBUTE_NMTOKENS);
3691 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3692 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3693 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3694 (NXT(6) == 'N')) {
3695 SKIP(7);
3696 return(XML_ATTRIBUTE_NMTOKEN);
3697 }
3698 return(xmlParseEnumeratedType(ctxt, tree));
3699}
3700
3701/**
3702 * xmlParseAttributeListDecl:
3703 * @ctxt: an XML parser context
3704 *
3705 * : parse the Attribute list def for an element
3706 *
3707 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3708 *
3709 * [53] AttDef ::= S Name S AttType S DefaultDecl
3710 *
3711 */
3712void
3713xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3714 xmlChar *elemName;
3715 xmlChar *attrName;
3716 xmlEnumerationPtr tree;
3717
3718 if ((RAW == '<') && (NXT(1) == '!') &&
3719 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3720 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3721 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3722 (NXT(8) == 'T')) {
3723 xmlParserInputPtr input = ctxt->input;
3724
3725 SKIP(9);
3726 if (!IS_BLANK(CUR)) {
3727 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3729 ctxt->sax->error(ctxt->userData,
3730 "Space required after '<!ATTLIST'\n");
3731 ctxt->wellFormed = 0;
3732 ctxt->disableSAX = 1;
3733 }
3734 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003735 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003736 if (elemName == NULL) {
3737 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3739 ctxt->sax->error(ctxt->userData,
3740 "ATTLIST: no name for Element\n");
3741 ctxt->wellFormed = 0;
3742 ctxt->disableSAX = 1;
3743 return;
3744 }
3745 SKIP_BLANKS;
3746 GROW;
3747 while (RAW != '>') {
3748 const xmlChar *check = CUR_PTR;
3749 int type;
3750 int def;
3751 xmlChar *defaultValue = NULL;
3752
3753 GROW;
3754 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003755 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003756 if (attrName == NULL) {
3757 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3759 ctxt->sax->error(ctxt->userData,
3760 "ATTLIST: no name for Attribute\n");
3761 ctxt->wellFormed = 0;
3762 ctxt->disableSAX = 1;
3763 break;
3764 }
3765 GROW;
3766 if (!IS_BLANK(CUR)) {
3767 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3769 ctxt->sax->error(ctxt->userData,
3770 "Space required after the attribute name\n");
3771 ctxt->wellFormed = 0;
3772 ctxt->disableSAX = 1;
3773 if (attrName != NULL)
3774 xmlFree(attrName);
3775 if (defaultValue != NULL)
3776 xmlFree(defaultValue);
3777 break;
3778 }
3779 SKIP_BLANKS;
3780
3781 type = xmlParseAttributeType(ctxt, &tree);
3782 if (type <= 0) {
3783 if (attrName != NULL)
3784 xmlFree(attrName);
3785 if (defaultValue != NULL)
3786 xmlFree(defaultValue);
3787 break;
3788 }
3789
3790 GROW;
3791 if (!IS_BLANK(CUR)) {
3792 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3794 ctxt->sax->error(ctxt->userData,
3795 "Space required after the attribute type\n");
3796 ctxt->wellFormed = 0;
3797 ctxt->disableSAX = 1;
3798 if (attrName != NULL)
3799 xmlFree(attrName);
3800 if (defaultValue != NULL)
3801 xmlFree(defaultValue);
3802 if (tree != NULL)
3803 xmlFreeEnumeration(tree);
3804 break;
3805 }
3806 SKIP_BLANKS;
3807
3808 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3809 if (def <= 0) {
3810 if (attrName != NULL)
3811 xmlFree(attrName);
3812 if (defaultValue != NULL)
3813 xmlFree(defaultValue);
3814 if (tree != NULL)
3815 xmlFreeEnumeration(tree);
3816 break;
3817 }
3818
3819 GROW;
3820 if (RAW != '>') {
3821 if (!IS_BLANK(CUR)) {
3822 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3824 ctxt->sax->error(ctxt->userData,
3825 "Space required after the attribute default value\n");
3826 ctxt->wellFormed = 0;
3827 ctxt->disableSAX = 1;
3828 if (attrName != NULL)
3829 xmlFree(attrName);
3830 if (defaultValue != NULL)
3831 xmlFree(defaultValue);
3832 if (tree != NULL)
3833 xmlFreeEnumeration(tree);
3834 break;
3835 }
3836 SKIP_BLANKS;
3837 }
3838 if (check == CUR_PTR) {
3839 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3841 ctxt->sax->error(ctxt->userData,
3842 "xmlParseAttributeListDecl: detected internal error\n");
3843 if (attrName != NULL)
3844 xmlFree(attrName);
3845 if (defaultValue != NULL)
3846 xmlFree(defaultValue);
3847 if (tree != NULL)
3848 xmlFreeEnumeration(tree);
3849 break;
3850 }
3851 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3852 (ctxt->sax->attributeDecl != NULL))
3853 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3854 type, def, defaultValue, tree);
3855 if (attrName != NULL)
3856 xmlFree(attrName);
3857 if (defaultValue != NULL)
3858 xmlFree(defaultValue);
3859 GROW;
3860 }
3861 if (RAW == '>') {
3862 if (input != ctxt->input) {
3863 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3865 ctxt->sax->error(ctxt->userData,
3866"Attribute list declaration doesn't start and stop in the same entity\n");
3867 ctxt->wellFormed = 0;
3868 ctxt->disableSAX = 1;
3869 }
3870 NEXT;
3871 }
3872
3873 xmlFree(elemName);
3874 }
3875}
3876
3877/**
3878 * xmlParseElementMixedContentDecl:
3879 * @ctxt: an XML parser context
3880 *
3881 * parse the declaration for a Mixed Element content
3882 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3883 *
3884 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3885 * '(' S? '#PCDATA' S? ')'
3886 *
3887 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3888 *
3889 * [ VC: No Duplicate Types ]
3890 * The same name must not appear more than once in a single
3891 * mixed-content declaration.
3892 *
3893 * returns: the list of the xmlElementContentPtr describing the element choices
3894 */
3895xmlElementContentPtr
3896xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3897 xmlElementContentPtr ret = NULL, cur = NULL, n;
3898 xmlChar *elem = NULL;
3899
3900 GROW;
3901 if ((RAW == '#') && (NXT(1) == 'P') &&
3902 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3903 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3904 (NXT(6) == 'A')) {
3905 SKIP(7);
3906 SKIP_BLANKS;
3907 SHRINK;
3908 if (RAW == ')') {
3909 ctxt->entity = ctxt->input;
3910 NEXT;
3911 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3912 if (RAW == '*') {
3913 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3914 NEXT;
3915 }
3916 return(ret);
3917 }
3918 if ((RAW == '(') || (RAW == '|')) {
3919 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3920 if (ret == NULL) return(NULL);
3921 }
3922 while (RAW == '|') {
3923 NEXT;
3924 if (elem == NULL) {
3925 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3926 if (ret == NULL) return(NULL);
3927 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003928 if (cur != NULL)
3929 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003930 cur = ret;
3931 } else {
3932 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3933 if (n == NULL) return(NULL);
3934 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003935 if (n->c1 != NULL)
3936 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00003937 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003938 if (n != NULL)
3939 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003940 cur = n;
3941 xmlFree(elem);
3942 }
3943 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003944 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003945 if (elem == NULL) {
3946 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3948 ctxt->sax->error(ctxt->userData,
3949 "xmlParseElementMixedContentDecl : Name expected\n");
3950 ctxt->wellFormed = 0;
3951 ctxt->disableSAX = 1;
3952 xmlFreeElementContent(cur);
3953 return(NULL);
3954 }
3955 SKIP_BLANKS;
3956 GROW;
3957 }
3958 if ((RAW == ')') && (NXT(1) == '*')) {
3959 if (elem != NULL) {
3960 cur->c2 = xmlNewElementContent(elem,
3961 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003962 if (cur->c2 != NULL)
3963 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003964 xmlFree(elem);
3965 }
3966 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3967 ctxt->entity = ctxt->input;
3968 SKIP(2);
3969 } else {
3970 if (elem != NULL) xmlFree(elem);
3971 xmlFreeElementContent(ret);
3972 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3974 ctxt->sax->error(ctxt->userData,
3975 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3976 ctxt->wellFormed = 0;
3977 ctxt->disableSAX = 1;
3978 return(NULL);
3979 }
3980
3981 } else {
3982 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3984 ctxt->sax->error(ctxt->userData,
3985 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3986 ctxt->wellFormed = 0;
3987 ctxt->disableSAX = 1;
3988 }
3989 return(ret);
3990}
3991
3992/**
3993 * xmlParseElementChildrenContentDecl:
3994 * @ctxt: an XML parser context
3995 *
3996 * parse the declaration for a Mixed Element content
3997 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3998 *
3999 *
4000 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4001 *
4002 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4003 *
4004 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4005 *
4006 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4007 *
4008 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4009 * TODO Parameter-entity replacement text must be properly nested
4010 * with parenthetized groups. That is to say, if either of the
4011 * opening or closing parentheses in a choice, seq, or Mixed
4012 * construct is contained in the replacement text for a parameter
4013 * entity, both must be contained in the same replacement text. For
4014 * interoperability, if a parameter-entity reference appears in a
4015 * choice, seq, or Mixed construct, its replacement text should not
4016 * be empty, and neither the first nor last non-blank character of
4017 * the replacement text should be a connector (| or ,).
4018 *
4019 * returns: the tree of xmlElementContentPtr describing the element
4020 * hierarchy.
4021 */
4022xmlElementContentPtr
4023#ifdef VMS
4024xmlParseElementChildrenContentD
4025#else
4026xmlParseElementChildrenContentDecl
4027#endif
4028(xmlParserCtxtPtr ctxt) {
4029 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4030 xmlChar *elem;
4031 xmlChar type = 0;
4032
4033 SKIP_BLANKS;
4034 GROW;
4035 if (RAW == '(') {
4036 /* Recurse on first child */
4037 NEXT;
4038 SKIP_BLANKS;
4039 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4040 SKIP_BLANKS;
4041 GROW;
4042 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004043 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004044 if (elem == NULL) {
4045 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4049 ctxt->wellFormed = 0;
4050 ctxt->disableSAX = 1;
4051 return(NULL);
4052 }
4053 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4054 GROW;
4055 if (RAW == '?') {
4056 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4057 NEXT;
4058 } else if (RAW == '*') {
4059 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4060 NEXT;
4061 } else if (RAW == '+') {
4062 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4063 NEXT;
4064 } else {
4065 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4066 }
4067 xmlFree(elem);
4068 GROW;
4069 }
4070 SKIP_BLANKS;
4071 SHRINK;
4072 while (RAW != ')') {
4073 /*
4074 * Each loop we parse one separator and one element.
4075 */
4076 if (RAW == ',') {
4077 if (type == 0) type = CUR;
4078
4079 /*
4080 * Detect "Name | Name , Name" error
4081 */
4082 else if (type != CUR) {
4083 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4085 ctxt->sax->error(ctxt->userData,
4086 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4087 type);
4088 ctxt->wellFormed = 0;
4089 ctxt->disableSAX = 1;
4090 if ((op != NULL) && (op != ret))
4091 xmlFreeElementContent(op);
4092 if ((last != NULL) && (last != ret) &&
4093 (last != ret->c1) && (last != ret->c2))
4094 xmlFreeElementContent(last);
4095 if (ret != NULL)
4096 xmlFreeElementContent(ret);
4097 return(NULL);
4098 }
4099 NEXT;
4100
4101 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4102 if (op == NULL) {
4103 xmlFreeElementContent(ret);
4104 return(NULL);
4105 }
4106 if (last == NULL) {
4107 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004108 if (ret != NULL)
4109 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004110 ret = cur = op;
4111 } else {
4112 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004113 if (op != NULL)
4114 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004115 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004116 if (last != NULL)
4117 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004118 cur =op;
4119 last = NULL;
4120 }
4121 } else if (RAW == '|') {
4122 if (type == 0) type = CUR;
4123
4124 /*
4125 * Detect "Name , Name | Name" error
4126 */
4127 else if (type != CUR) {
4128 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4130 ctxt->sax->error(ctxt->userData,
4131 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4132 type);
4133 ctxt->wellFormed = 0;
4134 ctxt->disableSAX = 1;
4135 if ((op != NULL) && (op != ret) && (op != last))
4136 xmlFreeElementContent(op);
4137 if ((last != NULL) && (last != ret) &&
4138 (last != ret->c1) && (last != ret->c2))
4139 xmlFreeElementContent(last);
4140 if (ret != NULL)
4141 xmlFreeElementContent(ret);
4142 return(NULL);
4143 }
4144 NEXT;
4145
4146 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4147 if (op == NULL) {
4148 if ((op != NULL) && (op != ret))
4149 xmlFreeElementContent(op);
4150 if ((last != NULL) && (last != ret) &&
4151 (last != ret->c1) && (last != ret->c2))
4152 xmlFreeElementContent(last);
4153 if (ret != NULL)
4154 xmlFreeElementContent(ret);
4155 return(NULL);
4156 }
4157 if (last == NULL) {
4158 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004159 if (ret != NULL)
4160 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004161 ret = cur = op;
4162 } else {
4163 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004164 if (op != NULL)
4165 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004166 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004167 if (last != NULL)
4168 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004169 cur =op;
4170 last = NULL;
4171 }
4172 } else {
4173 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4175 ctxt->sax->error(ctxt->userData,
4176 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4177 ctxt->wellFormed = 0;
4178 ctxt->disableSAX = 1;
4179 if ((op != NULL) && (op != ret))
4180 xmlFreeElementContent(op);
4181 if ((last != NULL) && (last != ret) &&
4182 (last != ret->c1) && (last != ret->c2))
4183 xmlFreeElementContent(last);
4184 if (ret != NULL)
4185 xmlFreeElementContent(ret);
4186 return(NULL);
4187 }
4188 GROW;
4189 SKIP_BLANKS;
4190 GROW;
4191 if (RAW == '(') {
4192 /* Recurse on second child */
4193 NEXT;
4194 SKIP_BLANKS;
4195 last = xmlParseElementChildrenContentDecl(ctxt);
4196 SKIP_BLANKS;
4197 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004198 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004199 if (elem == NULL) {
4200 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4202 ctxt->sax->error(ctxt->userData,
4203 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4204 ctxt->wellFormed = 0;
4205 ctxt->disableSAX = 1;
4206 if ((op != NULL) && (op != ret))
4207 xmlFreeElementContent(op);
4208 if ((last != NULL) && (last != ret) &&
4209 (last != ret->c1) && (last != ret->c2))
4210 xmlFreeElementContent(last);
4211 if (ret != NULL)
4212 xmlFreeElementContent(ret);
4213 return(NULL);
4214 }
4215 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4216 xmlFree(elem);
4217 if (RAW == '?') {
4218 last->ocur = XML_ELEMENT_CONTENT_OPT;
4219 NEXT;
4220 } else if (RAW == '*') {
4221 last->ocur = XML_ELEMENT_CONTENT_MULT;
4222 NEXT;
4223 } else if (RAW == '+') {
4224 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4225 NEXT;
4226 } else {
4227 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4228 }
4229 }
4230 SKIP_BLANKS;
4231 GROW;
4232 }
4233 if ((cur != NULL) && (last != NULL)) {
4234 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004235 if (last != NULL)
4236 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004237 }
4238 ctxt->entity = ctxt->input;
4239 NEXT;
4240 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004241 if (ret != NULL)
4242 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004243 NEXT;
4244 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004245 if (ret != NULL)
4246 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004247 NEXT;
4248 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004249 if (ret != NULL)
4250 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004251 NEXT;
4252 }
4253 return(ret);
4254}
4255
4256/**
4257 * xmlParseElementContentDecl:
4258 * @ctxt: an XML parser context
4259 * @name: the name of the element being defined.
4260 * @result: the Element Content pointer will be stored here if any
4261 *
4262 * parse the declaration for an Element content either Mixed or Children,
4263 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4264 *
4265 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4266 *
4267 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4268 */
4269
4270int
4271xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4272 xmlElementContentPtr *result) {
4273
4274 xmlElementContentPtr tree = NULL;
4275 xmlParserInputPtr input = ctxt->input;
4276 int res;
4277
4278 *result = NULL;
4279
4280 if (RAW != '(') {
4281 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4283 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004284 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004285 ctxt->wellFormed = 0;
4286 ctxt->disableSAX = 1;
4287 return(-1);
4288 }
4289 NEXT;
4290 GROW;
4291 SKIP_BLANKS;
4292 if ((RAW == '#') && (NXT(1) == 'P') &&
4293 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4294 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4295 (NXT(6) == 'A')) {
4296 tree = xmlParseElementMixedContentDecl(ctxt);
4297 res = XML_ELEMENT_TYPE_MIXED;
4298 } else {
4299 tree = xmlParseElementChildrenContentDecl(ctxt);
4300 res = XML_ELEMENT_TYPE_ELEMENT;
4301 }
4302 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4303 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4305 ctxt->sax->error(ctxt->userData,
4306"Element content declaration doesn't start and stop in the same entity\n");
4307 ctxt->wellFormed = 0;
4308 ctxt->disableSAX = 1;
4309 }
4310 SKIP_BLANKS;
4311 *result = tree;
4312 return(res);
4313}
4314
4315/**
4316 * xmlParseElementDecl:
4317 * @ctxt: an XML parser context
4318 *
4319 * parse an Element declaration.
4320 *
4321 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4322 *
4323 * [ VC: Unique Element Type Declaration ]
4324 * No element type may be declared more than once
4325 *
4326 * Returns the type of the element, or -1 in case of error
4327 */
4328int
4329xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4330 xmlChar *name;
4331 int ret = -1;
4332 xmlElementContentPtr content = NULL;
4333
4334 GROW;
4335 if ((RAW == '<') && (NXT(1) == '!') &&
4336 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4337 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4338 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4339 (NXT(8) == 'T')) {
4340 xmlParserInputPtr input = ctxt->input;
4341
4342 SKIP(9);
4343 if (!IS_BLANK(CUR)) {
4344 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4346 ctxt->sax->error(ctxt->userData,
4347 "Space required after 'ELEMENT'\n");
4348 ctxt->wellFormed = 0;
4349 ctxt->disableSAX = 1;
4350 }
4351 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004352 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004353 if (name == NULL) {
4354 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4356 ctxt->sax->error(ctxt->userData,
4357 "xmlParseElementDecl: no name for Element\n");
4358 ctxt->wellFormed = 0;
4359 ctxt->disableSAX = 1;
4360 return(-1);
4361 }
4362 while ((RAW == 0) && (ctxt->inputNr > 1))
4363 xmlPopInput(ctxt);
4364 if (!IS_BLANK(CUR)) {
4365 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4367 ctxt->sax->error(ctxt->userData,
4368 "Space required after the element name\n");
4369 ctxt->wellFormed = 0;
4370 ctxt->disableSAX = 1;
4371 }
4372 SKIP_BLANKS;
4373 if ((RAW == 'E') && (NXT(1) == 'M') &&
4374 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4375 (NXT(4) == 'Y')) {
4376 SKIP(5);
4377 /*
4378 * Element must always be empty.
4379 */
4380 ret = XML_ELEMENT_TYPE_EMPTY;
4381 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4382 (NXT(2) == 'Y')) {
4383 SKIP(3);
4384 /*
4385 * Element is a generic container.
4386 */
4387 ret = XML_ELEMENT_TYPE_ANY;
4388 } else if (RAW == '(') {
4389 ret = xmlParseElementContentDecl(ctxt, name, &content);
4390 } else {
4391 /*
4392 * [ WFC: PEs in Internal Subset ] error handling.
4393 */
4394 if ((RAW == '%') && (ctxt->external == 0) &&
4395 (ctxt->inputNr == 1)) {
4396 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4398 ctxt->sax->error(ctxt->userData,
4399 "PEReference: forbidden within markup decl in internal subset\n");
4400 } else {
4401 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4403 ctxt->sax->error(ctxt->userData,
4404 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4405 }
4406 ctxt->wellFormed = 0;
4407 ctxt->disableSAX = 1;
4408 if (name != NULL) xmlFree(name);
4409 return(-1);
4410 }
4411
4412 SKIP_BLANKS;
4413 /*
4414 * Pop-up of finished entities.
4415 */
4416 while ((RAW == 0) && (ctxt->inputNr > 1))
4417 xmlPopInput(ctxt);
4418 SKIP_BLANKS;
4419
4420 if (RAW != '>') {
4421 ctxt->errNo = XML_ERR_GT_REQUIRED;
4422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4423 ctxt->sax->error(ctxt->userData,
4424 "xmlParseElementDecl: expected '>' at the end\n");
4425 ctxt->wellFormed = 0;
4426 ctxt->disableSAX = 1;
4427 } else {
4428 if (input != ctxt->input) {
4429 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4431 ctxt->sax->error(ctxt->userData,
4432"Element declaration doesn't start and stop in the same entity\n");
4433 ctxt->wellFormed = 0;
4434 ctxt->disableSAX = 1;
4435 }
4436
4437 NEXT;
4438 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4439 (ctxt->sax->elementDecl != NULL))
4440 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4441 content);
4442 }
4443 if (content != NULL) {
4444 xmlFreeElementContent(content);
4445 }
4446 if (name != NULL) {
4447 xmlFree(name);
4448 }
4449 }
4450 return(ret);
4451}
4452
4453/**
4454 * xmlParseMarkupDecl:
4455 * @ctxt: an XML parser context
4456 *
4457 * parse Markup declarations
4458 *
4459 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4460 * NotationDecl | PI | Comment
4461 *
4462 * [ VC: Proper Declaration/PE Nesting ]
4463 * Parameter-entity replacement text must be properly nested with
4464 * markup declarations. That is to say, if either the first character
4465 * or the last character of a markup declaration (markupdecl above) is
4466 * contained in the replacement text for a parameter-entity reference,
4467 * both must be contained in the same replacement text.
4468 *
4469 * [ WFC: PEs in Internal Subset ]
4470 * In the internal DTD subset, parameter-entity references can occur
4471 * only where markup declarations can occur, not within markup declarations.
4472 * (This does not apply to references that occur in external parameter
4473 * entities or to the external subset.)
4474 */
4475void
4476xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4477 GROW;
4478 xmlParseElementDecl(ctxt);
4479 xmlParseAttributeListDecl(ctxt);
4480 xmlParseEntityDecl(ctxt);
4481 xmlParseNotationDecl(ctxt);
4482 xmlParsePI(ctxt);
4483 xmlParseComment(ctxt);
4484 /*
4485 * This is only for internal subset. On external entities,
4486 * the replacement is done before parsing stage
4487 */
4488 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4489 xmlParsePEReference(ctxt);
4490 ctxt->instate = XML_PARSER_DTD;
4491}
4492
4493/**
4494 * xmlParseTextDecl:
4495 * @ctxt: an XML parser context
4496 *
4497 * parse an XML declaration header for external entities
4498 *
4499 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4500 *
4501 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4502 */
4503
4504void
4505xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4506 xmlChar *version;
4507
4508 /*
4509 * We know that '<?xml' is here.
4510 */
4511 if ((RAW == '<') && (NXT(1) == '?') &&
4512 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4513 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4514 SKIP(5);
4515 } else {
4516 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4518 ctxt->sax->error(ctxt->userData,
4519 "Text declaration '<?xml' required\n");
4520 ctxt->wellFormed = 0;
4521 ctxt->disableSAX = 1;
4522
4523 return;
4524 }
4525
4526 if (!IS_BLANK(CUR)) {
4527 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4529 ctxt->sax->error(ctxt->userData,
4530 "Space needed after '<?xml'\n");
4531 ctxt->wellFormed = 0;
4532 ctxt->disableSAX = 1;
4533 }
4534 SKIP_BLANKS;
4535
4536 /*
4537 * We may have the VersionInfo here.
4538 */
4539 version = xmlParseVersionInfo(ctxt);
4540 if (version == NULL)
4541 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4542 ctxt->input->version = version;
4543
4544 /*
4545 * We must have the encoding declaration
4546 */
4547 if (!IS_BLANK(CUR)) {
4548 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4550 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4551 ctxt->wellFormed = 0;
4552 ctxt->disableSAX = 1;
4553 }
4554 xmlParseEncodingDecl(ctxt);
4555 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4556 /*
4557 * The XML REC instructs us to stop parsing right here
4558 */
4559 return;
4560 }
4561
4562 SKIP_BLANKS;
4563 if ((RAW == '?') && (NXT(1) == '>')) {
4564 SKIP(2);
4565 } else if (RAW == '>') {
4566 /* Deprecated old WD ... */
4567 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4569 ctxt->sax->error(ctxt->userData,
4570 "XML declaration must end-up with '?>'\n");
4571 ctxt->wellFormed = 0;
4572 ctxt->disableSAX = 1;
4573 NEXT;
4574 } else {
4575 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4577 ctxt->sax->error(ctxt->userData,
4578 "parsing XML declaration: '?>' expected\n");
4579 ctxt->wellFormed = 0;
4580 ctxt->disableSAX = 1;
4581 MOVETO_ENDTAG(CUR_PTR);
4582 NEXT;
4583 }
4584}
4585
4586/*
4587 * xmlParseConditionalSections
4588 * @ctxt: an XML parser context
4589 *
4590 * [61] conditionalSect ::= includeSect | ignoreSect
4591 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4592 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4593 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4594 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4595 */
4596
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004597static void
Owen Taylor3473f882001-02-23 17:55:21 +00004598xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4599 SKIP(3);
4600 SKIP_BLANKS;
4601 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4602 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4603 (NXT(6) == 'E')) {
4604 SKIP(7);
4605 SKIP_BLANKS;
4606 if (RAW != '[') {
4607 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4609 ctxt->sax->error(ctxt->userData,
4610 "XML conditional section '[' expected\n");
4611 ctxt->wellFormed = 0;
4612 ctxt->disableSAX = 1;
4613 } else {
4614 NEXT;
4615 }
4616 if (xmlParserDebugEntities) {
4617 if ((ctxt->input != NULL) && (ctxt->input->filename))
4618 xmlGenericError(xmlGenericErrorContext,
4619 "%s(%d): ", ctxt->input->filename,
4620 ctxt->input->line);
4621 xmlGenericError(xmlGenericErrorContext,
4622 "Entering INCLUDE Conditional Section\n");
4623 }
4624
4625 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4626 (NXT(2) != '>'))) {
4627 const xmlChar *check = CUR_PTR;
4628 int cons = ctxt->input->consumed;
4629 int tok = ctxt->token;
4630
4631 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4632 xmlParseConditionalSections(ctxt);
4633 } else if (IS_BLANK(CUR)) {
4634 NEXT;
4635 } else if (RAW == '%') {
4636 xmlParsePEReference(ctxt);
4637 } else
4638 xmlParseMarkupDecl(ctxt);
4639
4640 /*
4641 * Pop-up of finished entities.
4642 */
4643 while ((RAW == 0) && (ctxt->inputNr > 1))
4644 xmlPopInput(ctxt);
4645
4646 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4647 (tok == ctxt->token)) {
4648 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4650 ctxt->sax->error(ctxt->userData,
4651 "Content error in the external subset\n");
4652 ctxt->wellFormed = 0;
4653 ctxt->disableSAX = 1;
4654 break;
4655 }
4656 }
4657 if (xmlParserDebugEntities) {
4658 if ((ctxt->input != NULL) && (ctxt->input->filename))
4659 xmlGenericError(xmlGenericErrorContext,
4660 "%s(%d): ", ctxt->input->filename,
4661 ctxt->input->line);
4662 xmlGenericError(xmlGenericErrorContext,
4663 "Leaving INCLUDE Conditional Section\n");
4664 }
4665
4666 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4667 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4668 int state;
4669 int instate;
4670 int depth = 0;
4671
4672 SKIP(6);
4673 SKIP_BLANKS;
4674 if (RAW != '[') {
4675 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4677 ctxt->sax->error(ctxt->userData,
4678 "XML conditional section '[' expected\n");
4679 ctxt->wellFormed = 0;
4680 ctxt->disableSAX = 1;
4681 } else {
4682 NEXT;
4683 }
4684 if (xmlParserDebugEntities) {
4685 if ((ctxt->input != NULL) && (ctxt->input->filename))
4686 xmlGenericError(xmlGenericErrorContext,
4687 "%s(%d): ", ctxt->input->filename,
4688 ctxt->input->line);
4689 xmlGenericError(xmlGenericErrorContext,
4690 "Entering IGNORE Conditional Section\n");
4691 }
4692
4693 /*
4694 * Parse up to the end of the conditionnal section
4695 * But disable SAX event generating DTD building in the meantime
4696 */
4697 state = ctxt->disableSAX;
4698 instate = ctxt->instate;
4699 ctxt->disableSAX = 1;
4700 ctxt->instate = XML_PARSER_IGNORE;
4701
4702 while (depth >= 0) {
4703 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4704 depth++;
4705 SKIP(3);
4706 continue;
4707 }
4708 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4709 if (--depth >= 0) SKIP(3);
4710 continue;
4711 }
4712 NEXT;
4713 continue;
4714 }
4715
4716 ctxt->disableSAX = state;
4717 ctxt->instate = instate;
4718
4719 if (xmlParserDebugEntities) {
4720 if ((ctxt->input != NULL) && (ctxt->input->filename))
4721 xmlGenericError(xmlGenericErrorContext,
4722 "%s(%d): ", ctxt->input->filename,
4723 ctxt->input->line);
4724 xmlGenericError(xmlGenericErrorContext,
4725 "Leaving IGNORE Conditional Section\n");
4726 }
4727
4728 } else {
4729 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4731 ctxt->sax->error(ctxt->userData,
4732 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4733 ctxt->wellFormed = 0;
4734 ctxt->disableSAX = 1;
4735 }
4736
4737 if (RAW == 0)
4738 SHRINK;
4739
4740 if (RAW == 0) {
4741 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4743 ctxt->sax->error(ctxt->userData,
4744 "XML conditional section not closed\n");
4745 ctxt->wellFormed = 0;
4746 ctxt->disableSAX = 1;
4747 } else {
4748 SKIP(3);
4749 }
4750}
4751
4752/**
4753 * xmlParseExternalSubset:
4754 * @ctxt: an XML parser context
4755 * @ExternalID: the external identifier
4756 * @SystemID: the system identifier (or URL)
4757 *
4758 * parse Markup declarations from an external subset
4759 *
4760 * [30] extSubset ::= textDecl? extSubsetDecl
4761 *
4762 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4763 */
4764void
4765xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4766 const xmlChar *SystemID) {
4767 GROW;
4768 if ((RAW == '<') && (NXT(1) == '?') &&
4769 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4770 (NXT(4) == 'l')) {
4771 xmlParseTextDecl(ctxt);
4772 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4773 /*
4774 * The XML REC instructs us to stop parsing right here
4775 */
4776 ctxt->instate = XML_PARSER_EOF;
4777 return;
4778 }
4779 }
4780 if (ctxt->myDoc == NULL) {
4781 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4782 }
4783 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4784 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4785
4786 ctxt->instate = XML_PARSER_DTD;
4787 ctxt->external = 1;
4788 while (((RAW == '<') && (NXT(1) == '?')) ||
4789 ((RAW == '<') && (NXT(1) == '!')) ||
4790 IS_BLANK(CUR)) {
4791 const xmlChar *check = CUR_PTR;
4792 int cons = ctxt->input->consumed;
4793 int tok = ctxt->token;
4794
4795 GROW;
4796 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4797 xmlParseConditionalSections(ctxt);
4798 } else if (IS_BLANK(CUR)) {
4799 NEXT;
4800 } else if (RAW == '%') {
4801 xmlParsePEReference(ctxt);
4802 } else
4803 xmlParseMarkupDecl(ctxt);
4804
4805 /*
4806 * Pop-up of finished entities.
4807 */
4808 while ((RAW == 0) && (ctxt->inputNr > 1))
4809 xmlPopInput(ctxt);
4810
4811 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4812 (tok == ctxt->token)) {
4813 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4815 ctxt->sax->error(ctxt->userData,
4816 "Content error in the external subset\n");
4817 ctxt->wellFormed = 0;
4818 ctxt->disableSAX = 1;
4819 break;
4820 }
4821 }
4822
4823 if (RAW != 0) {
4824 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4826 ctxt->sax->error(ctxt->userData,
4827 "Extra content at the end of the document\n");
4828 ctxt->wellFormed = 0;
4829 ctxt->disableSAX = 1;
4830 }
4831
4832}
4833
4834/**
4835 * xmlParseReference:
4836 * @ctxt: an XML parser context
4837 *
4838 * parse and handle entity references in content, depending on the SAX
4839 * interface, this may end-up in a call to character() if this is a
4840 * CharRef, a predefined entity, if there is no reference() callback.
4841 * or if the parser was asked to switch to that mode.
4842 *
4843 * [67] Reference ::= EntityRef | CharRef
4844 */
4845void
4846xmlParseReference(xmlParserCtxtPtr ctxt) {
4847 xmlEntityPtr ent;
4848 xmlChar *val;
4849 if (RAW != '&') return;
4850
4851 if (NXT(1) == '#') {
4852 int i = 0;
4853 xmlChar out[10];
4854 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004855 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004856
4857 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4858 /*
4859 * So we are using non-UTF-8 buffers
4860 * Check that the char fit on 8bits, if not
4861 * generate a CharRef.
4862 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004863 if (value <= 0xFF) {
4864 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004865 out[1] = 0;
4866 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4867 (!ctxt->disableSAX))
4868 ctxt->sax->characters(ctxt->userData, out, 1);
4869 } else {
4870 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004871 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004872 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004873 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004874 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4875 (!ctxt->disableSAX))
4876 ctxt->sax->reference(ctxt->userData, out);
4877 }
4878 } else {
4879 /*
4880 * Just encode the value in UTF-8
4881 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004882 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004883 out[i] = 0;
4884 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4885 (!ctxt->disableSAX))
4886 ctxt->sax->characters(ctxt->userData, out, i);
4887 }
4888 } else {
4889 ent = xmlParseEntityRef(ctxt);
4890 if (ent == NULL) return;
4891 if ((ent->name != NULL) &&
4892 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4893 xmlNodePtr list = NULL;
4894 int ret;
4895
4896
4897 /*
4898 * The first reference to the entity trigger a parsing phase
4899 * where the ent->children is filled with the result from
4900 * the parsing.
4901 */
4902 if (ent->children == NULL) {
4903 xmlChar *value;
4904 value = ent->content;
4905
4906 /*
4907 * Check that this entity is well formed
4908 */
4909 if ((value != NULL) &&
4910 (value[1] == 0) && (value[0] == '<') &&
4911 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4912 /*
4913 * DONE: get definite answer on this !!!
4914 * Lots of entity decls are used to declare a single
4915 * char
4916 * <!ENTITY lt "<">
4917 * Which seems to be valid since
4918 * 2.4: The ampersand character (&) and the left angle
4919 * bracket (<) may appear in their literal form only
4920 * when used ... They are also legal within the literal
4921 * entity value of an internal entity declaration;i
4922 * see "4.3.2 Well-Formed Parsed Entities".
4923 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4924 * Looking at the OASIS test suite and James Clark
4925 * tests, this is broken. However the XML REC uses
4926 * it. Is the XML REC not well-formed ????
4927 * This is a hack to avoid this problem
4928 *
4929 * ANSWER: since lt gt amp .. are already defined,
4930 * this is a redefinition and hence the fact that the
4931 * contentis not well balanced is not a Wf error, this
4932 * is lousy but acceptable.
4933 */
4934 list = xmlNewDocText(ctxt->myDoc, value);
4935 if (list != NULL) {
4936 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4937 (ent->children == NULL)) {
4938 ent->children = list;
4939 ent->last = list;
4940 list->parent = (xmlNodePtr) ent;
4941 } else {
4942 xmlFreeNodeList(list);
4943 }
4944 } else if (list != NULL) {
4945 xmlFreeNodeList(list);
4946 }
4947 } else {
4948 /*
4949 * 4.3.2: An internal general parsed entity is well-formed
4950 * if its replacement text matches the production labeled
4951 * content.
4952 */
4953 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4954 ctxt->depth++;
4955 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4956 ctxt->sax, NULL, ctxt->depth,
4957 value, &list);
4958 ctxt->depth--;
4959 } else if (ent->etype ==
4960 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4961 ctxt->depth++;
4962 ret = xmlParseExternalEntity(ctxt->myDoc,
4963 ctxt->sax, NULL, ctxt->depth,
4964 ent->URI, ent->ExternalID, &list);
4965 ctxt->depth--;
4966 } else {
4967 ret = -1;
4968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4969 ctxt->sax->error(ctxt->userData,
4970 "Internal: invalid entity type\n");
4971 }
4972 if (ret == XML_ERR_ENTITY_LOOP) {
4973 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4975 ctxt->sax->error(ctxt->userData,
4976 "Detected entity reference loop\n");
4977 ctxt->wellFormed = 0;
4978 ctxt->disableSAX = 1;
4979 } else if ((ret == 0) && (list != NULL)) {
4980 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4981 (ent->children == NULL)) {
4982 ent->children = list;
4983 while (list != NULL) {
4984 list->parent = (xmlNodePtr) ent;
4985 if (list->next == NULL)
4986 ent->last = list;
4987 list = list->next;
4988 }
4989 } else {
4990 xmlFreeNodeList(list);
4991 }
4992 } else if (ret > 0) {
4993 ctxt->errNo = ret;
4994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4995 ctxt->sax->error(ctxt->userData,
4996 "Entity value required\n");
4997 ctxt->wellFormed = 0;
4998 ctxt->disableSAX = 1;
4999 } else if (list != NULL) {
5000 xmlFreeNodeList(list);
5001 }
5002 }
5003 }
5004 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5005 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5006 /*
5007 * Create a node.
5008 */
5009 ctxt->sax->reference(ctxt->userData, ent->name);
5010 return;
5011 } else if (ctxt->replaceEntities) {
5012 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5013 /*
5014 * Seems we are generating the DOM content, do
5015 * a simple tree copy
5016 */
5017 xmlNodePtr new;
5018 new = xmlCopyNodeList(ent->children);
5019
5020 xmlAddChildList(ctxt->node, new);
5021 /*
5022 * This is to avoid a nasty side effect, see
5023 * characters() in SAX.c
5024 */
5025 ctxt->nodemem = 0;
5026 ctxt->nodelen = 0;
5027 return;
5028 } else {
5029 /*
5030 * Probably running in SAX mode
5031 */
5032 xmlParserInputPtr input;
5033
5034 input = xmlNewEntityInputStream(ctxt, ent);
5035 xmlPushInput(ctxt, input);
5036 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5037 (RAW == '<') && (NXT(1) == '?') &&
5038 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5039 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5040 xmlParseTextDecl(ctxt);
5041 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5042 /*
5043 * The XML REC instructs us to stop parsing right here
5044 */
5045 ctxt->instate = XML_PARSER_EOF;
5046 return;
5047 }
5048 if (input->standalone == 1) {
5049 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5051 ctxt->sax->error(ctxt->userData,
5052 "external parsed entities cannot be standalone\n");
5053 ctxt->wellFormed = 0;
5054 ctxt->disableSAX = 1;
5055 }
5056 }
5057 return;
5058 }
5059 }
5060 } else {
5061 val = ent->content;
5062 if (val == NULL) return;
5063 /*
5064 * inline the entity.
5065 */
5066 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5067 (!ctxt->disableSAX))
5068 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5069 }
5070 }
5071}
5072
5073/**
5074 * xmlParseEntityRef:
5075 * @ctxt: an XML parser context
5076 *
5077 * parse ENTITY references declarations
5078 *
5079 * [68] EntityRef ::= '&' Name ';'
5080 *
5081 * [ WFC: Entity Declared ]
5082 * In a document without any DTD, a document with only an internal DTD
5083 * subset which contains no parameter entity references, or a document
5084 * with "standalone='yes'", the Name given in the entity reference
5085 * must match that in an entity declaration, except that well-formed
5086 * documents need not declare any of the following entities: amp, lt,
5087 * gt, apos, quot. The declaration of a parameter entity must precede
5088 * any reference to it. Similarly, the declaration of a general entity
5089 * must precede any reference to it which appears in a default value in an
5090 * attribute-list declaration. Note that if entities are declared in the
5091 * external subset or in external parameter entities, a non-validating
5092 * processor is not obligated to read and process their declarations;
5093 * for such documents, the rule that an entity must be declared is a
5094 * well-formedness constraint only if standalone='yes'.
5095 *
5096 * [ WFC: Parsed Entity ]
5097 * An entity reference must not contain the name of an unparsed entity
5098 *
5099 * Returns the xmlEntityPtr if found, or NULL otherwise.
5100 */
5101xmlEntityPtr
5102xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5103 xmlChar *name;
5104 xmlEntityPtr ent = NULL;
5105
5106 GROW;
5107
5108 if (RAW == '&') {
5109 NEXT;
5110 name = xmlParseName(ctxt);
5111 if (name == NULL) {
5112 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5114 ctxt->sax->error(ctxt->userData,
5115 "xmlParseEntityRef: no name\n");
5116 ctxt->wellFormed = 0;
5117 ctxt->disableSAX = 1;
5118 } else {
5119 if (RAW == ';') {
5120 NEXT;
5121 /*
5122 * Ask first SAX for entity resolution, otherwise try the
5123 * predefined set.
5124 */
5125 if (ctxt->sax != NULL) {
5126 if (ctxt->sax->getEntity != NULL)
5127 ent = ctxt->sax->getEntity(ctxt->userData, name);
5128 if (ent == NULL)
5129 ent = xmlGetPredefinedEntity(name);
5130 }
5131 /*
5132 * [ WFC: Entity Declared ]
5133 * In a document without any DTD, a document with only an
5134 * internal DTD subset which contains no parameter entity
5135 * references, or a document with "standalone='yes'", the
5136 * Name given in the entity reference must match that in an
5137 * entity declaration, except that well-formed documents
5138 * need not declare any of the following entities: amp, lt,
5139 * gt, apos, quot.
5140 * The declaration of a parameter entity must precede any
5141 * reference to it.
5142 * Similarly, the declaration of a general entity must
5143 * precede any reference to it which appears in a default
5144 * value in an attribute-list declaration. Note that if
5145 * entities are declared in the external subset or in
5146 * external parameter entities, a non-validating processor
5147 * is not obligated to read and process their declarations;
5148 * for such documents, the rule that an entity must be
5149 * declared is a well-formedness constraint only if
5150 * standalone='yes'.
5151 */
5152 if (ent == NULL) {
5153 if ((ctxt->standalone == 1) ||
5154 ((ctxt->hasExternalSubset == 0) &&
5155 (ctxt->hasPErefs == 0))) {
5156 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5158 ctxt->sax->error(ctxt->userData,
5159 "Entity '%s' not defined\n", name);
5160 ctxt->wellFormed = 0;
5161 ctxt->disableSAX = 1;
5162 } else {
5163 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5164 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5165 ctxt->sax->warning(ctxt->userData,
5166 "Entity '%s' not defined\n", name);
5167 }
5168 }
5169
5170 /*
5171 * [ WFC: Parsed Entity ]
5172 * An entity reference must not contain the name of an
5173 * unparsed entity
5174 */
5175 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5176 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5178 ctxt->sax->error(ctxt->userData,
5179 "Entity reference to unparsed entity %s\n", name);
5180 ctxt->wellFormed = 0;
5181 ctxt->disableSAX = 1;
5182 }
5183
5184 /*
5185 * [ WFC: No External Entity References ]
5186 * Attribute values cannot contain direct or indirect
5187 * entity references to external entities.
5188 */
5189 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5190 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5191 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5193 ctxt->sax->error(ctxt->userData,
5194 "Attribute references external entity '%s'\n", name);
5195 ctxt->wellFormed = 0;
5196 ctxt->disableSAX = 1;
5197 }
5198 /*
5199 * [ WFC: No < in Attribute Values ]
5200 * The replacement text of any entity referred to directly or
5201 * indirectly in an attribute value (other than "&lt;") must
5202 * not contain a <.
5203 */
5204 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5205 (ent != NULL) &&
5206 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5207 (ent->content != NULL) &&
5208 (xmlStrchr(ent->content, '<'))) {
5209 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5211 ctxt->sax->error(ctxt->userData,
5212 "'<' in entity '%s' is not allowed in attributes values\n", name);
5213 ctxt->wellFormed = 0;
5214 ctxt->disableSAX = 1;
5215 }
5216
5217 /*
5218 * Internal check, no parameter entities here ...
5219 */
5220 else {
5221 switch (ent->etype) {
5222 case XML_INTERNAL_PARAMETER_ENTITY:
5223 case XML_EXTERNAL_PARAMETER_ENTITY:
5224 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5226 ctxt->sax->error(ctxt->userData,
5227 "Attempt to reference the parameter entity '%s'\n", name);
5228 ctxt->wellFormed = 0;
5229 ctxt->disableSAX = 1;
5230 break;
5231 default:
5232 break;
5233 }
5234 }
5235
5236 /*
5237 * [ WFC: No Recursion ]
5238 * A parsed entity must not contain a recursive reference
5239 * to itself, either directly or indirectly.
5240 * Done somewhere else
5241 */
5242
5243 } else {
5244 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5246 ctxt->sax->error(ctxt->userData,
5247 "xmlParseEntityRef: expecting ';'\n");
5248 ctxt->wellFormed = 0;
5249 ctxt->disableSAX = 1;
5250 }
5251 xmlFree(name);
5252 }
5253 }
5254 return(ent);
5255}
5256
5257/**
5258 * xmlParseStringEntityRef:
5259 * @ctxt: an XML parser context
5260 * @str: a pointer to an index in the string
5261 *
5262 * parse ENTITY references declarations, but this version parses it from
5263 * a string value.
5264 *
5265 * [68] EntityRef ::= '&' Name ';'
5266 *
5267 * [ WFC: Entity Declared ]
5268 * In a document without any DTD, a document with only an internal DTD
5269 * subset which contains no parameter entity references, or a document
5270 * with "standalone='yes'", the Name given in the entity reference
5271 * must match that in an entity declaration, except that well-formed
5272 * documents need not declare any of the following entities: amp, lt,
5273 * gt, apos, quot. The declaration of a parameter entity must precede
5274 * any reference to it. Similarly, the declaration of a general entity
5275 * must precede any reference to it which appears in a default value in an
5276 * attribute-list declaration. Note that if entities are declared in the
5277 * external subset or in external parameter entities, a non-validating
5278 * processor is not obligated to read and process their declarations;
5279 * for such documents, the rule that an entity must be declared is a
5280 * well-formedness constraint only if standalone='yes'.
5281 *
5282 * [ WFC: Parsed Entity ]
5283 * An entity reference must not contain the name of an unparsed entity
5284 *
5285 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5286 * is updated to the current location in the string.
5287 */
5288xmlEntityPtr
5289xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5290 xmlChar *name;
5291 const xmlChar *ptr;
5292 xmlChar cur;
5293 xmlEntityPtr ent = NULL;
5294
5295 if ((str == NULL) || (*str == NULL))
5296 return(NULL);
5297 ptr = *str;
5298 cur = *ptr;
5299 if (cur == '&') {
5300 ptr++;
5301 cur = *ptr;
5302 name = xmlParseStringName(ctxt, &ptr);
5303 if (name == NULL) {
5304 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5306 ctxt->sax->error(ctxt->userData,
5307 "xmlParseEntityRef: no name\n");
5308 ctxt->wellFormed = 0;
5309 ctxt->disableSAX = 1;
5310 } else {
5311 if (*ptr == ';') {
5312 ptr++;
5313 /*
5314 * Ask first SAX for entity resolution, otherwise try the
5315 * predefined set.
5316 */
5317 if (ctxt->sax != NULL) {
5318 if (ctxt->sax->getEntity != NULL)
5319 ent = ctxt->sax->getEntity(ctxt->userData, name);
5320 if (ent == NULL)
5321 ent = xmlGetPredefinedEntity(name);
5322 }
5323 /*
5324 * [ WFC: Entity Declared ]
5325 * In a document without any DTD, a document with only an
5326 * internal DTD subset which contains no parameter entity
5327 * references, or a document with "standalone='yes'", the
5328 * Name given in the entity reference must match that in an
5329 * entity declaration, except that well-formed documents
5330 * need not declare any of the following entities: amp, lt,
5331 * gt, apos, quot.
5332 * The declaration of a parameter entity must precede any
5333 * reference to it.
5334 * Similarly, the declaration of a general entity must
5335 * precede any reference to it which appears in a default
5336 * value in an attribute-list declaration. Note that if
5337 * entities are declared in the external subset or in
5338 * external parameter entities, a non-validating processor
5339 * is not obligated to read and process their declarations;
5340 * for such documents, the rule that an entity must be
5341 * declared is a well-formedness constraint only if
5342 * standalone='yes'.
5343 */
5344 if (ent == NULL) {
5345 if ((ctxt->standalone == 1) ||
5346 ((ctxt->hasExternalSubset == 0) &&
5347 (ctxt->hasPErefs == 0))) {
5348 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5350 ctxt->sax->error(ctxt->userData,
5351 "Entity '%s' not defined\n", name);
5352 ctxt->wellFormed = 0;
5353 ctxt->disableSAX = 1;
5354 } else {
5355 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5356 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5357 ctxt->sax->warning(ctxt->userData,
5358 "Entity '%s' not defined\n", name);
5359 }
5360 }
5361
5362 /*
5363 * [ WFC: Parsed Entity ]
5364 * An entity reference must not contain the name of an
5365 * unparsed entity
5366 */
5367 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5368 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5370 ctxt->sax->error(ctxt->userData,
5371 "Entity reference to unparsed entity %s\n", name);
5372 ctxt->wellFormed = 0;
5373 ctxt->disableSAX = 1;
5374 }
5375
5376 /*
5377 * [ WFC: No External Entity References ]
5378 * Attribute values cannot contain direct or indirect
5379 * entity references to external entities.
5380 */
5381 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5382 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5383 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5385 ctxt->sax->error(ctxt->userData,
5386 "Attribute references external entity '%s'\n", name);
5387 ctxt->wellFormed = 0;
5388 ctxt->disableSAX = 1;
5389 }
5390 /*
5391 * [ WFC: No < in Attribute Values ]
5392 * The replacement text of any entity referred to directly or
5393 * indirectly in an attribute value (other than "&lt;") must
5394 * not contain a <.
5395 */
5396 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5397 (ent != NULL) &&
5398 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5399 (ent->content != NULL) &&
5400 (xmlStrchr(ent->content, '<'))) {
5401 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5403 ctxt->sax->error(ctxt->userData,
5404 "'<' in entity '%s' is not allowed in attributes values\n", name);
5405 ctxt->wellFormed = 0;
5406 ctxt->disableSAX = 1;
5407 }
5408
5409 /*
5410 * Internal check, no parameter entities here ...
5411 */
5412 else {
5413 switch (ent->etype) {
5414 case XML_INTERNAL_PARAMETER_ENTITY:
5415 case XML_EXTERNAL_PARAMETER_ENTITY:
5416 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5418 ctxt->sax->error(ctxt->userData,
5419 "Attempt to reference the parameter entity '%s'\n", name);
5420 ctxt->wellFormed = 0;
5421 ctxt->disableSAX = 1;
5422 break;
5423 default:
5424 break;
5425 }
5426 }
5427
5428 /*
5429 * [ WFC: No Recursion ]
5430 * A parsed entity must not contain a recursive reference
5431 * to itself, either directly or indirectly.
5432 * Done somewhwere else
5433 */
5434
5435 } else {
5436 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5438 ctxt->sax->error(ctxt->userData,
5439 "xmlParseEntityRef: expecting ';'\n");
5440 ctxt->wellFormed = 0;
5441 ctxt->disableSAX = 1;
5442 }
5443 xmlFree(name);
5444 }
5445 }
5446 *str = ptr;
5447 return(ent);
5448}
5449
5450/**
5451 * xmlParsePEReference:
5452 * @ctxt: an XML parser context
5453 *
5454 * parse PEReference declarations
5455 * The entity content is handled directly by pushing it's content as
5456 * a new input stream.
5457 *
5458 * [69] PEReference ::= '%' Name ';'
5459 *
5460 * [ WFC: No Recursion ]
5461 * A parsed entity must not contain a recursive
5462 * reference to itself, either directly or indirectly.
5463 *
5464 * [ WFC: Entity Declared ]
5465 * In a document without any DTD, a document with only an internal DTD
5466 * subset which contains no parameter entity references, or a document
5467 * with "standalone='yes'", ... ... The declaration of a parameter
5468 * entity must precede any reference to it...
5469 *
5470 * [ VC: Entity Declared ]
5471 * In a document with an external subset or external parameter entities
5472 * with "standalone='no'", ... ... The declaration of a parameter entity
5473 * must precede any reference to it...
5474 *
5475 * [ WFC: In DTD ]
5476 * Parameter-entity references may only appear in the DTD.
5477 * NOTE: misleading but this is handled.
5478 */
5479void
5480xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5481 xmlChar *name;
5482 xmlEntityPtr entity = NULL;
5483 xmlParserInputPtr input;
5484
5485 if (RAW == '%') {
5486 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005487 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005488 if (name == NULL) {
5489 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5491 ctxt->sax->error(ctxt->userData,
5492 "xmlParsePEReference: no name\n");
5493 ctxt->wellFormed = 0;
5494 ctxt->disableSAX = 1;
5495 } else {
5496 if (RAW == ';') {
5497 NEXT;
5498 if ((ctxt->sax != NULL) &&
5499 (ctxt->sax->getParameterEntity != NULL))
5500 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5501 name);
5502 if (entity == NULL) {
5503 /*
5504 * [ WFC: Entity Declared ]
5505 * In a document without any DTD, a document with only an
5506 * internal DTD subset which contains no parameter entity
5507 * references, or a document with "standalone='yes'", ...
5508 * ... The declaration of a parameter entity must precede
5509 * any reference to it...
5510 */
5511 if ((ctxt->standalone == 1) ||
5512 ((ctxt->hasExternalSubset == 0) &&
5513 (ctxt->hasPErefs == 0))) {
5514 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5515 if ((!ctxt->disableSAX) &&
5516 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5517 ctxt->sax->error(ctxt->userData,
5518 "PEReference: %%%s; not found\n", name);
5519 ctxt->wellFormed = 0;
5520 ctxt->disableSAX = 1;
5521 } else {
5522 /*
5523 * [ VC: Entity Declared ]
5524 * In a document with an external subset or external
5525 * parameter entities with "standalone='no'", ...
5526 * ... The declaration of a parameter entity must precede
5527 * any reference to it...
5528 */
5529 if ((!ctxt->disableSAX) &&
5530 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5531 ctxt->sax->warning(ctxt->userData,
5532 "PEReference: %%%s; not found\n", name);
5533 ctxt->valid = 0;
5534 }
5535 } else {
5536 /*
5537 * Internal checking in case the entity quest barfed
5538 */
5539 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5540 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5541 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5542 ctxt->sax->warning(ctxt->userData,
5543 "Internal: %%%s; is not a parameter entity\n", name);
5544 } else {
5545 /*
5546 * TODO !!!
5547 * handle the extra spaces added before and after
5548 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5549 */
5550 input = xmlNewEntityInputStream(ctxt, entity);
5551 xmlPushInput(ctxt, input);
5552 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5553 (RAW == '<') && (NXT(1) == '?') &&
5554 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5555 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5556 xmlParseTextDecl(ctxt);
5557 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5558 /*
5559 * The XML REC instructs us to stop parsing
5560 * right here
5561 */
5562 ctxt->instate = XML_PARSER_EOF;
5563 xmlFree(name);
5564 return;
5565 }
5566 }
5567 if (ctxt->token == 0)
5568 ctxt->token = ' ';
5569 }
5570 }
5571 ctxt->hasPErefs = 1;
5572 } else {
5573 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5575 ctxt->sax->error(ctxt->userData,
5576 "xmlParsePEReference: expecting ';'\n");
5577 ctxt->wellFormed = 0;
5578 ctxt->disableSAX = 1;
5579 }
5580 xmlFree(name);
5581 }
5582 }
5583}
5584
5585/**
5586 * xmlParseStringPEReference:
5587 * @ctxt: an XML parser context
5588 * @str: a pointer to an index in the string
5589 *
5590 * parse PEReference declarations
5591 *
5592 * [69] PEReference ::= '%' Name ';'
5593 *
5594 * [ WFC: No Recursion ]
5595 * A parsed entity must not contain a recursive
5596 * reference to itself, either directly or indirectly.
5597 *
5598 * [ WFC: Entity Declared ]
5599 * In a document without any DTD, a document with only an internal DTD
5600 * subset which contains no parameter entity references, or a document
5601 * with "standalone='yes'", ... ... The declaration of a parameter
5602 * entity must precede any reference to it...
5603 *
5604 * [ VC: Entity Declared ]
5605 * In a document with an external subset or external parameter entities
5606 * with "standalone='no'", ... ... The declaration of a parameter entity
5607 * must precede any reference to it...
5608 *
5609 * [ WFC: In DTD ]
5610 * Parameter-entity references may only appear in the DTD.
5611 * NOTE: misleading but this is handled.
5612 *
5613 * Returns the string of the entity content.
5614 * str is updated to the current value of the index
5615 */
5616xmlEntityPtr
5617xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5618 const xmlChar *ptr;
5619 xmlChar cur;
5620 xmlChar *name;
5621 xmlEntityPtr entity = NULL;
5622
5623 if ((str == NULL) || (*str == NULL)) return(NULL);
5624 ptr = *str;
5625 cur = *ptr;
5626 if (cur == '%') {
5627 ptr++;
5628 cur = *ptr;
5629 name = xmlParseStringName(ctxt, &ptr);
5630 if (name == NULL) {
5631 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5633 ctxt->sax->error(ctxt->userData,
5634 "xmlParseStringPEReference: no name\n");
5635 ctxt->wellFormed = 0;
5636 ctxt->disableSAX = 1;
5637 } else {
5638 cur = *ptr;
5639 if (cur == ';') {
5640 ptr++;
5641 cur = *ptr;
5642 if ((ctxt->sax != NULL) &&
5643 (ctxt->sax->getParameterEntity != NULL))
5644 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5645 name);
5646 if (entity == NULL) {
5647 /*
5648 * [ WFC: Entity Declared ]
5649 * In a document without any DTD, a document with only an
5650 * internal DTD subset which contains no parameter entity
5651 * references, or a document with "standalone='yes'", ...
5652 * ... The declaration of a parameter entity must precede
5653 * any reference to it...
5654 */
5655 if ((ctxt->standalone == 1) ||
5656 ((ctxt->hasExternalSubset == 0) &&
5657 (ctxt->hasPErefs == 0))) {
5658 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5660 ctxt->sax->error(ctxt->userData,
5661 "PEReference: %%%s; not found\n", name);
5662 ctxt->wellFormed = 0;
5663 ctxt->disableSAX = 1;
5664 } else {
5665 /*
5666 * [ VC: Entity Declared ]
5667 * In a document with an external subset or external
5668 * parameter entities with "standalone='no'", ...
5669 * ... The declaration of a parameter entity must
5670 * precede any reference to it...
5671 */
5672 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5673 ctxt->sax->warning(ctxt->userData,
5674 "PEReference: %%%s; not found\n", name);
5675 ctxt->valid = 0;
5676 }
5677 } else {
5678 /*
5679 * Internal checking in case the entity quest barfed
5680 */
5681 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5682 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5683 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5684 ctxt->sax->warning(ctxt->userData,
5685 "Internal: %%%s; is not a parameter entity\n", name);
5686 }
5687 }
5688 ctxt->hasPErefs = 1;
5689 } else {
5690 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5692 ctxt->sax->error(ctxt->userData,
5693 "xmlParseStringPEReference: expecting ';'\n");
5694 ctxt->wellFormed = 0;
5695 ctxt->disableSAX = 1;
5696 }
5697 xmlFree(name);
5698 }
5699 }
5700 *str = ptr;
5701 return(entity);
5702}
5703
5704/**
5705 * xmlParseDocTypeDecl:
5706 * @ctxt: an XML parser context
5707 *
5708 * parse a DOCTYPE declaration
5709 *
5710 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5711 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5712 *
5713 * [ VC: Root Element Type ]
5714 * The Name in the document type declaration must match the element
5715 * type of the root element.
5716 */
5717
5718void
5719xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5720 xmlChar *name = NULL;
5721 xmlChar *ExternalID = NULL;
5722 xmlChar *URI = NULL;
5723
5724 /*
5725 * We know that '<!DOCTYPE' has been detected.
5726 */
5727 SKIP(9);
5728
5729 SKIP_BLANKS;
5730
5731 /*
5732 * Parse the DOCTYPE name.
5733 */
5734 name = xmlParseName(ctxt);
5735 if (name == NULL) {
5736 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5738 ctxt->sax->error(ctxt->userData,
5739 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5740 ctxt->wellFormed = 0;
5741 ctxt->disableSAX = 1;
5742 }
5743 ctxt->intSubName = name;
5744
5745 SKIP_BLANKS;
5746
5747 /*
5748 * Check for SystemID and ExternalID
5749 */
5750 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5751
5752 if ((URI != NULL) || (ExternalID != NULL)) {
5753 ctxt->hasExternalSubset = 1;
5754 }
5755 ctxt->extSubURI = URI;
5756 ctxt->extSubSystem = ExternalID;
5757
5758 SKIP_BLANKS;
5759
5760 /*
5761 * Create and update the internal subset.
5762 */
5763 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5764 (!ctxt->disableSAX))
5765 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5766
5767 /*
5768 * Is there any internal subset declarations ?
5769 * they are handled separately in xmlParseInternalSubset()
5770 */
5771 if (RAW == '[')
5772 return;
5773
5774 /*
5775 * We should be at the end of the DOCTYPE declaration.
5776 */
5777 if (RAW != '>') {
5778 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5780 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5781 ctxt->wellFormed = 0;
5782 ctxt->disableSAX = 1;
5783 }
5784 NEXT;
5785}
5786
5787/**
5788 * xmlParseInternalsubset:
5789 * @ctxt: an XML parser context
5790 *
5791 * parse the internal subset declaration
5792 *
5793 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5794 */
5795
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005796static void
Owen Taylor3473f882001-02-23 17:55:21 +00005797xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5798 /*
5799 * Is there any DTD definition ?
5800 */
5801 if (RAW == '[') {
5802 ctxt->instate = XML_PARSER_DTD;
5803 NEXT;
5804 /*
5805 * Parse the succession of Markup declarations and
5806 * PEReferences.
5807 * Subsequence (markupdecl | PEReference | S)*
5808 */
5809 while (RAW != ']') {
5810 const xmlChar *check = CUR_PTR;
5811 int cons = ctxt->input->consumed;
5812
5813 SKIP_BLANKS;
5814 xmlParseMarkupDecl(ctxt);
5815 xmlParsePEReference(ctxt);
5816
5817 /*
5818 * Pop-up of finished entities.
5819 */
5820 while ((RAW == 0) && (ctxt->inputNr > 1))
5821 xmlPopInput(ctxt);
5822
5823 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5824 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5826 ctxt->sax->error(ctxt->userData,
5827 "xmlParseInternalSubset: error detected in Markup declaration\n");
5828 ctxt->wellFormed = 0;
5829 ctxt->disableSAX = 1;
5830 break;
5831 }
5832 }
5833 if (RAW == ']') {
5834 NEXT;
5835 SKIP_BLANKS;
5836 }
5837 }
5838
5839 /*
5840 * We should be at the end of the DOCTYPE declaration.
5841 */
5842 if (RAW != '>') {
5843 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5845 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5846 ctxt->wellFormed = 0;
5847 ctxt->disableSAX = 1;
5848 }
5849 NEXT;
5850}
5851
5852/**
5853 * xmlParseAttribute:
5854 * @ctxt: an XML parser context
5855 * @value: a xmlChar ** used to store the value of the attribute
5856 *
5857 * parse an attribute
5858 *
5859 * [41] Attribute ::= Name Eq AttValue
5860 *
5861 * [ WFC: No External Entity References ]
5862 * Attribute values cannot contain direct or indirect entity references
5863 * to external entities.
5864 *
5865 * [ WFC: No < in Attribute Values ]
5866 * The replacement text of any entity referred to directly or indirectly in
5867 * an attribute value (other than "&lt;") must not contain a <.
5868 *
5869 * [ VC: Attribute Value Type ]
5870 * The attribute must have been declared; the value must be of the type
5871 * declared for it.
5872 *
5873 * [25] Eq ::= S? '=' S?
5874 *
5875 * With namespace:
5876 *
5877 * [NS 11] Attribute ::= QName Eq AttValue
5878 *
5879 * Also the case QName == xmlns:??? is handled independently as a namespace
5880 * definition.
5881 *
5882 * Returns the attribute name, and the value in *value.
5883 */
5884
5885xmlChar *
5886xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5887 xmlChar *name, *val;
5888
5889 *value = NULL;
5890 name = xmlParseName(ctxt);
5891 if (name == NULL) {
5892 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5894 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5895 ctxt->wellFormed = 0;
5896 ctxt->disableSAX = 1;
5897 return(NULL);
5898 }
5899
5900 /*
5901 * read the value
5902 */
5903 SKIP_BLANKS;
5904 if (RAW == '=') {
5905 NEXT;
5906 SKIP_BLANKS;
5907 val = xmlParseAttValue(ctxt);
5908 ctxt->instate = XML_PARSER_CONTENT;
5909 } else {
5910 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5912 ctxt->sax->error(ctxt->userData,
5913 "Specification mandate value for attribute %s\n", name);
5914 ctxt->wellFormed = 0;
5915 ctxt->disableSAX = 1;
5916 xmlFree(name);
5917 return(NULL);
5918 }
5919
5920 /*
5921 * Check that xml:lang conforms to the specification
5922 * No more registered as an error, just generate a warning now
5923 * since this was deprecated in XML second edition
5924 */
5925 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5926 if (!xmlCheckLanguageID(val)) {
5927 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5928 ctxt->sax->warning(ctxt->userData,
5929 "Malformed value for xml:lang : %s\n", val);
5930 }
5931 }
5932
5933 /*
5934 * Check that xml:space conforms to the specification
5935 */
5936 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5937 if (xmlStrEqual(val, BAD_CAST "default"))
5938 *(ctxt->space) = 0;
5939 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5940 *(ctxt->space) = 1;
5941 else {
5942 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5944 ctxt->sax->error(ctxt->userData,
5945"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5946 val);
5947 ctxt->wellFormed = 0;
5948 ctxt->disableSAX = 1;
5949 }
5950 }
5951
5952 *value = val;
5953 return(name);
5954}
5955
5956/**
5957 * xmlParseStartTag:
5958 * @ctxt: an XML parser context
5959 *
5960 * parse a start of tag either for rule element or
5961 * EmptyElement. In both case we don't parse the tag closing chars.
5962 *
5963 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5964 *
5965 * [ WFC: Unique Att Spec ]
5966 * No attribute name may appear more than once in the same start-tag or
5967 * empty-element tag.
5968 *
5969 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5970 *
5971 * [ WFC: Unique Att Spec ]
5972 * No attribute name may appear more than once in the same start-tag or
5973 * empty-element tag.
5974 *
5975 * With namespace:
5976 *
5977 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5978 *
5979 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5980 *
5981 * Returns the element name parsed
5982 */
5983
5984xmlChar *
5985xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5986 xmlChar *name;
5987 xmlChar *attname;
5988 xmlChar *attvalue;
5989 const xmlChar **atts = NULL;
5990 int nbatts = 0;
5991 int maxatts = 0;
5992 int i;
5993
5994 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00005995 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00005996
5997 name = xmlParseName(ctxt);
5998 if (name == NULL) {
5999 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6001 ctxt->sax->error(ctxt->userData,
6002 "xmlParseStartTag: invalid element name\n");
6003 ctxt->wellFormed = 0;
6004 ctxt->disableSAX = 1;
6005 return(NULL);
6006 }
6007
6008 /*
6009 * Now parse the attributes, it ends up with the ending
6010 *
6011 * (S Attribute)* S?
6012 */
6013 SKIP_BLANKS;
6014 GROW;
6015
Daniel Veillard21a0f912001-02-25 19:54:14 +00006016 while ((RAW != '>') &&
6017 ((RAW != '/') || (NXT(1) != '>')) &&
6018 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006019 const xmlChar *q = CUR_PTR;
6020 int cons = ctxt->input->consumed;
6021
6022 attname = xmlParseAttribute(ctxt, &attvalue);
6023 if ((attname != NULL) && (attvalue != NULL)) {
6024 /*
6025 * [ WFC: Unique Att Spec ]
6026 * No attribute name may appear more than once in the same
6027 * start-tag or empty-element tag.
6028 */
6029 for (i = 0; i < nbatts;i += 2) {
6030 if (xmlStrEqual(atts[i], attname)) {
6031 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6033 ctxt->sax->error(ctxt->userData,
6034 "Attribute %s redefined\n",
6035 attname);
6036 ctxt->wellFormed = 0;
6037 ctxt->disableSAX = 1;
6038 xmlFree(attname);
6039 xmlFree(attvalue);
6040 goto failed;
6041 }
6042 }
6043
6044 /*
6045 * Add the pair to atts
6046 */
6047 if (atts == NULL) {
6048 maxatts = 10;
6049 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6050 if (atts == NULL) {
6051 xmlGenericError(xmlGenericErrorContext,
6052 "malloc of %ld byte failed\n",
6053 maxatts * (long)sizeof(xmlChar *));
6054 return(NULL);
6055 }
6056 } else if (nbatts + 4 > maxatts) {
6057 maxatts *= 2;
6058 atts = (const xmlChar **) xmlRealloc((void *) atts,
6059 maxatts * sizeof(xmlChar *));
6060 if (atts == NULL) {
6061 xmlGenericError(xmlGenericErrorContext,
6062 "realloc of %ld byte failed\n",
6063 maxatts * (long)sizeof(xmlChar *));
6064 return(NULL);
6065 }
6066 }
6067 atts[nbatts++] = attname;
6068 atts[nbatts++] = attvalue;
6069 atts[nbatts] = NULL;
6070 atts[nbatts + 1] = NULL;
6071 } else {
6072 if (attname != NULL)
6073 xmlFree(attname);
6074 if (attvalue != NULL)
6075 xmlFree(attvalue);
6076 }
6077
6078failed:
6079
6080 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6081 break;
6082 if (!IS_BLANK(RAW)) {
6083 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6085 ctxt->sax->error(ctxt->userData,
6086 "attributes construct error\n");
6087 ctxt->wellFormed = 0;
6088 ctxt->disableSAX = 1;
6089 }
6090 SKIP_BLANKS;
6091 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6092 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6094 ctxt->sax->error(ctxt->userData,
6095 "xmlParseStartTag: problem parsing attributes\n");
6096 ctxt->wellFormed = 0;
6097 ctxt->disableSAX = 1;
6098 break;
6099 }
6100 GROW;
6101 }
6102
6103 /*
6104 * SAX: Start of Element !
6105 */
6106 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6107 (!ctxt->disableSAX))
6108 ctxt->sax->startElement(ctxt->userData, name, atts);
6109
6110 if (atts != NULL) {
6111 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6112 xmlFree((void *) atts);
6113 }
6114 return(name);
6115}
6116
6117/**
6118 * xmlParseEndTag:
6119 * @ctxt: an XML parser context
6120 *
6121 * parse an end of tag
6122 *
6123 * [42] ETag ::= '</' Name S? '>'
6124 *
6125 * With namespace
6126 *
6127 * [NS 9] ETag ::= '</' QName S? '>'
6128 */
6129
6130void
6131xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6132 xmlChar *name;
6133 xmlChar *oldname;
6134
6135 GROW;
6136 if ((RAW != '<') || (NXT(1) != '/')) {
6137 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6139 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6140 ctxt->wellFormed = 0;
6141 ctxt->disableSAX = 1;
6142 return;
6143 }
6144 SKIP(2);
6145
6146 name = xmlParseName(ctxt);
6147
6148 /*
6149 * We should definitely be at the ending "S? '>'" part
6150 */
6151 GROW;
6152 SKIP_BLANKS;
6153 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6154 ctxt->errNo = XML_ERR_GT_REQUIRED;
6155 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6156 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6157 ctxt->wellFormed = 0;
6158 ctxt->disableSAX = 1;
6159 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006160 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006161
6162 /*
6163 * [ WFC: Element Type Match ]
6164 * The Name in an element's end-tag must match the element type in the
6165 * start-tag.
6166 *
6167 */
6168 if ((name == NULL) || (ctxt->name == NULL) ||
6169 (!xmlStrEqual(name, ctxt->name))) {
6170 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6172 if ((name != NULL) && (ctxt->name != NULL)) {
6173 ctxt->sax->error(ctxt->userData,
6174 "Opening and ending tag mismatch: %s and %s\n",
6175 ctxt->name, name);
6176 } else if (ctxt->name != NULL) {
6177 ctxt->sax->error(ctxt->userData,
6178 "Ending tag eror for: %s\n", ctxt->name);
6179 } else {
6180 ctxt->sax->error(ctxt->userData,
6181 "Ending tag error: internal error ???\n");
6182 }
6183
6184 }
6185 ctxt->wellFormed = 0;
6186 ctxt->disableSAX = 1;
6187 }
6188
6189 /*
6190 * SAX: End of Tag
6191 */
6192 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6193 (!ctxt->disableSAX))
6194 ctxt->sax->endElement(ctxt->userData, name);
6195
6196 if (name != NULL)
6197 xmlFree(name);
6198 oldname = namePop(ctxt);
6199 spacePop(ctxt);
6200 if (oldname != NULL) {
6201#ifdef DEBUG_STACK
6202 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6203#endif
6204 xmlFree(oldname);
6205 }
6206 return;
6207}
6208
6209/**
6210 * xmlParseCDSect:
6211 * @ctxt: an XML parser context
6212 *
6213 * Parse escaped pure raw content.
6214 *
6215 * [18] CDSect ::= CDStart CData CDEnd
6216 *
6217 * [19] CDStart ::= '<![CDATA['
6218 *
6219 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6220 *
6221 * [21] CDEnd ::= ']]>'
6222 */
6223void
6224xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6225 xmlChar *buf = NULL;
6226 int len = 0;
6227 int size = XML_PARSER_BUFFER_SIZE;
6228 int r, rl;
6229 int s, sl;
6230 int cur, l;
6231 int count = 0;
6232
6233 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6234 (NXT(2) == '[') && (NXT(3) == 'C') &&
6235 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6236 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6237 (NXT(8) == '[')) {
6238 SKIP(9);
6239 } else
6240 return;
6241
6242 ctxt->instate = XML_PARSER_CDATA_SECTION;
6243 r = CUR_CHAR(rl);
6244 if (!IS_CHAR(r)) {
6245 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6247 ctxt->sax->error(ctxt->userData,
6248 "CData section not finished\n");
6249 ctxt->wellFormed = 0;
6250 ctxt->disableSAX = 1;
6251 ctxt->instate = XML_PARSER_CONTENT;
6252 return;
6253 }
6254 NEXTL(rl);
6255 s = CUR_CHAR(sl);
6256 if (!IS_CHAR(s)) {
6257 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6259 ctxt->sax->error(ctxt->userData,
6260 "CData section not finished\n");
6261 ctxt->wellFormed = 0;
6262 ctxt->disableSAX = 1;
6263 ctxt->instate = XML_PARSER_CONTENT;
6264 return;
6265 }
6266 NEXTL(sl);
6267 cur = CUR_CHAR(l);
6268 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6269 if (buf == NULL) {
6270 xmlGenericError(xmlGenericErrorContext,
6271 "malloc of %d byte failed\n", size);
6272 return;
6273 }
6274 while (IS_CHAR(cur) &&
6275 ((r != ']') || (s != ']') || (cur != '>'))) {
6276 if (len + 5 >= size) {
6277 size *= 2;
6278 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6279 if (buf == NULL) {
6280 xmlGenericError(xmlGenericErrorContext,
6281 "realloc of %d byte failed\n", size);
6282 return;
6283 }
6284 }
6285 COPY_BUF(rl,buf,len,r);
6286 r = s;
6287 rl = sl;
6288 s = cur;
6289 sl = l;
6290 count++;
6291 if (count > 50) {
6292 GROW;
6293 count = 0;
6294 }
6295 NEXTL(l);
6296 cur = CUR_CHAR(l);
6297 }
6298 buf[len] = 0;
6299 ctxt->instate = XML_PARSER_CONTENT;
6300 if (cur != '>') {
6301 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6303 ctxt->sax->error(ctxt->userData,
6304 "CData section not finished\n%.50s\n", buf);
6305 ctxt->wellFormed = 0;
6306 ctxt->disableSAX = 1;
6307 xmlFree(buf);
6308 return;
6309 }
6310 NEXTL(l);
6311
6312 /*
6313 * Ok the buffer is to be consumed as cdata.
6314 */
6315 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6316 if (ctxt->sax->cdataBlock != NULL)
6317 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6318 }
6319 xmlFree(buf);
6320}
6321
6322/**
6323 * xmlParseContent:
6324 * @ctxt: an XML parser context
6325 *
6326 * Parse a content:
6327 *
6328 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6329 */
6330
6331void
6332xmlParseContent(xmlParserCtxtPtr ctxt) {
6333 GROW;
6334 while (((RAW != 0) || (ctxt->token != 0)) &&
6335 ((RAW != '<') || (NXT(1) != '/'))) {
6336 const xmlChar *test = CUR_PTR;
6337 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006338 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006339 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006340
6341 /*
6342 * Handle possible processed charrefs.
6343 */
6344 if (ctxt->token != 0) {
6345 xmlParseCharData(ctxt, 0);
6346 }
6347 /*
6348 * First case : a Processing Instruction.
6349 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006350 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006351 xmlParsePI(ctxt);
6352 }
6353
6354 /*
6355 * Second case : a CDSection
6356 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006357 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006358 (NXT(2) == '[') && (NXT(3) == 'C') &&
6359 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6360 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6361 (NXT(8) == '[')) {
6362 xmlParseCDSect(ctxt);
6363 }
6364
6365 /*
6366 * Third case : a comment
6367 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006368 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006369 (NXT(2) == '-') && (NXT(3) == '-')) {
6370 xmlParseComment(ctxt);
6371 ctxt->instate = XML_PARSER_CONTENT;
6372 }
6373
6374 /*
6375 * Fourth case : a sub-element.
6376 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006377 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006378 xmlParseElement(ctxt);
6379 }
6380
6381 /*
6382 * Fifth case : a reference. If if has not been resolved,
6383 * parsing returns it's Name, create the node
6384 */
6385
Daniel Veillard21a0f912001-02-25 19:54:14 +00006386 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006387 xmlParseReference(ctxt);
6388 }
6389
6390 /*
6391 * Last case, text. Note that References are handled directly.
6392 */
6393 else {
6394 xmlParseCharData(ctxt, 0);
6395 }
6396
6397 GROW;
6398 /*
6399 * Pop-up of finished entities.
6400 */
6401 while ((RAW == 0) && (ctxt->inputNr > 1))
6402 xmlPopInput(ctxt);
6403 SHRINK;
6404
6405 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6406 (tok == ctxt->token)) {
6407 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6409 ctxt->sax->error(ctxt->userData,
6410 "detected an error in element content\n");
6411 ctxt->wellFormed = 0;
6412 ctxt->disableSAX = 1;
6413 ctxt->instate = XML_PARSER_EOF;
6414 break;
6415 }
6416 }
6417}
6418
6419/**
6420 * xmlParseElement:
6421 * @ctxt: an XML parser context
6422 *
6423 * parse an XML element, this is highly recursive
6424 *
6425 * [39] element ::= EmptyElemTag | STag content ETag
6426 *
6427 * [ WFC: Element Type Match ]
6428 * The Name in an element's end-tag must match the element type in the
6429 * start-tag.
6430 *
6431 * [ VC: Element Valid ]
6432 * An element is valid if there is a declaration matching elementdecl
6433 * where the Name matches the element type and one of the following holds:
6434 * - The declaration matches EMPTY and the element has no content.
6435 * - The declaration matches children and the sequence of child elements
6436 * belongs to the language generated by the regular expression in the
6437 * content model, with optional white space (characters matching the
6438 * nonterminal S) between each pair of child elements.
6439 * - The declaration matches Mixed and the content consists of character
6440 * data and child elements whose types match names in the content model.
6441 * - The declaration matches ANY, and the types of any child elements have
6442 * been declared.
6443 */
6444
6445void
6446xmlParseElement(xmlParserCtxtPtr ctxt) {
6447 const xmlChar *openTag = CUR_PTR;
6448 xmlChar *name;
6449 xmlChar *oldname;
6450 xmlParserNodeInfo node_info;
6451 xmlNodePtr ret;
6452
6453 /* Capture start position */
6454 if (ctxt->record_info) {
6455 node_info.begin_pos = ctxt->input->consumed +
6456 (CUR_PTR - ctxt->input->base);
6457 node_info.begin_line = ctxt->input->line;
6458 }
6459
6460 if (ctxt->spaceNr == 0)
6461 spacePush(ctxt, -1);
6462 else
6463 spacePush(ctxt, *ctxt->space);
6464
6465 name = xmlParseStartTag(ctxt);
6466 if (name == NULL) {
6467 spacePop(ctxt);
6468 return;
6469 }
6470 namePush(ctxt, name);
6471 ret = ctxt->node;
6472
6473 /*
6474 * [ VC: Root Element Type ]
6475 * The Name in the document type declaration must match the element
6476 * type of the root element.
6477 */
6478 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6479 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6480 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6481
6482 /*
6483 * Check for an Empty Element.
6484 */
6485 if ((RAW == '/') && (NXT(1) == '>')) {
6486 SKIP(2);
6487 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6488 (!ctxt->disableSAX))
6489 ctxt->sax->endElement(ctxt->userData, name);
6490 oldname = namePop(ctxt);
6491 spacePop(ctxt);
6492 if (oldname != NULL) {
6493#ifdef DEBUG_STACK
6494 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6495#endif
6496 xmlFree(oldname);
6497 }
6498 if ( ret != NULL && ctxt->record_info ) {
6499 node_info.end_pos = ctxt->input->consumed +
6500 (CUR_PTR - ctxt->input->base);
6501 node_info.end_line = ctxt->input->line;
6502 node_info.node = ret;
6503 xmlParserAddNodeInfo(ctxt, &node_info);
6504 }
6505 return;
6506 }
6507 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006508 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006509 } else {
6510 ctxt->errNo = XML_ERR_GT_REQUIRED;
6511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6512 ctxt->sax->error(ctxt->userData,
6513 "Couldn't find end of Start Tag\n%.30s\n",
6514 openTag);
6515 ctxt->wellFormed = 0;
6516 ctxt->disableSAX = 1;
6517
6518 /*
6519 * end of parsing of this node.
6520 */
6521 nodePop(ctxt);
6522 oldname = namePop(ctxt);
6523 spacePop(ctxt);
6524 if (oldname != NULL) {
6525#ifdef DEBUG_STACK
6526 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6527#endif
6528 xmlFree(oldname);
6529 }
6530
6531 /*
6532 * Capture end position and add node
6533 */
6534 if ( ret != NULL && ctxt->record_info ) {
6535 node_info.end_pos = ctxt->input->consumed +
6536 (CUR_PTR - ctxt->input->base);
6537 node_info.end_line = ctxt->input->line;
6538 node_info.node = ret;
6539 xmlParserAddNodeInfo(ctxt, &node_info);
6540 }
6541 return;
6542 }
6543
6544 /*
6545 * Parse the content of the element:
6546 */
6547 xmlParseContent(ctxt);
6548 if (!IS_CHAR(RAW)) {
6549 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6551 ctxt->sax->error(ctxt->userData,
6552 "Premature end of data in tag %.30s\n", openTag);
6553 ctxt->wellFormed = 0;
6554 ctxt->disableSAX = 1;
6555
6556 /*
6557 * end of parsing of this node.
6558 */
6559 nodePop(ctxt);
6560 oldname = namePop(ctxt);
6561 spacePop(ctxt);
6562 if (oldname != NULL) {
6563#ifdef DEBUG_STACK
6564 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6565#endif
6566 xmlFree(oldname);
6567 }
6568 return;
6569 }
6570
6571 /*
6572 * parse the end of tag: '</' should be here.
6573 */
6574 xmlParseEndTag(ctxt);
6575
6576 /*
6577 * Capture end position and add node
6578 */
6579 if ( ret != NULL && ctxt->record_info ) {
6580 node_info.end_pos = ctxt->input->consumed +
6581 (CUR_PTR - ctxt->input->base);
6582 node_info.end_line = ctxt->input->line;
6583 node_info.node = ret;
6584 xmlParserAddNodeInfo(ctxt, &node_info);
6585 }
6586}
6587
6588/**
6589 * xmlParseVersionNum:
6590 * @ctxt: an XML parser context
6591 *
6592 * parse the XML version value.
6593 *
6594 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6595 *
6596 * Returns the string giving the XML version number, or NULL
6597 */
6598xmlChar *
6599xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6600 xmlChar *buf = NULL;
6601 int len = 0;
6602 int size = 10;
6603 xmlChar cur;
6604
6605 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6606 if (buf == NULL) {
6607 xmlGenericError(xmlGenericErrorContext,
6608 "malloc of %d byte failed\n", size);
6609 return(NULL);
6610 }
6611 cur = CUR;
6612 while (((cur >= 'a') && (cur <= 'z')) ||
6613 ((cur >= 'A') && (cur <= 'Z')) ||
6614 ((cur >= '0') && (cur <= '9')) ||
6615 (cur == '_') || (cur == '.') ||
6616 (cur == ':') || (cur == '-')) {
6617 if (len + 1 >= size) {
6618 size *= 2;
6619 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6620 if (buf == NULL) {
6621 xmlGenericError(xmlGenericErrorContext,
6622 "realloc of %d byte failed\n", size);
6623 return(NULL);
6624 }
6625 }
6626 buf[len++] = cur;
6627 NEXT;
6628 cur=CUR;
6629 }
6630 buf[len] = 0;
6631 return(buf);
6632}
6633
6634/**
6635 * xmlParseVersionInfo:
6636 * @ctxt: an XML parser context
6637 *
6638 * parse the XML version.
6639 *
6640 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6641 *
6642 * [25] Eq ::= S? '=' S?
6643 *
6644 * Returns the version string, e.g. "1.0"
6645 */
6646
6647xmlChar *
6648xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6649 xmlChar *version = NULL;
6650 const xmlChar *q;
6651
6652 if ((RAW == 'v') && (NXT(1) == 'e') &&
6653 (NXT(2) == 'r') && (NXT(3) == 's') &&
6654 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6655 (NXT(6) == 'n')) {
6656 SKIP(7);
6657 SKIP_BLANKS;
6658 if (RAW != '=') {
6659 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6661 ctxt->sax->error(ctxt->userData,
6662 "xmlParseVersionInfo : expected '='\n");
6663 ctxt->wellFormed = 0;
6664 ctxt->disableSAX = 1;
6665 return(NULL);
6666 }
6667 NEXT;
6668 SKIP_BLANKS;
6669 if (RAW == '"') {
6670 NEXT;
6671 q = CUR_PTR;
6672 version = xmlParseVersionNum(ctxt);
6673 if (RAW != '"') {
6674 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6676 ctxt->sax->error(ctxt->userData,
6677 "String not closed\n%.50s\n", q);
6678 ctxt->wellFormed = 0;
6679 ctxt->disableSAX = 1;
6680 } else
6681 NEXT;
6682 } else if (RAW == '\''){
6683 NEXT;
6684 q = CUR_PTR;
6685 version = xmlParseVersionNum(ctxt);
6686 if (RAW != '\'') {
6687 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6689 ctxt->sax->error(ctxt->userData,
6690 "String not closed\n%.50s\n", q);
6691 ctxt->wellFormed = 0;
6692 ctxt->disableSAX = 1;
6693 } else
6694 NEXT;
6695 } else {
6696 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6698 ctxt->sax->error(ctxt->userData,
6699 "xmlParseVersionInfo : expected ' or \"\n");
6700 ctxt->wellFormed = 0;
6701 ctxt->disableSAX = 1;
6702 }
6703 }
6704 return(version);
6705}
6706
6707/**
6708 * xmlParseEncName:
6709 * @ctxt: an XML parser context
6710 *
6711 * parse the XML encoding name
6712 *
6713 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6714 *
6715 * Returns the encoding name value or NULL
6716 */
6717xmlChar *
6718xmlParseEncName(xmlParserCtxtPtr ctxt) {
6719 xmlChar *buf = NULL;
6720 int len = 0;
6721 int size = 10;
6722 xmlChar cur;
6723
6724 cur = CUR;
6725 if (((cur >= 'a') && (cur <= 'z')) ||
6726 ((cur >= 'A') && (cur <= 'Z'))) {
6727 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6728 if (buf == NULL) {
6729 xmlGenericError(xmlGenericErrorContext,
6730 "malloc of %d byte failed\n", size);
6731 return(NULL);
6732 }
6733
6734 buf[len++] = cur;
6735 NEXT;
6736 cur = CUR;
6737 while (((cur >= 'a') && (cur <= 'z')) ||
6738 ((cur >= 'A') && (cur <= 'Z')) ||
6739 ((cur >= '0') && (cur <= '9')) ||
6740 (cur == '.') || (cur == '_') ||
6741 (cur == '-')) {
6742 if (len + 1 >= size) {
6743 size *= 2;
6744 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6745 if (buf == NULL) {
6746 xmlGenericError(xmlGenericErrorContext,
6747 "realloc of %d byte failed\n", size);
6748 return(NULL);
6749 }
6750 }
6751 buf[len++] = cur;
6752 NEXT;
6753 cur = CUR;
6754 if (cur == 0) {
6755 SHRINK;
6756 GROW;
6757 cur = CUR;
6758 }
6759 }
6760 buf[len] = 0;
6761 } else {
6762 ctxt->errNo = XML_ERR_ENCODING_NAME;
6763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6764 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6765 ctxt->wellFormed = 0;
6766 ctxt->disableSAX = 1;
6767 }
6768 return(buf);
6769}
6770
6771/**
6772 * xmlParseEncodingDecl:
6773 * @ctxt: an XML parser context
6774 *
6775 * parse the XML encoding declaration
6776 *
6777 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6778 *
6779 * this setups the conversion filters.
6780 *
6781 * Returns the encoding value or NULL
6782 */
6783
6784xmlChar *
6785xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6786 xmlChar *encoding = NULL;
6787 const xmlChar *q;
6788
6789 SKIP_BLANKS;
6790 if ((RAW == 'e') && (NXT(1) == 'n') &&
6791 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6792 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6793 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6794 SKIP(8);
6795 SKIP_BLANKS;
6796 if (RAW != '=') {
6797 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6799 ctxt->sax->error(ctxt->userData,
6800 "xmlParseEncodingDecl : expected '='\n");
6801 ctxt->wellFormed = 0;
6802 ctxt->disableSAX = 1;
6803 return(NULL);
6804 }
6805 NEXT;
6806 SKIP_BLANKS;
6807 if (RAW == '"') {
6808 NEXT;
6809 q = CUR_PTR;
6810 encoding = xmlParseEncName(ctxt);
6811 if (RAW != '"') {
6812 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6814 ctxt->sax->error(ctxt->userData,
6815 "String not closed\n%.50s\n", q);
6816 ctxt->wellFormed = 0;
6817 ctxt->disableSAX = 1;
6818 } else
6819 NEXT;
6820 } else if (RAW == '\''){
6821 NEXT;
6822 q = CUR_PTR;
6823 encoding = xmlParseEncName(ctxt);
6824 if (RAW != '\'') {
6825 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6827 ctxt->sax->error(ctxt->userData,
6828 "String not closed\n%.50s\n", q);
6829 ctxt->wellFormed = 0;
6830 ctxt->disableSAX = 1;
6831 } else
6832 NEXT;
6833 } else if (RAW == '"'){
6834 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6836 ctxt->sax->error(ctxt->userData,
6837 "xmlParseEncodingDecl : expected ' or \"\n");
6838 ctxt->wellFormed = 0;
6839 ctxt->disableSAX = 1;
6840 }
6841 if (encoding != NULL) {
6842 xmlCharEncoding enc;
6843 xmlCharEncodingHandlerPtr handler;
6844
6845 if (ctxt->input->encoding != NULL)
6846 xmlFree((xmlChar *) ctxt->input->encoding);
6847 ctxt->input->encoding = encoding;
6848
6849 enc = xmlParseCharEncoding((const char *) encoding);
6850 /*
6851 * registered set of known encodings
6852 */
6853 if (enc != XML_CHAR_ENCODING_ERROR) {
6854 xmlSwitchEncoding(ctxt, enc);
6855 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6856 xmlFree(encoding);
6857 return(NULL);
6858 }
6859 } else {
6860 /*
6861 * fallback for unknown encodings
6862 */
6863 handler = xmlFindCharEncodingHandler((const char *) encoding);
6864 if (handler != NULL) {
6865 xmlSwitchToEncoding(ctxt, handler);
6866 } else {
6867 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6869 ctxt->sax->error(ctxt->userData,
6870 "Unsupported encoding %s\n", encoding);
6871 return(NULL);
6872 }
6873 }
6874 }
6875 }
6876 return(encoding);
6877}
6878
6879/**
6880 * xmlParseSDDecl:
6881 * @ctxt: an XML parser context
6882 *
6883 * parse the XML standalone declaration
6884 *
6885 * [32] SDDecl ::= S 'standalone' Eq
6886 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6887 *
6888 * [ VC: Standalone Document Declaration ]
6889 * TODO The standalone document declaration must have the value "no"
6890 * if any external markup declarations contain declarations of:
6891 * - attributes with default values, if elements to which these
6892 * attributes apply appear in the document without specifications
6893 * of values for these attributes, or
6894 * - entities (other than amp, lt, gt, apos, quot), if references
6895 * to those entities appear in the document, or
6896 * - attributes with values subject to normalization, where the
6897 * attribute appears in the document with a value which will change
6898 * as a result of normalization, or
6899 * - element types with element content, if white space occurs directly
6900 * within any instance of those types.
6901 *
6902 * Returns 1 if standalone, 0 otherwise
6903 */
6904
6905int
6906xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6907 int standalone = -1;
6908
6909 SKIP_BLANKS;
6910 if ((RAW == 's') && (NXT(1) == 't') &&
6911 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6912 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6913 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6914 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6915 SKIP(10);
6916 SKIP_BLANKS;
6917 if (RAW != '=') {
6918 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6920 ctxt->sax->error(ctxt->userData,
6921 "XML standalone declaration : expected '='\n");
6922 ctxt->wellFormed = 0;
6923 ctxt->disableSAX = 1;
6924 return(standalone);
6925 }
6926 NEXT;
6927 SKIP_BLANKS;
6928 if (RAW == '\''){
6929 NEXT;
6930 if ((RAW == 'n') && (NXT(1) == 'o')) {
6931 standalone = 0;
6932 SKIP(2);
6933 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6934 (NXT(2) == 's')) {
6935 standalone = 1;
6936 SKIP(3);
6937 } else {
6938 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6940 ctxt->sax->error(ctxt->userData,
6941 "standalone accepts only 'yes' or 'no'\n");
6942 ctxt->wellFormed = 0;
6943 ctxt->disableSAX = 1;
6944 }
6945 if (RAW != '\'') {
6946 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6948 ctxt->sax->error(ctxt->userData, "String not closed\n");
6949 ctxt->wellFormed = 0;
6950 ctxt->disableSAX = 1;
6951 } else
6952 NEXT;
6953 } else if (RAW == '"'){
6954 NEXT;
6955 if ((RAW == 'n') && (NXT(1) == 'o')) {
6956 standalone = 0;
6957 SKIP(2);
6958 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6959 (NXT(2) == 's')) {
6960 standalone = 1;
6961 SKIP(3);
6962 } else {
6963 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6965 ctxt->sax->error(ctxt->userData,
6966 "standalone accepts only 'yes' or 'no'\n");
6967 ctxt->wellFormed = 0;
6968 ctxt->disableSAX = 1;
6969 }
6970 if (RAW != '"') {
6971 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6973 ctxt->sax->error(ctxt->userData, "String not closed\n");
6974 ctxt->wellFormed = 0;
6975 ctxt->disableSAX = 1;
6976 } else
6977 NEXT;
6978 } else {
6979 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6981 ctxt->sax->error(ctxt->userData,
6982 "Standalone value not found\n");
6983 ctxt->wellFormed = 0;
6984 ctxt->disableSAX = 1;
6985 }
6986 }
6987 return(standalone);
6988}
6989
6990/**
6991 * xmlParseXMLDecl:
6992 * @ctxt: an XML parser context
6993 *
6994 * parse an XML declaration header
6995 *
6996 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6997 */
6998
6999void
7000xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7001 xmlChar *version;
7002
7003 /*
7004 * We know that '<?xml' is here.
7005 */
7006 SKIP(5);
7007
7008 if (!IS_BLANK(RAW)) {
7009 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7011 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7012 ctxt->wellFormed = 0;
7013 ctxt->disableSAX = 1;
7014 }
7015 SKIP_BLANKS;
7016
7017 /*
7018 * We should have the VersionInfo here.
7019 */
7020 version = xmlParseVersionInfo(ctxt);
7021 if (version == NULL)
7022 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7023 ctxt->version = xmlStrdup(version);
7024 xmlFree(version);
7025
7026 /*
7027 * We may have the encoding declaration
7028 */
7029 if (!IS_BLANK(RAW)) {
7030 if ((RAW == '?') && (NXT(1) == '>')) {
7031 SKIP(2);
7032 return;
7033 }
7034 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7036 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7037 ctxt->wellFormed = 0;
7038 ctxt->disableSAX = 1;
7039 }
7040 xmlParseEncodingDecl(ctxt);
7041 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7042 /*
7043 * The XML REC instructs us to stop parsing right here
7044 */
7045 return;
7046 }
7047
7048 /*
7049 * We may have the standalone status.
7050 */
7051 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7052 if ((RAW == '?') && (NXT(1) == '>')) {
7053 SKIP(2);
7054 return;
7055 }
7056 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7058 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7059 ctxt->wellFormed = 0;
7060 ctxt->disableSAX = 1;
7061 }
7062 SKIP_BLANKS;
7063 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7064
7065 SKIP_BLANKS;
7066 if ((RAW == '?') && (NXT(1) == '>')) {
7067 SKIP(2);
7068 } else if (RAW == '>') {
7069 /* Deprecated old WD ... */
7070 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7072 ctxt->sax->error(ctxt->userData,
7073 "XML declaration must end-up with '?>'\n");
7074 ctxt->wellFormed = 0;
7075 ctxt->disableSAX = 1;
7076 NEXT;
7077 } else {
7078 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7080 ctxt->sax->error(ctxt->userData,
7081 "parsing XML declaration: '?>' expected\n");
7082 ctxt->wellFormed = 0;
7083 ctxt->disableSAX = 1;
7084 MOVETO_ENDTAG(CUR_PTR);
7085 NEXT;
7086 }
7087}
7088
7089/**
7090 * xmlParseMisc:
7091 * @ctxt: an XML parser context
7092 *
7093 * parse an XML Misc* optionnal field.
7094 *
7095 * [27] Misc ::= Comment | PI | S
7096 */
7097
7098void
7099xmlParseMisc(xmlParserCtxtPtr ctxt) {
7100 while (((RAW == '<') && (NXT(1) == '?')) ||
7101 ((RAW == '<') && (NXT(1) == '!') &&
7102 (NXT(2) == '-') && (NXT(3) == '-')) ||
7103 IS_BLANK(CUR)) {
7104 if ((RAW == '<') && (NXT(1) == '?')) {
7105 xmlParsePI(ctxt);
7106 } else if (IS_BLANK(CUR)) {
7107 NEXT;
7108 } else
7109 xmlParseComment(ctxt);
7110 }
7111}
7112
7113/**
7114 * xmlParseDocument:
7115 * @ctxt: an XML parser context
7116 *
7117 * parse an XML document (and build a tree if using the standard SAX
7118 * interface).
7119 *
7120 * [1] document ::= prolog element Misc*
7121 *
7122 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7123 *
7124 * Returns 0, -1 in case of error. the parser context is augmented
7125 * as a result of the parsing.
7126 */
7127
7128int
7129xmlParseDocument(xmlParserCtxtPtr ctxt) {
7130 xmlChar start[4];
7131 xmlCharEncoding enc;
7132
7133 xmlInitParser();
7134
7135 GROW;
7136
7137 /*
7138 * SAX: beginning of the document processing.
7139 */
7140 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7141 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7142
7143 /*
7144 * Get the 4 first bytes and decode the charset
7145 * if enc != XML_CHAR_ENCODING_NONE
7146 * plug some encoding conversion routines.
7147 */
7148 start[0] = RAW;
7149 start[1] = NXT(1);
7150 start[2] = NXT(2);
7151 start[3] = NXT(3);
7152 enc = xmlDetectCharEncoding(start, 4);
7153 if (enc != XML_CHAR_ENCODING_NONE) {
7154 xmlSwitchEncoding(ctxt, enc);
7155 }
7156
7157
7158 if (CUR == 0) {
7159 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7161 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7162 ctxt->wellFormed = 0;
7163 ctxt->disableSAX = 1;
7164 }
7165
7166 /*
7167 * Check for the XMLDecl in the Prolog.
7168 */
7169 GROW;
7170 if ((RAW == '<') && (NXT(1) == '?') &&
7171 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7172 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7173
7174 /*
7175 * Note that we will switch encoding on the fly.
7176 */
7177 xmlParseXMLDecl(ctxt);
7178 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7179 /*
7180 * The XML REC instructs us to stop parsing right here
7181 */
7182 return(-1);
7183 }
7184 ctxt->standalone = ctxt->input->standalone;
7185 SKIP_BLANKS;
7186 } else {
7187 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7188 }
7189 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7190 ctxt->sax->startDocument(ctxt->userData);
7191
7192 /*
7193 * The Misc part of the Prolog
7194 */
7195 GROW;
7196 xmlParseMisc(ctxt);
7197
7198 /*
7199 * Then possibly doc type declaration(s) and more Misc
7200 * (doctypedecl Misc*)?
7201 */
7202 GROW;
7203 if ((RAW == '<') && (NXT(1) == '!') &&
7204 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7205 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7206 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7207 (NXT(8) == 'E')) {
7208
7209 ctxt->inSubset = 1;
7210 xmlParseDocTypeDecl(ctxt);
7211 if (RAW == '[') {
7212 ctxt->instate = XML_PARSER_DTD;
7213 xmlParseInternalSubset(ctxt);
7214 }
7215
7216 /*
7217 * Create and update the external subset.
7218 */
7219 ctxt->inSubset = 2;
7220 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7221 (!ctxt->disableSAX))
7222 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7223 ctxt->extSubSystem, ctxt->extSubURI);
7224 ctxt->inSubset = 0;
7225
7226
7227 ctxt->instate = XML_PARSER_PROLOG;
7228 xmlParseMisc(ctxt);
7229 }
7230
7231 /*
7232 * Time to start parsing the tree itself
7233 */
7234 GROW;
7235 if (RAW != '<') {
7236 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7238 ctxt->sax->error(ctxt->userData,
7239 "Start tag expected, '<' not found\n");
7240 ctxt->wellFormed = 0;
7241 ctxt->disableSAX = 1;
7242 ctxt->instate = XML_PARSER_EOF;
7243 } else {
7244 ctxt->instate = XML_PARSER_CONTENT;
7245 xmlParseElement(ctxt);
7246 ctxt->instate = XML_PARSER_EPILOG;
7247
7248
7249 /*
7250 * The Misc part at the end
7251 */
7252 xmlParseMisc(ctxt);
7253
7254 if (RAW != 0) {
7255 ctxt->errNo = XML_ERR_DOCUMENT_END;
7256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7257 ctxt->sax->error(ctxt->userData,
7258 "Extra content at the end of the document\n");
7259 ctxt->wellFormed = 0;
7260 ctxt->disableSAX = 1;
7261 }
7262 ctxt->instate = XML_PARSER_EOF;
7263 }
7264
7265 /*
7266 * SAX: end of the document processing.
7267 */
7268 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7269 (!ctxt->disableSAX))
7270 ctxt->sax->endDocument(ctxt->userData);
7271
7272 if (! ctxt->wellFormed) return(-1);
7273 return(0);
7274}
7275
7276/**
7277 * xmlParseExtParsedEnt:
7278 * @ctxt: an XML parser context
7279 *
7280 * parse a genreral parsed entity
7281 * An external general parsed entity is well-formed if it matches the
7282 * production labeled extParsedEnt.
7283 *
7284 * [78] extParsedEnt ::= TextDecl? content
7285 *
7286 * Returns 0, -1 in case of error. the parser context is augmented
7287 * as a result of the parsing.
7288 */
7289
7290int
7291xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7292 xmlChar start[4];
7293 xmlCharEncoding enc;
7294
7295 xmlDefaultSAXHandlerInit();
7296
7297 GROW;
7298
7299 /*
7300 * SAX: beginning of the document processing.
7301 */
7302 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7303 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7304
7305 /*
7306 * Get the 4 first bytes and decode the charset
7307 * if enc != XML_CHAR_ENCODING_NONE
7308 * plug some encoding conversion routines.
7309 */
7310 start[0] = RAW;
7311 start[1] = NXT(1);
7312 start[2] = NXT(2);
7313 start[3] = NXT(3);
7314 enc = xmlDetectCharEncoding(start, 4);
7315 if (enc != XML_CHAR_ENCODING_NONE) {
7316 xmlSwitchEncoding(ctxt, enc);
7317 }
7318
7319
7320 if (CUR == 0) {
7321 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7323 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7324 ctxt->wellFormed = 0;
7325 ctxt->disableSAX = 1;
7326 }
7327
7328 /*
7329 * Check for the XMLDecl in the Prolog.
7330 */
7331 GROW;
7332 if ((RAW == '<') && (NXT(1) == '?') &&
7333 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7334 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7335
7336 /*
7337 * Note that we will switch encoding on the fly.
7338 */
7339 xmlParseXMLDecl(ctxt);
7340 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7341 /*
7342 * The XML REC instructs us to stop parsing right here
7343 */
7344 return(-1);
7345 }
7346 SKIP_BLANKS;
7347 } else {
7348 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7349 }
7350 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7351 ctxt->sax->startDocument(ctxt->userData);
7352
7353 /*
7354 * Doing validity checking on chunk doesn't make sense
7355 */
7356 ctxt->instate = XML_PARSER_CONTENT;
7357 ctxt->validate = 0;
7358 ctxt->loadsubset = 0;
7359 ctxt->depth = 0;
7360
7361 xmlParseContent(ctxt);
7362
7363 if ((RAW == '<') && (NXT(1) == '/')) {
7364 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7366 ctxt->sax->error(ctxt->userData,
7367 "chunk is not well balanced\n");
7368 ctxt->wellFormed = 0;
7369 ctxt->disableSAX = 1;
7370 } else if (RAW != 0) {
7371 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7373 ctxt->sax->error(ctxt->userData,
7374 "extra content at the end of well balanced chunk\n");
7375 ctxt->wellFormed = 0;
7376 ctxt->disableSAX = 1;
7377 }
7378
7379 /*
7380 * SAX: end of the document processing.
7381 */
7382 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7383 (!ctxt->disableSAX))
7384 ctxt->sax->endDocument(ctxt->userData);
7385
7386 if (! ctxt->wellFormed) return(-1);
7387 return(0);
7388}
7389
7390/************************************************************************
7391 * *
7392 * Progressive parsing interfaces *
7393 * *
7394 ************************************************************************/
7395
7396/**
7397 * xmlParseLookupSequence:
7398 * @ctxt: an XML parser context
7399 * @first: the first char to lookup
7400 * @next: the next char to lookup or zero
7401 * @third: the next char to lookup or zero
7402 *
7403 * Try to find if a sequence (first, next, third) or just (first next) or
7404 * (first) is available in the input stream.
7405 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7406 * to avoid rescanning sequences of bytes, it DOES change the state of the
7407 * parser, do not use liberally.
7408 *
7409 * Returns the index to the current parsing point if the full sequence
7410 * is available, -1 otherwise.
7411 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007412static int
Owen Taylor3473f882001-02-23 17:55:21 +00007413xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7414 xmlChar next, xmlChar third) {
7415 int base, len;
7416 xmlParserInputPtr in;
7417 const xmlChar *buf;
7418
7419 in = ctxt->input;
7420 if (in == NULL) return(-1);
7421 base = in->cur - in->base;
7422 if (base < 0) return(-1);
7423 if (ctxt->checkIndex > base)
7424 base = ctxt->checkIndex;
7425 if (in->buf == NULL) {
7426 buf = in->base;
7427 len = in->length;
7428 } else {
7429 buf = in->buf->buffer->content;
7430 len = in->buf->buffer->use;
7431 }
7432 /* take into account the sequence length */
7433 if (third) len -= 2;
7434 else if (next) len --;
7435 for (;base < len;base++) {
7436 if (buf[base] == first) {
7437 if (third != 0) {
7438 if ((buf[base + 1] != next) ||
7439 (buf[base + 2] != third)) continue;
7440 } else if (next != 0) {
7441 if (buf[base + 1] != next) continue;
7442 }
7443 ctxt->checkIndex = 0;
7444#ifdef DEBUG_PUSH
7445 if (next == 0)
7446 xmlGenericError(xmlGenericErrorContext,
7447 "PP: lookup '%c' found at %d\n",
7448 first, base);
7449 else if (third == 0)
7450 xmlGenericError(xmlGenericErrorContext,
7451 "PP: lookup '%c%c' found at %d\n",
7452 first, next, base);
7453 else
7454 xmlGenericError(xmlGenericErrorContext,
7455 "PP: lookup '%c%c%c' found at %d\n",
7456 first, next, third, base);
7457#endif
7458 return(base - (in->cur - in->base));
7459 }
7460 }
7461 ctxt->checkIndex = base;
7462#ifdef DEBUG_PUSH
7463 if (next == 0)
7464 xmlGenericError(xmlGenericErrorContext,
7465 "PP: lookup '%c' failed\n", first);
7466 else if (third == 0)
7467 xmlGenericError(xmlGenericErrorContext,
7468 "PP: lookup '%c%c' failed\n", first, next);
7469 else
7470 xmlGenericError(xmlGenericErrorContext,
7471 "PP: lookup '%c%c%c' failed\n", first, next, third);
7472#endif
7473 return(-1);
7474}
7475
/**
 * xmlParseTryOrFinish:
 * @ctxt:  an XML parser context
 * @terminate:  last chunk indicator
 *
 * Try to progress on parsing.
 *
 * Core loop of the push parser: depending on ctxt->instate it inspects the
 * bytes currently available in ctxt->input and, when a complete construct
 * is present (or @terminate is set), calls the matching xmlParse* routine
 * and moves to the next state.  The loop is left through the "done" label
 * as soon as the available data is not sufficient to go further.
 *
 * Returns the local counter "ret", which starts at zero and is only
 * incremented (by 5) when an XML declaration is recognized in the
 * START state; zero is also returned directly when an unsupported
 * encoding is reported or when the DTD state finds a negative offset.
 */
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    int ret = 0;
    int avail;
    xmlChar cur, next;

#ifdef DEBUG_PUSH
    /* Trace the state the parser is in when entering the function. */
    switch (ctxt->instate) {
        case XML_PARSER_EOF:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EOF\n"); break;
        case XML_PARSER_START:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START\n"); break;
        case XML_PARSER_MISC:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try MISC\n");break;
        case XML_PARSER_COMMENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try COMMENT\n");break;
        case XML_PARSER_PROLOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PROLOG\n");break;
        case XML_PARSER_START_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START_TAG\n");break;
        case XML_PARSER_CONTENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CONTENT\n");break;
        case XML_PARSER_CDATA_SECTION:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CDATA_SECTION\n");break;
        case XML_PARSER_END_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try END_TAG\n");break;
        case XML_PARSER_ENTITY_DECL:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_DECL\n");break;
        case XML_PARSER_ENTITY_VALUE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_VALUE\n");break;
        case XML_PARSER_ATTRIBUTE_VALUE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ATTRIBUTE_VALUE\n");break;
        case XML_PARSER_DTD:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try DTD\n");break;
        case XML_PARSER_EPILOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EPILOG\n");break;
        case XML_PARSER_PI:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PI\n");break;
        case XML_PARSER_IGNORE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try IGNORE\n");break;
    }
#endif

    while (1) {
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);

        if (ctxt->input ==NULL) break;
        /*
         * Number of bytes left after the current position: inputs
         * without an I/O buffer use their fixed length, buffered inputs
         * use the amount of data currently stored in the buffer.
         */
        if (ctxt->input->buf == NULL)
            avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
        else
            avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
        if (avail < 1)
            goto done;
        switch (ctxt->instate) {
            case XML_PARSER_EOF:
                /*
                 * Document parsing is done !
                 */
                goto done;
            case XML_PARSER_START:
                /*
                 * Very first chars read from the document flow.
                 */
                if (avail < 2)
                    goto done;

                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if (cur == 0) {
                    /* A leading zero byte is reported as an empty document. */
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData, "Document is empty\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    /* Unlike the other error paths, no disableSAX test here. */
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                if ((cur == '<') && (next == '?')) {
                    /* PI or XML decl */
                    if (avail < 5) return(ret);
                    /* Unless this is the last chunk, wait for the closing "?>". */
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        return(ret);
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    /*
                     * NOTE(review): cur[5] is read while only avail >= 5 was
                     * checked; presumably relies on the buffer being
                     * zero-terminated -- confirm.
                     */
                    if ((ctxt->input->cur[2] == 'x') &&
                        (ctxt->input->cur[3] == 'm') &&
                        (ctxt->input->cur[4] == 'l') &&
                        (IS_BLANK(ctxt->input->cur[5]))) {
                        ret += 5;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: Parsing XML Decl\n");
#endif
                        xmlParseXMLDecl(ctxt);
                        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
                            /*
                             * The XML REC instructs us to stop parsing right
                             * here
                             */
                            ctxt->instate = XML_PARSER_EOF;
                            return(0);
                        }
                        ctxt->standalone = ctxt->input->standalone;
                        if ((ctxt->encoding == NULL) &&
                            (ctxt->input->encoding != NULL))
                            ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->startDocument(ctxt->userData);
                        ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering MISC\n");
#endif
                    } else {
                        /*
                         * "<?" not followed by "xml" and a blank: no XML
                         * declaration, use the default version; the PI itself
                         * is left for the MISC state.
                         */
                        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->startDocument(ctxt->userData);
                        ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering MISC\n");
#endif
                    }
                } else {
                    /* No XML declaration at all: start with default version. */
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->startDocument(ctxt->userData);
                    ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering MISC\n");
#endif
                }
                break;
            case XML_PARSER_MISC:
                /*
                 * Between the XML declaration and the DOCTYPE: PIs,
                 * comments, the DOCTYPE itself, or the root start tag.
                 */
                SKIP_BLANKS;
                /* SKIP_BLANKS may have advanced cur, recompute avail. */
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                /*
                 * NOTE(review): the tests below read up to cur[8] before the
                 * "avail < 9" branch is reached; presumably relies on the
                 * buffer being zero-terminated -- confirm.
                 */
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_MISC;
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
                    (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
                    (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
                    (ctxt->input->cur[8] == 'E')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing internal subset\n");
#endif
                    ctxt->inSubset = 1;
                    xmlParseDocTypeDecl(ctxt);
                    if (RAW == '[') {
                        /* An internal subset follows, handled in DTD state. */
                        ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering DTD\n");
#endif
                    } else {
                        /*
                         * Create and update the external subset.
                         */
                        ctxt->inSubset = 2;
                        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                            (ctxt->sax->externalSubset != NULL))
                            ctxt->sax->externalSubset(ctxt->userData,
                                    ctxt->intSubName, ctxt->extSubSystem,
                                    ctxt->extSubURI);
                        ctxt->inSubset = 0;
                        ctxt->instate = XML_PARSER_PROLOG;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering PROLOG\n");
#endif
                    }
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 9)) {
                    /* "<!" seen but too few bytes to tell what it is. */
                    goto done;
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_IGNORE:
                /* Not expected in push mode: report and fall back to DTD. */
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == IGNORE");
                ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_PROLOG:
                /*
                 * After the DOCTYPE and before the root element: only PIs
                 * and comments are handled, anything else is treated as
                 * the root start tag.
                 */
                SKIP_BLANKS;
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_PROLOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    /* Possibly the start of a comment, wait for more data. */
                    goto done;
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_EPILOG:
                /*
                 * After the root element has been closed: only PIs and
                 * comments are accepted, anything else is an error.
                 */
                SKIP_BLANKS;
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                    ctxt->instate = XML_PARSER_EPILOG;
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_EPILOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    goto done;
                } else {
                    ctxt->errNo = XML_ERR_DOCUMENT_END;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                            "Extra content at the end of the document\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    /*
                     * NOTE(review): disableSAX was set to 1 just above, so
                     * this guard is false on this path and endDocument is
                     * not invoked here.
                     */
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                break;
            case XML_PARSER_START_TAG: {
                xmlChar *name, *oldname;

                /* On the main input at least two bytes are required. */
                if ((avail < 2) && (ctxt->inputNr == 1))
                    goto done;
                cur = ctxt->input->cur[0];
                if (cur != '<') {
                    ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                                "Start tag expect, '<' not found\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    /*
                     * NOTE(review): disableSAX was set to 1 just above, so
                     * this guard is false on this path and endDocument is
                     * not invoked here.
                     */
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                /* Unless this is the last chunk, wait for a '>'. */
                if ((!terminate) &&
                    (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                    goto done;
                /* New entry on the space stack: -1 at top level, else the
                 * value currently on top. */
                if (ctxt->spaceNr == 0)
                    spacePush(ctxt, -1);
                else
                    spacePush(ctxt, *ctxt->space);
                name = xmlParseStartTag(ctxt);
                if (name == NULL) {
                    /* Start tag could not be parsed: stop parsing. */
                    spacePop(ctxt);
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                /* The name stack gets its own copy of the element name. */
                namePush(ctxt, xmlStrdup(name));

                /*
                 * [ VC: Root Element Type ]
                 * The Name in the document type declaration must match
                 * the element type of the root element.
                 */
                if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
                    ctxt->node && (ctxt->node == ctxt->myDoc->children))
                    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);

                /*
                 * Check for an Empty Element.
                 */
                if ((RAW == '/') && (NXT(1) == '>')) {
                    SKIP(2);
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
                        ctxt->sax->endElement(ctxt->userData, name);
                    xmlFree(name);
                    oldname = namePop(ctxt);
                    spacePop(ctxt);
                    if (oldname != NULL) {
#ifdef DEBUG_STACK
                        xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
                        xmlFree(oldname);
                    }
                    /* No element left open means the root was just closed. */
                    if (ctxt->name == NULL) {
                        ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering EPILOG\n");
#endif
                    } else {
                        ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering CONTENT\n");
#endif
                    }
                    break;
                }
                if (RAW == '>') {
                    NEXT;
                } else {
                    ctxt->errNo = XML_ERR_GT_REQUIRED;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                                         "Couldn't find end of Start Tag %s\n",
                                         name);
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;

                    /*
                     * end of parsing of this node.
                     */
                    nodePop(ctxt);
                    oldname = namePop(ctxt);
                    spacePop(ctxt);
                    if (oldname != NULL) {
#ifdef DEBUG_STACK
                        xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
                        xmlFree(oldname);
                    }
                }
                xmlFree(name);
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            }
            case XML_PARSER_CONTENT: {
                /* Snapshot of position/consumed/token used below to detect
                 * an iteration that made no progress. */
                const xmlChar *test;
                int cons;
                int tok;

                /*
                 * Handle preparsed entities and charRef
                 */
                if (ctxt->token != 0) {
                    xmlChar current[2] = { 0 , 0 } ;

                    current[0] = (xmlChar) ctxt->token;
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                        (ctxt->sax->characters != NULL))
                        ctxt->sax->characters(ctxt->userData, current, 1);
                    ctxt->token = 0;
                }
                if ((avail < 2) && (ctxt->inputNr == 1))
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];

                test = CUR_PTR;
                cons = ctxt->input->consumed;
                tok = ctxt->token;
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_CONTENT;
                } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
                    (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
                    (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
                    (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
                    (ctxt->input->cur[8] == '[')) {
                    /* "<![CDATA[" : skip the 9 byte opener, the content
                     * is delivered by the CDATA_SECTION state. */
                    SKIP(9);
                    ctxt->instate = XML_PARSER_CDATA_SECTION;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CDATA_SECTION\n");
#endif
                    break;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 9)) {
                    /* "<!" seen but too few bytes to tell what it is. */
                    goto done;
                } else if ((cur == '<') && (next == '/')) {
                    ctxt->instate = XML_PARSER_END_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering END_TAG\n");
#endif
                    break;
                } else if (cur == '<') {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                    break;
                } else if (cur == '&') {
                    /* Unless this is the last chunk, wait for the ';'. */
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Reference\n");
#endif
                    xmlParseReference(ctxt);
                } else {
                    /* TODO Avoid the extra copy, handle directly !!! */
                    /*
                     * Goal of the following test is:
                     *  - minimize calls to the SAX 'character' callback
                     *    when they are mergeable
                     *  - handle a problem for isBlank when we only parse
                     *    a sequence of blank chars and the next one is
                     *    not available to check against '<' presence.
                     *  - tries to homogenize the differences in SAX
                     *    callbacks between the push and pull versions
                     *    of the parser.
                     */
                    if ((ctxt->inputNr == 1) &&
                        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
                        if ((!terminate) &&
                            (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
                            goto done;
                    }
                    ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing char data\n");
#endif
                    xmlParseCharData(ctxt, 0);
                }
                /*
                 * Pop-up of finished entities.
                 */
                while ((RAW == 0) && (ctxt->inputNr > 1))
                    xmlPopInput(ctxt);
                /*
                 * Nothing was consumed and no token is pending: stop with
                 * an internal error instead of looping forever.
                 */
                if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
                    (tok == ctxt->token)) {
                    ctxt->errNo = XML_ERR_INTERNAL_ERROR;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                             "detected an error in element content\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
                    break;
                }
                break;
            }
            case XML_PARSER_CDATA_SECTION: {
                /*
                 * The Push mode need to have the SAX callback for
                 * cdataBlock merge back contiguous callbacks.
                 */
                int base;

                /* Note: @terminate is not consulted in this state. */
                base = xmlParseLookupSequence(ctxt, ']', ']', '>');
                if (base < 0) {
                    /*
                     * End not available yet: when enough data piled up,
                     * deliver one XML_PARSER_BIG_BUFFER_SIZE block and
                     * keep at least 2 bytes, which may be the beginning
                     * of the "]]>" terminator.
                     */
                    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
                        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                            if (ctxt->sax->cdataBlock != NULL)
                                ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
                                      XML_PARSER_BIG_BUFFER_SIZE);
                        }
                        SKIP(XML_PARSER_BIG_BUFFER_SIZE);
                        ctxt->checkIndex = 0;
                    }
                    goto done;
                } else {
                    /* Deliver what precedes "]]>" then skip it too. */
                    if ((ctxt->sax != NULL) && (base > 0) &&
                        (!ctxt->disableSAX)) {
                        if (ctxt->sax->cdataBlock != NULL)
                            ctxt->sax->cdataBlock(ctxt->userData,
                                                  ctxt->input->cur, base);
                    }
                    SKIP(base + 3);
                    ctxt->checkIndex = 0;
                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CONTENT\n");
#endif
                }
                break;
            }
            case XML_PARSER_END_TAG:
                if (avail < 2)
                    goto done;
                /* Unless this is the last chunk, wait for a '>'. */
                if ((!terminate) &&
                    (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                    goto done;
                xmlParseEndTag(ctxt);
                /* No element left open means the root was just closed. */
                if (ctxt->name == NULL) {
                    ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EPILOG\n");
#endif
                } else {
                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CONTENT\n");
#endif
                }
                break;
            case XML_PARSER_DTD: {
                /*
                 * Progressive parsing of the internal subset is not
                 * supported. We first check that the full content of the
                 * internal subset is available and the parsing is launched
                 * only at that point.
                 * The internal subset ends with "']' S? '>'" outside of a
                 * quoted string and not within a ']]>' sequence, which
                 * closes a conditional section.
                 */
                int base, i;
                xmlChar *buf;
                xmlChar quote = 0;

                base = ctxt->input->cur - ctxt->input->base;
                if (base < 0) return(0);
                /* Resume from where the previous scan stopped, if further. */
                if (ctxt->checkIndex > base)
                    base = ctxt->checkIndex;
                /*
                 * NOTE(review): ctxt->input->buf is dereferenced without a
                 * NULL check here, unlike the avail computation above --
                 * confirm it cannot be NULL in this state.
                 */
                buf = ctxt->input->buf->buffer->content;
                for (;(unsigned int) base < ctxt->input->buf->buffer->use;
                     base++) {
                    /* Inside a quoted string only the closing quote counts. */
                    if (quote != 0) {
                        if (buf[base] == quote)
                            quote = 0;
                        continue;
                    }
                    if (buf[base] == '"') {
                        quote = '"';
                        continue;
                    }
                    if (buf[base] == '\'') {
                        quote = '\'';
                        continue;
                    }
                    if (buf[base] == ']') {
                        /* Need the following byte to decide. */
                        if ((unsigned int) base +1 >=
                            ctxt->input->buf->buffer->use)
                            break;
                        if (buf[base + 1] == ']') {
                            /* conditional section end, skip both ']' ! */
                            base++;
                            continue;
                        }
                        /*
                         * Any '>' found after the ']' in the data available
                         * so far is accepted as the end of the subset; the
                         * bytes in between are not checked here.
                         */
                        for (i = 0;
                     (unsigned int) base + i < ctxt->input->buf->buffer->use;
                             i++) {
                            if (buf[base + i] == '>')
                                goto found_end_int_subset;
                        }
                        break;
                    }
                }
                /*
                 * We didn't find the end of the Internal subset.
                 * The resume point is only recorded when the scan did not
                 * stop inside a quoted string.
                 */
                if (quote == 0)
                    ctxt->checkIndex = base;
#ifdef DEBUG_PUSH
                /*
                 * NOTE(review): "next" is not assigned in this state, so
                 * this test reads a stale or uninitialized value.
                 */
                if (next == 0)
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: lookup of int subset end filed\n");
#endif
                goto done;

found_end_int_subset:
                xmlParseInternalSubset(ctxt);
                /* Same external subset hand-over as in the MISC state. */
                ctxt->inSubset = 2;
                if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                    (ctxt->sax->externalSubset != NULL))
                    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                            ctxt->extSubSystem, ctxt->extSubURI);
                ctxt->inSubset = 0;
                ctxt->instate = XML_PARSER_PROLOG;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering PROLOG\n");
#endif
                break;
            }
            /*
             * The remaining states are not expected at this level in push
             * mode: each one reports an internal error and switches to a
             * state this function knows how to handle.
             */
            case XML_PARSER_COMMENT:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == COMMENT\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_PI:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == PI\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_ENTITY_DECL:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ENTITY_DECL\n");
                ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_ENTITY_VALUE:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ENTITY_VALUE\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                /* NOTE(review): trace says DTD but the state set is CONTENT. */
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_ATTRIBUTE_VALUE:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ATTRIBUTE_VALUE\n");
                ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering START_TAG\n");
#endif
                break;
            case XML_PARSER_SYSTEM_LITERAL:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == SYSTEM_LITERAL\n");
                ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering START_TAG\n");
#endif
                break;
        }
    }
done:
#ifdef DEBUG_PUSH
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
#endif
    return(ret);
}
8281
8282/**
Owen Taylor3473f882001-02-23 17:55:21 +00008283 * xmlParseChunk:
8284 * @ctxt: an XML parser context
8285 * @chunk: an char array
8286 * @size: the size in byte of the chunk
8287 * @terminate: last chunk indicator
8288 *
8289 * Parse a Chunk of memory
8290 *
8291 * Returns zero if no error, the xmlParserErrors otherwise.
8292 */
8293int
8294xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8295 int terminate) {
8296 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8297 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8298 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8299 int cur = ctxt->input->cur - ctxt->input->base;
8300
8301 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8302 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8303 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008304 ctxt->input->end =
8305 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008306#ifdef DEBUG_PUSH
8307 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8308#endif
8309
8310 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8311 xmlParseTryOrFinish(ctxt, terminate);
8312 } else if (ctxt->instate != XML_PARSER_EOF) {
8313 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8314 xmlParserInputBufferPtr in = ctxt->input->buf;
8315 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8316 (in->raw != NULL)) {
8317 int nbchars;
8318
8319 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8320 if (nbchars < 0) {
8321 xmlGenericError(xmlGenericErrorContext,
8322 "xmlParseChunk: encoder error\n");
8323 return(XML_ERR_INVALID_ENCODING);
8324 }
8325 }
8326 }
8327 }
8328 xmlParseTryOrFinish(ctxt, terminate);
8329 if (terminate) {
8330 /*
8331 * Check for termination
8332 */
8333 if ((ctxt->instate != XML_PARSER_EOF) &&
8334 (ctxt->instate != XML_PARSER_EPILOG)) {
8335 ctxt->errNo = XML_ERR_DOCUMENT_END;
8336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8337 ctxt->sax->error(ctxt->userData,
8338 "Extra content at the end of the document\n");
8339 ctxt->wellFormed = 0;
8340 ctxt->disableSAX = 1;
8341 }
8342 if (ctxt->instate != XML_PARSER_EOF) {
8343 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8344 (!ctxt->disableSAX))
8345 ctxt->sax->endDocument(ctxt->userData);
8346 }
8347 ctxt->instate = XML_PARSER_EOF;
8348 }
8349 return((xmlParserErrors) ctxt->errNo);
8350}
8351
8352/************************************************************************
8353 * *
8354 * I/O front end functions to the parser *
8355 * *
8356 ************************************************************************/
8357
8358/**
8359 * xmlStopParser:
8360 * @ctxt: an XML parser context
8361 *
8362 * Blocks further parser processing
8363 */
8364void
8365xmlStopParser(xmlParserCtxtPtr ctxt) {
8366 ctxt->instate = XML_PARSER_EOF;
8367 if (ctxt->input != NULL)
8368 ctxt->input->cur = BAD_CAST"";
8369}
8370
8371/**
8372 * xmlCreatePushParserCtxt:
8373 * @sax: a SAX handler
8374 * @user_data: The user data returned on SAX callbacks
8375 * @chunk: a pointer to an array of chars
8376 * @size: number of chars in the array
8377 * @filename: an optional file name or URI
8378 *
8379 * Create a parser context for using the XML parser in push mode
8380 * To allow content encoding detection, @size should be >= 4
8381 * The value of @filename is used for fetching external entities
8382 * and error/warning reports.
8383 *
8384 * Returns the new parser context or NULL
8385 */
8386xmlParserCtxtPtr
8387xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8388 const char *chunk, int size, const char *filename) {
8389 xmlParserCtxtPtr ctxt;
8390 xmlParserInputPtr inputStream;
8391 xmlParserInputBufferPtr buf;
8392 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8393
8394 /*
8395 * plug some encoding conversion routines
8396 */
8397 if ((chunk != NULL) && (size >= 4))
8398 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8399
8400 buf = xmlAllocParserInputBuffer(enc);
8401 if (buf == NULL) return(NULL);
8402
8403 ctxt = xmlNewParserCtxt();
8404 if (ctxt == NULL) {
8405 xmlFree(buf);
8406 return(NULL);
8407 }
8408 if (sax != NULL) {
8409 if (ctxt->sax != &xmlDefaultSAXHandler)
8410 xmlFree(ctxt->sax);
8411 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8412 if (ctxt->sax == NULL) {
8413 xmlFree(buf);
8414 xmlFree(ctxt);
8415 return(NULL);
8416 }
8417 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8418 if (user_data != NULL)
8419 ctxt->userData = user_data;
8420 }
8421 if (filename == NULL) {
8422 ctxt->directory = NULL;
8423 } else {
8424 ctxt->directory = xmlParserGetDirectory(filename);
8425 }
8426
8427 inputStream = xmlNewInputStream(ctxt);
8428 if (inputStream == NULL) {
8429 xmlFreeParserCtxt(ctxt);
8430 return(NULL);
8431 }
8432
8433 if (filename == NULL)
8434 inputStream->filename = NULL;
8435 else
8436 inputStream->filename = xmlMemStrdup(filename);
8437 inputStream->buf = buf;
8438 inputStream->base = inputStream->buf->buffer->content;
8439 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008440 inputStream->end =
8441 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008442 if (enc != XML_CHAR_ENCODING_NONE) {
8443 xmlSwitchEncoding(ctxt, enc);
8444 }
8445
8446 inputPush(ctxt, inputStream);
8447
8448 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8449 (ctxt->input->buf != NULL)) {
8450 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8451#ifdef DEBUG_PUSH
8452 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8453#endif
8454 }
8455
8456 return(ctxt);
8457}
8458
8459/**
8460 * xmlCreateIOParserCtxt:
8461 * @sax: a SAX handler
8462 * @user_data: The user data returned on SAX callbacks
8463 * @ioread: an I/O read function
8464 * @ioclose: an I/O close function
8465 * @ioctx: an I/O handler
8466 * @enc: the charset encoding if known
8467 *
8468 * Create a parser context for using the XML parser with an existing
8469 * I/O stream
8470 *
8471 * Returns the new parser context or NULL
8472 */
8473xmlParserCtxtPtr
8474xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8475 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8476 void *ioctx, xmlCharEncoding enc) {
8477 xmlParserCtxtPtr ctxt;
8478 xmlParserInputPtr inputStream;
8479 xmlParserInputBufferPtr buf;
8480
8481 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8482 if (buf == NULL) return(NULL);
8483
8484 ctxt = xmlNewParserCtxt();
8485 if (ctxt == NULL) {
8486 xmlFree(buf);
8487 return(NULL);
8488 }
8489 if (sax != NULL) {
8490 if (ctxt->sax != &xmlDefaultSAXHandler)
8491 xmlFree(ctxt->sax);
8492 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8493 if (ctxt->sax == NULL) {
8494 xmlFree(buf);
8495 xmlFree(ctxt);
8496 return(NULL);
8497 }
8498 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8499 if (user_data != NULL)
8500 ctxt->userData = user_data;
8501 }
8502
8503 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8504 if (inputStream == NULL) {
8505 xmlFreeParserCtxt(ctxt);
8506 return(NULL);
8507 }
8508 inputPush(ctxt, inputStream);
8509
8510 return(ctxt);
8511}
8512
8513/************************************************************************
8514 * *
8515 * Front ends when parsing a Dtd *
8516 * *
8517 ************************************************************************/
8518
8519/**
8520 * xmlIOParseDTD:
8521 * @sax: the SAX handler block or NULL
8522 * @input: an Input Buffer
8523 * @enc: the charset encoding if known
8524 *
8525 * Load and parse a DTD
8526 *
8527 * Returns the resulting xmlDtdPtr or NULL in case of error.
8528 * @input will be freed at parsing end.
8529 */
8530
8531xmlDtdPtr
8532xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8533 xmlCharEncoding enc) {
8534 xmlDtdPtr ret = NULL;
8535 xmlParserCtxtPtr ctxt;
8536 xmlParserInputPtr pinput = NULL;
8537
8538 if (input == NULL)
8539 return(NULL);
8540
8541 ctxt = xmlNewParserCtxt();
8542 if (ctxt == NULL) {
8543 return(NULL);
8544 }
8545
8546 /*
8547 * Set-up the SAX context
8548 */
8549 if (sax != NULL) {
8550 if (ctxt->sax != NULL)
8551 xmlFree(ctxt->sax);
8552 ctxt->sax = sax;
8553 ctxt->userData = NULL;
8554 }
8555
8556 /*
8557 * generate a parser input from the I/O handler
8558 */
8559
8560 pinput = xmlNewIOInputStream(ctxt, input, enc);
8561 if (pinput == NULL) {
8562 if (sax != NULL) ctxt->sax = NULL;
8563 xmlFreeParserCtxt(ctxt);
8564 return(NULL);
8565 }
8566
8567 /*
8568 * plug some encoding conversion routines here.
8569 */
8570 xmlPushInput(ctxt, pinput);
8571
8572 pinput->filename = NULL;
8573 pinput->line = 1;
8574 pinput->col = 1;
8575 pinput->base = ctxt->input->cur;
8576 pinput->cur = ctxt->input->cur;
8577 pinput->free = NULL;
8578
8579 /*
8580 * let's parse that entity knowing it's an external subset.
8581 */
8582 ctxt->inSubset = 2;
8583 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8584 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8585 BAD_CAST "none", BAD_CAST "none");
8586 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8587
8588 if (ctxt->myDoc != NULL) {
8589 if (ctxt->wellFormed) {
8590 ret = ctxt->myDoc->extSubset;
8591 ctxt->myDoc->extSubset = NULL;
8592 } else {
8593 ret = NULL;
8594 }
8595 xmlFreeDoc(ctxt->myDoc);
8596 ctxt->myDoc = NULL;
8597 }
8598 if (sax != NULL) ctxt->sax = NULL;
8599 xmlFreeParserCtxt(ctxt);
8600
8601 return(ret);
8602}
8603
8604/**
8605 * xmlSAXParseDTD:
8606 * @sax: the SAX handler block
8607 * @ExternalID: a NAME* containing the External ID of the DTD
8608 * @SystemID: a NAME* containing the URL to the DTD
8609 *
8610 * Load and parse an external subset.
8611 *
8612 * Returns the resulting xmlDtdPtr or NULL in case of error.
8613 */
8614
8615xmlDtdPtr
8616xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8617 const xmlChar *SystemID) {
8618 xmlDtdPtr ret = NULL;
8619 xmlParserCtxtPtr ctxt;
8620 xmlParserInputPtr input = NULL;
8621 xmlCharEncoding enc;
8622
8623 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8624
8625 ctxt = xmlNewParserCtxt();
8626 if (ctxt == NULL) {
8627 return(NULL);
8628 }
8629
8630 /*
8631 * Set-up the SAX context
8632 */
8633 if (sax != NULL) {
8634 if (ctxt->sax != NULL)
8635 xmlFree(ctxt->sax);
8636 ctxt->sax = sax;
8637 ctxt->userData = NULL;
8638 }
8639
8640 /*
8641 * Ask the Entity resolver to load the damn thing
8642 */
8643
8644 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8645 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8646 if (input == NULL) {
8647 if (sax != NULL) ctxt->sax = NULL;
8648 xmlFreeParserCtxt(ctxt);
8649 return(NULL);
8650 }
8651
8652 /*
8653 * plug some encoding conversion routines here.
8654 */
8655 xmlPushInput(ctxt, input);
8656 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8657 xmlSwitchEncoding(ctxt, enc);
8658
8659 if (input->filename == NULL)
8660 input->filename = (char *) xmlStrdup(SystemID);
8661 input->line = 1;
8662 input->col = 1;
8663 input->base = ctxt->input->cur;
8664 input->cur = ctxt->input->cur;
8665 input->free = NULL;
8666
8667 /*
8668 * let's parse that entity knowing it's an external subset.
8669 */
8670 ctxt->inSubset = 2;
8671 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8672 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8673 ExternalID, SystemID);
8674 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8675
8676 if (ctxt->myDoc != NULL) {
8677 if (ctxt->wellFormed) {
8678 ret = ctxt->myDoc->extSubset;
8679 ctxt->myDoc->extSubset = NULL;
8680 } else {
8681 ret = NULL;
8682 }
8683 xmlFreeDoc(ctxt->myDoc);
8684 ctxt->myDoc = NULL;
8685 }
8686 if (sax != NULL) ctxt->sax = NULL;
8687 xmlFreeParserCtxt(ctxt);
8688
8689 return(ret);
8690}
8691
8692/**
8693 * xmlParseDTD:
8694 * @ExternalID: a NAME* containing the External ID of the DTD
8695 * @SystemID: a NAME* containing the URL to the DTD
8696 *
8697 * Load and parse an external subset.
8698 *
8699 * Returns the resulting xmlDtdPtr or NULL in case of error.
8700 */
8701
8702xmlDtdPtr
8703xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8704 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8705}
8706
8707/************************************************************************
8708 * *
8709 * Front ends when parsing an Entity *
8710 * *
8711 ************************************************************************/
8712
8713/**
Owen Taylor3473f882001-02-23 17:55:21 +00008714 * xmlParseCtxtExternalEntity:
8715 * @ctx: the existing parsing context
8716 * @URL: the URL for the entity to load
8717 * @ID: the System ID for the entity to load
8718 * @list: the return value for the set of parsed nodes
8719 *
8720 * Parse an external general entity within an existing parsing context
8721 * An external general parsed entity is well-formed if it matches the
8722 * production labeled extParsedEnt.
8723 *
8724 * [78] extParsedEnt ::= TextDecl? content
8725 *
8726 * Returns 0 if the entity is well formed, -1 in case of args problem and
8727 * the parser error code otherwise
8728 */
8729
8730int
8731xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8732 const xmlChar *ID, xmlNodePtr *list) {
8733 xmlParserCtxtPtr ctxt;
8734 xmlDocPtr newDoc;
8735 xmlSAXHandlerPtr oldsax = NULL;
8736 int ret = 0;
8737
8738 if (ctx->depth > 40) {
8739 return(XML_ERR_ENTITY_LOOP);
8740 }
8741
8742 if (list != NULL)
8743 *list = NULL;
8744 if ((URL == NULL) && (ID == NULL))
8745 return(-1);
8746 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8747 return(-1);
8748
8749
8750 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8751 if (ctxt == NULL) return(-1);
8752 ctxt->userData = ctxt;
8753 oldsax = ctxt->sax;
8754 ctxt->sax = ctx->sax;
8755 newDoc = xmlNewDoc(BAD_CAST "1.0");
8756 if (newDoc == NULL) {
8757 xmlFreeParserCtxt(ctxt);
8758 return(-1);
8759 }
8760 if (ctx->myDoc != NULL) {
8761 newDoc->intSubset = ctx->myDoc->intSubset;
8762 newDoc->extSubset = ctx->myDoc->extSubset;
8763 }
8764 if (ctx->myDoc->URL != NULL) {
8765 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8766 }
8767 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8768 if (newDoc->children == NULL) {
8769 ctxt->sax = oldsax;
8770 xmlFreeParserCtxt(ctxt);
8771 newDoc->intSubset = NULL;
8772 newDoc->extSubset = NULL;
8773 xmlFreeDoc(newDoc);
8774 return(-1);
8775 }
8776 nodePush(ctxt, newDoc->children);
8777 if (ctx->myDoc == NULL) {
8778 ctxt->myDoc = newDoc;
8779 } else {
8780 ctxt->myDoc = ctx->myDoc;
8781 newDoc->children->doc = ctx->myDoc;
8782 }
8783
8784 /*
8785 * Parse a possible text declaration first
8786 */
8787 GROW;
8788 if ((RAW == '<') && (NXT(1) == '?') &&
8789 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8790 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8791 xmlParseTextDecl(ctxt);
8792 }
8793
8794 /*
8795 * Doing validity checking on chunk doesn't make sense
8796 */
8797 ctxt->instate = XML_PARSER_CONTENT;
8798 ctxt->validate = ctx->validate;
8799 ctxt->loadsubset = ctx->loadsubset;
8800 ctxt->depth = ctx->depth + 1;
8801 ctxt->replaceEntities = ctx->replaceEntities;
8802 if (ctxt->validate) {
8803 ctxt->vctxt.error = ctx->vctxt.error;
8804 ctxt->vctxt.warning = ctx->vctxt.warning;
8805 /* Allocate the Node stack */
8806 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8807 if (ctxt->vctxt.nodeTab == NULL) {
8808 xmlGenericError(xmlGenericErrorContext,
8809 "xmlParseCtxtExternalEntity: out of memory\n");
8810 ctxt->validate = 0;
8811 ctxt->vctxt.error = NULL;
8812 ctxt->vctxt.warning = NULL;
8813 } else {
8814 ctxt->vctxt.nodeNr = 0;
8815 ctxt->vctxt.nodeMax = 4;
8816 ctxt->vctxt.node = NULL;
8817 }
8818 } else {
8819 ctxt->vctxt.error = NULL;
8820 ctxt->vctxt.warning = NULL;
8821 }
8822
8823 xmlParseContent(ctxt);
8824
8825 if ((RAW == '<') && (NXT(1) == '/')) {
8826 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8828 ctxt->sax->error(ctxt->userData,
8829 "chunk is not well balanced\n");
8830 ctxt->wellFormed = 0;
8831 ctxt->disableSAX = 1;
8832 } else if (RAW != 0) {
8833 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8835 ctxt->sax->error(ctxt->userData,
8836 "extra content at the end of well balanced chunk\n");
8837 ctxt->wellFormed = 0;
8838 ctxt->disableSAX = 1;
8839 }
8840 if (ctxt->node != newDoc->children) {
8841 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8843 ctxt->sax->error(ctxt->userData,
8844 "chunk is not well balanced\n");
8845 ctxt->wellFormed = 0;
8846 ctxt->disableSAX = 1;
8847 }
8848
8849 if (!ctxt->wellFormed) {
8850 if (ctxt->errNo == 0)
8851 ret = 1;
8852 else
8853 ret = ctxt->errNo;
8854 } else {
8855 if (list != NULL) {
8856 xmlNodePtr cur;
8857
8858 /*
8859 * Return the newly created nodeset after unlinking it from
8860 * they pseudo parent.
8861 */
8862 cur = newDoc->children->children;
8863 *list = cur;
8864 while (cur != NULL) {
8865 cur->parent = NULL;
8866 cur = cur->next;
8867 }
8868 newDoc->children->children = NULL;
8869 }
8870 ret = 0;
8871 }
8872 ctxt->sax = oldsax;
8873 xmlFreeParserCtxt(ctxt);
8874 newDoc->intSubset = NULL;
8875 newDoc->extSubset = NULL;
8876 xmlFreeDoc(newDoc);
8877
8878 return(ret);
8879}
8880
8881/**
8882 * xmlParseExternalEntity:
8883 * @doc: the document the chunk pertains to
8884 * @sax: the SAX handler bloc (possibly NULL)
8885 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8886 * @depth: Used for loop detection, use 0
8887 * @URL: the URL for the entity to load
8888 * @ID: the System ID for the entity to load
8889 * @list: the return value for the set of parsed nodes
8890 *
8891 * Parse an external general entity
8892 * An external general parsed entity is well-formed if it matches the
8893 * production labeled extParsedEnt.
8894 *
8895 * [78] extParsedEnt ::= TextDecl? content
8896 *
8897 * Returns 0 if the entity is well formed, -1 in case of args problem and
8898 * the parser error code otherwise
8899 */
8900
8901int
8902xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8903 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8904 xmlParserCtxtPtr ctxt;
8905 xmlDocPtr newDoc;
8906 xmlSAXHandlerPtr oldsax = NULL;
8907 int ret = 0;
8908
8909 if (depth > 40) {
8910 return(XML_ERR_ENTITY_LOOP);
8911 }
8912
8913
8914
8915 if (list != NULL)
8916 *list = NULL;
8917 if ((URL == NULL) && (ID == NULL))
8918 return(-1);
8919 if (doc == NULL) /* @@ relax but check for dereferences */
8920 return(-1);
8921
8922
8923 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8924 if (ctxt == NULL) return(-1);
8925 ctxt->userData = ctxt;
8926 if (sax != NULL) {
8927 oldsax = ctxt->sax;
8928 ctxt->sax = sax;
8929 if (user_data != NULL)
8930 ctxt->userData = user_data;
8931 }
8932 newDoc = xmlNewDoc(BAD_CAST "1.0");
8933 if (newDoc == NULL) {
8934 xmlFreeParserCtxt(ctxt);
8935 return(-1);
8936 }
8937 if (doc != NULL) {
8938 newDoc->intSubset = doc->intSubset;
8939 newDoc->extSubset = doc->extSubset;
8940 }
8941 if (doc->URL != NULL) {
8942 newDoc->URL = xmlStrdup(doc->URL);
8943 }
8944 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8945 if (newDoc->children == NULL) {
8946 if (sax != NULL)
8947 ctxt->sax = oldsax;
8948 xmlFreeParserCtxt(ctxt);
8949 newDoc->intSubset = NULL;
8950 newDoc->extSubset = NULL;
8951 xmlFreeDoc(newDoc);
8952 return(-1);
8953 }
8954 nodePush(ctxt, newDoc->children);
8955 if (doc == NULL) {
8956 ctxt->myDoc = newDoc;
8957 } else {
8958 ctxt->myDoc = doc;
8959 newDoc->children->doc = doc;
8960 }
8961
8962 /*
8963 * Parse a possible text declaration first
8964 */
8965 GROW;
8966 if ((RAW == '<') && (NXT(1) == '?') &&
8967 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8968 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8969 xmlParseTextDecl(ctxt);
8970 }
8971
8972 /*
8973 * Doing validity checking on chunk doesn't make sense
8974 */
8975 ctxt->instate = XML_PARSER_CONTENT;
8976 ctxt->validate = 0;
Daniel Veillarde470df72001-04-18 21:41:07 +00008977 ctxt->external = 2;
Owen Taylor3473f882001-02-23 17:55:21 +00008978 ctxt->loadsubset = 0;
8979 ctxt->depth = depth;
8980
8981 xmlParseContent(ctxt);
8982
8983 if ((RAW == '<') && (NXT(1) == '/')) {
8984 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8986 ctxt->sax->error(ctxt->userData,
8987 "chunk is not well balanced\n");
8988 ctxt->wellFormed = 0;
8989 ctxt->disableSAX = 1;
8990 } else if (RAW != 0) {
8991 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8993 ctxt->sax->error(ctxt->userData,
8994 "extra content at the end of well balanced chunk\n");
8995 ctxt->wellFormed = 0;
8996 ctxt->disableSAX = 1;
8997 }
8998 if (ctxt->node != newDoc->children) {
8999 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9001 ctxt->sax->error(ctxt->userData,
9002 "chunk is not well balanced\n");
9003 ctxt->wellFormed = 0;
9004 ctxt->disableSAX = 1;
9005 }
9006
9007 if (!ctxt->wellFormed) {
9008 if (ctxt->errNo == 0)
9009 ret = 1;
9010 else
9011 ret = ctxt->errNo;
9012 } else {
9013 if (list != NULL) {
9014 xmlNodePtr cur;
9015
9016 /*
9017 * Return the newly created nodeset after unlinking it from
9018 * they pseudo parent.
9019 */
9020 cur = newDoc->children->children;
9021 *list = cur;
9022 while (cur != NULL) {
9023 cur->parent = NULL;
9024 cur = cur->next;
9025 }
9026 newDoc->children->children = NULL;
9027 }
9028 ret = 0;
9029 }
9030 if (sax != NULL)
9031 ctxt->sax = oldsax;
9032 xmlFreeParserCtxt(ctxt);
9033 newDoc->intSubset = NULL;
9034 newDoc->extSubset = NULL;
9035 xmlFreeDoc(newDoc);
9036
9037 return(ret);
9038}
9039
9040/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009041 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009042 * @doc: the document the chunk pertains to
9043 * @sax: the SAX handler bloc (possibly NULL)
9044 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9045 * @depth: Used for loop detection, use 0
9046 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9047 * @list: the return value for the set of parsed nodes
9048 *
9049 * Parse a well-balanced chunk of an XML document
9050 * called by the parser
9051 * The allowed sequence for the Well Balanced Chunk is the one defined by
9052 * the content production in the XML grammar:
9053 *
9054 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9055 *
9056 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9057 * the parser error code otherwise
9058 */
9059
9060int
9061xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9062 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9063 xmlParserCtxtPtr ctxt;
9064 xmlDocPtr newDoc;
9065 xmlSAXHandlerPtr oldsax = NULL;
9066 int size;
9067 int ret = 0;
9068
9069 if (depth > 40) {
9070 return(XML_ERR_ENTITY_LOOP);
9071 }
9072
9073
9074 if (list != NULL)
9075 *list = NULL;
9076 if (string == NULL)
9077 return(-1);
9078
9079 size = xmlStrlen(string);
9080
9081 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9082 if (ctxt == NULL) return(-1);
9083 ctxt->userData = ctxt;
9084 if (sax != NULL) {
9085 oldsax = ctxt->sax;
9086 ctxt->sax = sax;
9087 if (user_data != NULL)
9088 ctxt->userData = user_data;
9089 }
9090 newDoc = xmlNewDoc(BAD_CAST "1.0");
9091 if (newDoc == NULL) {
9092 xmlFreeParserCtxt(ctxt);
9093 return(-1);
9094 }
9095 if (doc != NULL) {
9096 newDoc->intSubset = doc->intSubset;
9097 newDoc->extSubset = doc->extSubset;
9098 }
9099 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9100 if (newDoc->children == NULL) {
9101 if (sax != NULL)
9102 ctxt->sax = oldsax;
9103 xmlFreeParserCtxt(ctxt);
9104 newDoc->intSubset = NULL;
9105 newDoc->extSubset = NULL;
9106 xmlFreeDoc(newDoc);
9107 return(-1);
9108 }
9109 nodePush(ctxt, newDoc->children);
9110 if (doc == NULL) {
9111 ctxt->myDoc = newDoc;
9112 } else {
9113 ctxt->myDoc = doc;
9114 newDoc->children->doc = doc;
9115 }
9116 ctxt->instate = XML_PARSER_CONTENT;
9117 ctxt->depth = depth;
9118
9119 /*
9120 * Doing validity checking on chunk doesn't make sense
9121 */
9122 ctxt->validate = 0;
9123 ctxt->loadsubset = 0;
9124
9125 xmlParseContent(ctxt);
9126
9127 if ((RAW == '<') && (NXT(1) == '/')) {
9128 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9130 ctxt->sax->error(ctxt->userData,
9131 "chunk is not well balanced\n");
9132 ctxt->wellFormed = 0;
9133 ctxt->disableSAX = 1;
9134 } else if (RAW != 0) {
9135 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9137 ctxt->sax->error(ctxt->userData,
9138 "extra content at the end of well balanced chunk\n");
9139 ctxt->wellFormed = 0;
9140 ctxt->disableSAX = 1;
9141 }
9142 if (ctxt->node != newDoc->children) {
9143 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9145 ctxt->sax->error(ctxt->userData,
9146 "chunk is not well balanced\n");
9147 ctxt->wellFormed = 0;
9148 ctxt->disableSAX = 1;
9149 }
9150
9151 if (!ctxt->wellFormed) {
9152 if (ctxt->errNo == 0)
9153 ret = 1;
9154 else
9155 ret = ctxt->errNo;
9156 } else {
9157 if (list != NULL) {
9158 xmlNodePtr cur;
9159
9160 /*
9161 * Return the newly created nodeset after unlinking it from
9162 * they pseudo parent.
9163 */
9164 cur = newDoc->children->children;
9165 *list = cur;
9166 while (cur != NULL) {
9167 cur->parent = NULL;
9168 cur = cur->next;
9169 }
9170 newDoc->children->children = NULL;
9171 }
9172 ret = 0;
9173 }
9174 if (sax != NULL)
9175 ctxt->sax = oldsax;
9176 xmlFreeParserCtxt(ctxt);
9177 newDoc->intSubset = NULL;
9178 newDoc->extSubset = NULL;
9179 xmlFreeDoc(newDoc);
9180
9181 return(ret);
9182}
9183
9184/**
9185 * xmlSAXParseEntity:
9186 * @sax: the SAX handler block
9187 * @filename: the filename
9188 *
9189 * parse an XML external entity out of context and build a tree.
9190 * It use the given SAX function block to handle the parsing callback.
9191 * If sax is NULL, fallback to the default DOM tree building routines.
9192 *
9193 * [78] extParsedEnt ::= TextDecl? content
9194 *
9195 * This correspond to a "Well Balanced" chunk
9196 *
9197 * Returns the resulting document tree
9198 */
9199
9200xmlDocPtr
9201xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9202 xmlDocPtr ret;
9203 xmlParserCtxtPtr ctxt;
9204 char *directory = NULL;
9205
9206 ctxt = xmlCreateFileParserCtxt(filename);
9207 if (ctxt == NULL) {
9208 return(NULL);
9209 }
9210 if (sax != NULL) {
9211 if (ctxt->sax != NULL)
9212 xmlFree(ctxt->sax);
9213 ctxt->sax = sax;
9214 ctxt->userData = NULL;
9215 }
9216
9217 if ((ctxt->directory == NULL) && (directory == NULL))
9218 directory = xmlParserGetDirectory(filename);
9219
9220 xmlParseExtParsedEnt(ctxt);
9221
9222 if (ctxt->wellFormed)
9223 ret = ctxt->myDoc;
9224 else {
9225 ret = NULL;
9226 xmlFreeDoc(ctxt->myDoc);
9227 ctxt->myDoc = NULL;
9228 }
9229 if (sax != NULL)
9230 ctxt->sax = NULL;
9231 xmlFreeParserCtxt(ctxt);
9232
9233 return(ret);
9234}
9235
9236/**
9237 * xmlParseEntity:
9238 * @filename: the filename
9239 *
9240 * parse an XML external entity out of context and build a tree.
9241 *
9242 * [78] extParsedEnt ::= TextDecl? content
9243 *
9244 * This correspond to a "Well Balanced" chunk
9245 *
9246 * Returns the resulting document tree
9247 */
9248
9249xmlDocPtr
9250xmlParseEntity(const char *filename) {
9251 return(xmlSAXParseEntity(NULL, filename));
9252}
9253
9254/**
9255 * xmlCreateEntityParserCtxt:
9256 * @URL: the entity URL
9257 * @ID: the entity PUBLIC ID
9258 * @base: a posible base for the target URI
9259 *
9260 * Create a parser context for an external entity
9261 * Automatic support for ZLIB/Compress compressed document is provided
9262 * by default if found at compile-time.
9263 *
9264 * Returns the new parser context or NULL
9265 */
9266xmlParserCtxtPtr
9267xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9268 const xmlChar *base) {
9269 xmlParserCtxtPtr ctxt;
9270 xmlParserInputPtr inputStream;
9271 char *directory = NULL;
9272 xmlChar *uri;
9273
9274 ctxt = xmlNewParserCtxt();
9275 if (ctxt == NULL) {
9276 return(NULL);
9277 }
9278
9279 uri = xmlBuildURI(URL, base);
9280
9281 if (uri == NULL) {
9282 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9283 if (inputStream == NULL) {
9284 xmlFreeParserCtxt(ctxt);
9285 return(NULL);
9286 }
9287
9288 inputPush(ctxt, inputStream);
9289
9290 if ((ctxt->directory == NULL) && (directory == NULL))
9291 directory = xmlParserGetDirectory((char *)URL);
9292 if ((ctxt->directory == NULL) && (directory != NULL))
9293 ctxt->directory = directory;
9294 } else {
9295 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9296 if (inputStream == NULL) {
9297 xmlFree(uri);
9298 xmlFreeParserCtxt(ctxt);
9299 return(NULL);
9300 }
9301
9302 inputPush(ctxt, inputStream);
9303
9304 if ((ctxt->directory == NULL) && (directory == NULL))
9305 directory = xmlParserGetDirectory((char *)uri);
9306 if ((ctxt->directory == NULL) && (directory != NULL))
9307 ctxt->directory = directory;
9308 xmlFree(uri);
9309 }
9310
9311 return(ctxt);
9312}
9313
9314/************************************************************************
9315 * *
9316 * Front ends when parsing from a file *
9317 * *
9318 ************************************************************************/
9319
9320/**
9321 * xmlCreateFileParserCtxt:
9322 * @filename: the filename
9323 *
9324 * Create a parser context for a file content.
9325 * Automatic support for ZLIB/Compress compressed document is provided
9326 * by default if found at compile-time.
9327 *
9328 * Returns the new parser context or NULL
9329 */
9330xmlParserCtxtPtr
9331xmlCreateFileParserCtxt(const char *filename)
9332{
9333 xmlParserCtxtPtr ctxt;
9334 xmlParserInputPtr inputStream;
9335 xmlParserInputBufferPtr buf;
9336 char *directory = NULL;
9337
9338 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9339 if (buf == NULL) {
9340 return(NULL);
9341 }
9342
9343 ctxt = xmlNewParserCtxt();
9344 if (ctxt == NULL) {
9345 if (xmlDefaultSAXHandler.error != NULL) {
9346 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9347 }
9348 return(NULL);
9349 }
9350
9351 inputStream = xmlNewInputStream(ctxt);
9352 if (inputStream == NULL) {
9353 xmlFreeParserCtxt(ctxt);
9354 return(NULL);
9355 }
9356
9357 inputStream->filename = xmlMemStrdup(filename);
9358 inputStream->buf = buf;
9359 inputStream->base = inputStream->buf->buffer->content;
9360 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009361 inputStream->end =
9362 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009363
9364 inputPush(ctxt, inputStream);
9365 if ((ctxt->directory == NULL) && (directory == NULL))
9366 directory = xmlParserGetDirectory(filename);
9367 if ((ctxt->directory == NULL) && (directory != NULL))
9368 ctxt->directory = directory;
9369
9370 return(ctxt);
9371}
9372
9373/**
9374 * xmlSAXParseFile:
9375 * @sax: the SAX handler block
9376 * @filename: the filename
9377 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9378 * documents
9379 *
9380 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9381 * compressed document is provided by default if found at compile-time.
9382 * It use the given SAX function block to handle the parsing callback.
9383 * If sax is NULL, fallback to the default DOM tree building routines.
9384 *
9385 * Returns the resulting document tree
9386 */
9387
9388xmlDocPtr
9389xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9390 int recovery) {
9391 xmlDocPtr ret;
9392 xmlParserCtxtPtr ctxt;
9393 char *directory = NULL;
9394
9395 ctxt = xmlCreateFileParserCtxt(filename);
9396 if (ctxt == NULL) {
9397 return(NULL);
9398 }
9399 if (sax != NULL) {
9400 if (ctxt->sax != NULL)
9401 xmlFree(ctxt->sax);
9402 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009403 }
9404
9405 if ((ctxt->directory == NULL) && (directory == NULL))
9406 directory = xmlParserGetDirectory(filename);
9407 if ((ctxt->directory == NULL) && (directory != NULL))
9408 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9409
9410 xmlParseDocument(ctxt);
9411
9412 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9413 else {
9414 ret = NULL;
9415 xmlFreeDoc(ctxt->myDoc);
9416 ctxt->myDoc = NULL;
9417 }
9418 if (sax != NULL)
9419 ctxt->sax = NULL;
9420 xmlFreeParserCtxt(ctxt);
9421
9422 return(ret);
9423}
9424
9425/**
9426 * xmlRecoverDoc:
9427 * @cur: a pointer to an array of xmlChar
9428 *
9429 * parse an XML in-memory document and build a tree.
9430 * In the case the document is not Well Formed, a tree is built anyway
9431 *
9432 * Returns the resulting document tree
9433 */
9434
9435xmlDocPtr
9436xmlRecoverDoc(xmlChar *cur) {
9437 return(xmlSAXParseDoc(NULL, cur, 1));
9438}
9439
9440/**
9441 * xmlParseFile:
9442 * @filename: the filename
9443 *
9444 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9445 * compressed document is provided by default if found at compile-time.
9446 *
9447 * Returns the resulting document tree
9448 */
9449
9450xmlDocPtr
9451xmlParseFile(const char *filename) {
9452 return(xmlSAXParseFile(NULL, filename, 0));
9453}
9454
9455/**
9456 * xmlRecoverFile:
9457 * @filename: the filename
9458 *
9459 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9460 * compressed document is provided by default if found at compile-time.
9461 * In the case the document is not Well Formed, a tree is built anyway
9462 *
9463 * Returns the resulting document tree
9464 */
9465
9466xmlDocPtr
9467xmlRecoverFile(const char *filename) {
9468 return(xmlSAXParseFile(NULL, filename, 1));
9469}
9470
9471
9472/**
9473 * xmlSetupParserForBuffer:
9474 * @ctxt: an XML parser context
9475 * @buffer: a xmlChar * buffer
9476 * @filename: a file name
9477 *
9478 * Setup the parser context to parse a new buffer; Clears any prior
9479 * contents from the parser context. The buffer parameter must not be
9480 * NULL, but the filename parameter can be
9481 */
9482void
9483xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9484 const char* filename)
9485{
9486 xmlParserInputPtr input;
9487
9488 input = xmlNewInputStream(ctxt);
9489 if (input == NULL) {
9490 perror("malloc");
9491 xmlFree(ctxt);
9492 return;
9493 }
9494
9495 xmlClearParserCtxt(ctxt);
9496 if (filename != NULL)
9497 input->filename = xmlMemStrdup(filename);
9498 input->base = buffer;
9499 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009500 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009501 inputPush(ctxt, input);
9502}
9503
9504/**
9505 * xmlSAXUserParseFile:
9506 * @sax: a SAX handler
9507 * @user_data: The user data returned on SAX callbacks
9508 * @filename: a file name
9509 *
9510 * parse an XML file and call the given SAX handler routines.
9511 * Automatic support for ZLIB/Compress compressed document is provided
9512 *
9513 * Returns 0 in case of success or a error number otherwise
9514 */
9515int
9516xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9517 const char *filename) {
9518 int ret = 0;
9519 xmlParserCtxtPtr ctxt;
9520
9521 ctxt = xmlCreateFileParserCtxt(filename);
9522 if (ctxt == NULL) return -1;
9523 if (ctxt->sax != &xmlDefaultSAXHandler)
9524 xmlFree(ctxt->sax);
9525 ctxt->sax = sax;
9526 if (user_data != NULL)
9527 ctxt->userData = user_data;
9528
9529 xmlParseDocument(ctxt);
9530
9531 if (ctxt->wellFormed)
9532 ret = 0;
9533 else {
9534 if (ctxt->errNo != 0)
9535 ret = ctxt->errNo;
9536 else
9537 ret = -1;
9538 }
9539 if (sax != NULL)
9540 ctxt->sax = NULL;
9541 xmlFreeParserCtxt(ctxt);
9542
9543 return ret;
9544}
9545
9546/************************************************************************
9547 * *
9548 * Front ends when parsing from memory *
9549 * *
9550 ************************************************************************/
9551
9552/**
9553 * xmlCreateMemoryParserCtxt:
9554 * @buffer: a pointer to a char array
9555 * @size: the size of the array
9556 *
9557 * Create a parser context for an XML in-memory document.
9558 *
9559 * Returns the new parser context or NULL
9560 */
9561xmlParserCtxtPtr
9562xmlCreateMemoryParserCtxt(char *buffer, int size) {
9563 xmlParserCtxtPtr ctxt;
9564 xmlParserInputPtr input;
9565 xmlParserInputBufferPtr buf;
9566
9567 if (buffer == NULL)
9568 return(NULL);
9569 if (size <= 0)
9570 return(NULL);
9571
9572 ctxt = xmlNewParserCtxt();
9573 if (ctxt == NULL)
9574 return(NULL);
9575
9576 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9577 if (buf == NULL) return(NULL);
9578
9579 input = xmlNewInputStream(ctxt);
9580 if (input == NULL) {
9581 xmlFreeParserCtxt(ctxt);
9582 return(NULL);
9583 }
9584
9585 input->filename = NULL;
9586 input->buf = buf;
9587 input->base = input->buf->buffer->content;
9588 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009589 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009590
9591 inputPush(ctxt, input);
9592 return(ctxt);
9593}
9594
9595/**
9596 * xmlSAXParseMemory:
9597 * @sax: the SAX handler block
9598 * @buffer: an pointer to a char array
9599 * @size: the size of the array
9600 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9601 * documents
9602 *
9603 * parse an XML in-memory block and use the given SAX function block
9604 * to handle the parsing callback. If sax is NULL, fallback to the default
9605 * DOM tree building routines.
9606 *
9607 * Returns the resulting document tree
9608 */
9609xmlDocPtr
9610xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9611 xmlDocPtr ret;
9612 xmlParserCtxtPtr ctxt;
9613
9614 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9615 if (ctxt == NULL) return(NULL);
9616 if (sax != NULL) {
9617 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009618 }
9619
9620 xmlParseDocument(ctxt);
9621
9622 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9623 else {
9624 ret = NULL;
9625 xmlFreeDoc(ctxt->myDoc);
9626 ctxt->myDoc = NULL;
9627 }
9628 if (sax != NULL)
9629 ctxt->sax = NULL;
9630 xmlFreeParserCtxt(ctxt);
9631
9632 return(ret);
9633}
9634
9635/**
9636 * xmlParseMemory:
9637 * @buffer: an pointer to a char array
9638 * @size: the size of the array
9639 *
9640 * parse an XML in-memory block and build a tree.
9641 *
9642 * Returns the resulting document tree
9643 */
9644
9645xmlDocPtr xmlParseMemory(char *buffer, int size) {
9646 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9647}
9648
9649/**
9650 * xmlRecoverMemory:
9651 * @buffer: an pointer to a char array
9652 * @size: the size of the array
9653 *
9654 * parse an XML in-memory block and build a tree.
9655 * In the case the document is not Well Formed, a tree is built anyway
9656 *
9657 * Returns the resulting document tree
9658 */
9659
9660xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9661 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9662}
9663
9664/**
9665 * xmlSAXUserParseMemory:
9666 * @sax: a SAX handler
9667 * @user_data: The user data returned on SAX callbacks
9668 * @buffer: an in-memory XML document input
9669 * @size: the length of the XML document in bytes
9670 *
9671 * A better SAX parsing routine.
9672 * parse an XML in-memory buffer and call the given SAX handler routines.
9673 *
9674 * Returns 0 in case of success or a error number otherwise
9675 */
9676int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9677 char *buffer, int size) {
9678 int ret = 0;
9679 xmlParserCtxtPtr ctxt;
9680 xmlSAXHandlerPtr oldsax = NULL;
9681
9682 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9683 if (ctxt == NULL) return -1;
9684 if (sax != NULL) {
9685 oldsax = ctxt->sax;
9686 ctxt->sax = sax;
9687 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009688 if (user_data != NULL)
9689 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009690
9691 xmlParseDocument(ctxt);
9692
9693 if (ctxt->wellFormed)
9694 ret = 0;
9695 else {
9696 if (ctxt->errNo != 0)
9697 ret = ctxt->errNo;
9698 else
9699 ret = -1;
9700 }
9701 if (sax != NULL) {
9702 ctxt->sax = oldsax;
9703 }
9704 xmlFreeParserCtxt(ctxt);
9705
9706 return ret;
9707}
9708
9709/**
9710 * xmlCreateDocParserCtxt:
9711 * @cur: a pointer to an array of xmlChar
9712 *
9713 * Creates a parser context for an XML in-memory document.
9714 *
9715 * Returns the new parser context or NULL
9716 */
9717xmlParserCtxtPtr
9718xmlCreateDocParserCtxt(xmlChar *cur) {
9719 int len;
9720
9721 if (cur == NULL)
9722 return(NULL);
9723 len = xmlStrlen(cur);
9724 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9725}
9726
9727/**
9728 * xmlSAXParseDoc:
9729 * @sax: the SAX handler block
9730 * @cur: a pointer to an array of xmlChar
9731 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9732 * documents
9733 *
9734 * parse an XML in-memory document and build a tree.
9735 * It use the given SAX function block to handle the parsing callback.
9736 * If sax is NULL, fallback to the default DOM tree building routines.
9737 *
9738 * Returns the resulting document tree
9739 */
9740
9741xmlDocPtr
9742xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9743 xmlDocPtr ret;
9744 xmlParserCtxtPtr ctxt;
9745
9746 if (cur == NULL) return(NULL);
9747
9748
9749 ctxt = xmlCreateDocParserCtxt(cur);
9750 if (ctxt == NULL) return(NULL);
9751 if (sax != NULL) {
9752 ctxt->sax = sax;
9753 ctxt->userData = NULL;
9754 }
9755
9756 xmlParseDocument(ctxt);
9757 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9758 else {
9759 ret = NULL;
9760 xmlFreeDoc(ctxt->myDoc);
9761 ctxt->myDoc = NULL;
9762 }
9763 if (sax != NULL)
9764 ctxt->sax = NULL;
9765 xmlFreeParserCtxt(ctxt);
9766
9767 return(ret);
9768}
9769
9770/**
9771 * xmlParseDoc:
9772 * @cur: a pointer to an array of xmlChar
9773 *
9774 * parse an XML in-memory document and build a tree.
9775 *
9776 * Returns the resulting document tree
9777 */
9778
9779xmlDocPtr
9780xmlParseDoc(xmlChar *cur) {
9781 return(xmlSAXParseDoc(NULL, cur, 0));
9782}
9783
9784
9785/************************************************************************
9786 * *
9787 * Miscellaneous *
9788 * *
9789 ************************************************************************/
9790
9791#ifdef LIBXML_XPATH_ENABLED
9792#include <libxml/xpath.h>
9793#endif
9794
/* Non-zero once xmlInitParser() has run; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 * Calls made while the parser is already flagged as initialized
 * do nothing.
 */
void
xmlInitParser(void) {
    if (xmlParserInitialized == 0) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Flag set last, once every sub-initializer has been called. */
        xmlParserInitialized = 1;
    }
}
9823
9824/**
9825 * xmlCleanupParser:
9826 *
9827 * Cleanup function for the XML parser. It tries to reclaim all
9828 * parsing related global memory allocated for the parser processing.
9829 * It doesn't deallocate any document related memory. Calling this
9830 * function should not prevent reusing the parser.
9831 */
9832
9833void
9834xmlCleanupParser(void) {
9835 xmlParserInitialized = 0;
9836 xmlCleanupCharEncodingHandlers();
9837 xmlCleanupPredefinedEntities();
9838}
9839
9840/**
9841 * xmlPedanticParserDefault:
9842 * @val: int 0 or 1
9843 *
9844 * Set and return the previous value for enabling pedantic warnings.
9845 *
9846 * Returns the last value for 0 for no substitution, 1 for substitution.
9847 */
9848
9849int
9850xmlPedanticParserDefault(int val) {
9851 int old = xmlPedanticParserDefaultValue;
9852
9853 xmlPedanticParserDefaultValue = val;
9854 return(old);
9855}
9856
9857/**
9858 * xmlSubstituteEntitiesDefault:
9859 * @val: int 0 or 1
9860 *
9861 * Set and return the previous value for default entity support.
9862 * Initially the parser always keep entity references instead of substituting
9863 * entity values in the output. This function has to be used to change the
9864 * default parser behaviour
9865 * SAX::subtituteEntities() has to be used for changing that on a file by
9866 * file basis.
9867 *
9868 * Returns the last value for 0 for no substitution, 1 for substitution.
9869 */
9870
9871int
9872xmlSubstituteEntitiesDefault(int val) {
9873 int old = xmlSubstituteEntitiesDefaultValue;
9874
9875 xmlSubstituteEntitiesDefaultValue = val;
9876 return(old);
9877}
9878
9879/**
9880 * xmlKeepBlanksDefault:
9881 * @val: int 0 or 1
9882 *
9883 * Set and return the previous value for default blanks text nodes support.
9884 * The 1.x version of the parser used an heuristic to try to detect
9885 * ignorable white spaces. As a result the SAX callback was generating
9886 * ignorableWhitespace() callbacks instead of characters() one, and when
9887 * using the DOM output text nodes containing those blanks were not generated.
9888 * The 2.x and later version will switch to the XML standard way and
9889 * ignorableWhitespace() are only generated when running the parser in
9890 * validating mode and when the current element doesn't allow CDATA or
9891 * mixed content.
9892 * This function is provided as a way to force the standard behaviour
9893 * on 1.X libs and to switch back to the old mode for compatibility when
9894 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9895 * by using xmlIsBlankNode() commodity function to detect the "empty"
9896 * nodes generated.
9897 * This value also affect autogeneration of indentation when saving code
9898 * if blanks sections are kept, indentation is not generated.
9899 *
9900 * Returns the last value for 0 for no substitution, 1 for substitution.
9901 */
9902
9903int
9904xmlKeepBlanksDefault(int val) {
9905 int old = xmlKeepBlanksDefaultValue;
9906
9907 xmlKeepBlanksDefaultValue = val;
9908 xmlIndentTreeOutput = !val;
9909 return(old);
9910}
9911