blob: 007b2e7e58da459d4da696c8414e938ebc0fec71 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * Daniel.Veillard@w3.org
31 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
/*
 * XML_DIR_SEP: the character separating directory components in a path,
 * a backslash when WIN32 is defined and a forward slash otherwise.
 */
#if defined(WIN32)
#  define XML_DIR_SEP '\\'
#else
#  define XML_DIR_SEP '/'
#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Sizes, counted in xmlChar, used for the parser working buffers:
 *   XML_PARSER_BUFFER_SIZE      free margin kept at the end of a buffer
 *                               before it is grown
 *   XML_PARSER_BIG_BUFFER_SIZE  initial size of the translation buffer
 *                               allocated by xmlStringDecodeEntities()
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300
79
/*
 * Global defaults for parsing.
 *
 * xmlParserDebugEntities: when non-zero the entity handling code of this
 * file traces its work (input push/pop, references being decoded) through
 * xmlGenericError().
 *
 * On VMS the two longest variable names are truncated and the full names
 * are provided as macros mapping to the truncated ones.
 */
int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;
#ifdef VMS
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
97
/*
 * NULL terminated list of the processing instruction targets starting
 * with "xml" which are allowed by the W3C specifications.
 */
const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
112
113/************************************************************************
114 * *
115 * Parser stacks related functions and macros *
116 * *
117 ************************************************************************/
118
119xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
120 const xmlChar ** str);
121
122/*
123 * Generic function for accessing stacks in the Parser Context
124 */
125
126#define PUSH_AND_POP(scope, type, name) \
127scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
128 if (ctxt->name##Nr >= ctxt->name##Max) { \
129 ctxt->name##Max *= 2; \
130 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
131 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
132 if (ctxt->name##Tab == NULL) { \
133 xmlGenericError(xmlGenericErrorContext, \
134 "realloc failed !\n"); \
135 return(0); \
136 } \
137 } \
138 ctxt->name##Tab[ctxt->name##Nr] = value; \
139 ctxt->name = value; \
140 return(ctxt->name##Nr++); \
141} \
142scope type name##Pop(xmlParserCtxtPtr ctxt) { \
143 type ret; \
144 if (ctxt->name##Nr <= 0) return(0); \
145 ctxt->name##Nr--; \
146 if (ctxt->name##Nr > 0) \
147 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
148 else \
149 ctxt->name = NULL; \
150 ret = ctxt->name##Tab[ctxt->name##Nr]; \
151 ctxt->name##Tab[ctxt->name##Nr] = 0; \
152 return(ret); \
153} \
154
155/*
156 * Those macros actually generate the functions
157 */
158PUSH_AND_POP(extern, xmlParserInputPtr, input)
159PUSH_AND_POP(extern, xmlNodePtr, node)
160PUSH_AND_POP(extern, xmlChar*, name)
161
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000162static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000163 if (ctxt->spaceNr >= ctxt->spaceMax) {
164 ctxt->spaceMax *= 2;
165 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
166 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
167 if (ctxt->spaceTab == NULL) {
168 xmlGenericError(xmlGenericErrorContext,
169 "realloc failed !\n");
170 return(0);
171 }
172 }
173 ctxt->spaceTab[ctxt->spaceNr] = val;
174 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
175 return(ctxt->spaceNr++);
176}
177
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000178static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000179 int ret;
180 if (ctxt->spaceNr <= 0) return(0);
181 ctxt->spaceNr--;
182 if (ctxt->spaceNr > 0)
183 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
184 else
185 ctxt->space = NULL;
186 ret = ctxt->spaceTab[ctxt->spaceNr];
187 ctxt->spaceTab[ctxt->spaceNr] = -1;
188 return(ret);
189}
190
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns ctxt->token when one is pending, otherwise the xmlChar
 *           at the current input position.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     returns the xmlChar at the current input position, or -1 when
 *           a token is pending in ctxt->token.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define CUR_PTR ctxt->input->cur
#define NXT(val) ctxt->input->cur[(val)]
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
229
/*
 * SKIP(val): advance the input by val xmlChars (val is evaluated twice),
 * then handle a parameter entity reference if the new current char is '%'
 * and pop the input if it is exhausted and cannot be grown.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

/*
 * SHRINK: once more than INPUT_CHUNK xmlChars have been consumed from the
 * current input, shrink it, and pop it if it is exhausted and cannot be
 * grown.
 * NOTE: expands to a bare "if" statement, do not follow it with an "else".
 */
#define SHRINK                                                          \
    if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {           \
        xmlParserInputShrink(ctxt->input);                              \
        if ((*ctxt->input->cur == 0) &&                                 \
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))        \
            xmlPopInput(ctxt);                                          \
    }

/*
 * GROW: when less than INPUT_CHUNK xmlChars are left in the current input,
 * try to grow it, and pop it if it is exhausted and cannot be grown.
 * NOTE: expands to a bare "if" statement, do not follow it with an "else".
 */
#define GROW                                                            \
    if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {            \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
        if ((*ctxt->input->cur == 0) &&                                 \
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))        \
            xmlPopInput(ctxt);                                          \
    }
Owen Taylor3473f882001-02-23 17:55:21 +0000251
/* SKIP_BLANKS: skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: skip to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar and try to grow the input if its end
 * was reached. Neither the line/column accounting nor the parameter entity
 * handling are done here.
 */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): skip the current character which is encoded on l xmlChars,
 * update the line and column of the input, drop any pending token and
 * handle a parameter entity reference if the new current char is '%'.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += l;                             \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)

/* CUR_CHAR(l): current character of the input, its length is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same as CUR_CHAR but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): store the character v at index i of the buffer b and
 * advance i. When l is 1 a single xmlChar is written, otherwise the
 * character is encoded by xmlCopyCharMultiByte().
 * NOTE: expands to an if/else statement without braces.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000277
278/**
279 * xmlSkipBlankChars:
280 * @ctxt: the XML parser context
281 *
282 * skip all blanks character found at that point in the input streams.
283 * It pops up finished entities in the process if allowable at that point.
284 *
285 * Returns the number of space chars skipped
286 */
287
288int
289xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
290 int cur, res = 0;
291
292 /*
293 * It's Okay to use CUR/NEXT here since all the blanks are on
294 * the ASCII range.
295 */
296 do {
297 cur = CUR;
298 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
299 NEXT;
300 cur = CUR;
301 res++;
302 }
303 while ((cur == 0) && (ctxt->inputNr > 1) &&
304 (ctxt->instate != XML_PARSER_COMMENT)) {
305 xmlPopInput(ctxt);
306 cur = CUR;
307 }
308 /*
309 * Need to handle support of entities branching here
310 */
311 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
312 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
313 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
314 return(res);
315}
316
317/************************************************************************
318 * *
319 * Commodity functions to handle entities *
320 * *
321 ************************************************************************/
322
323/**
324 * xmlPopInput:
325 * @ctxt: an XML parser context
326 *
327 * xmlPopInput: the current input pointed by ctxt->input came to an end
328 * pop it and return the next char.
329 *
330 * Returns the current xmlChar in the parser context
331 */
332xmlChar
333xmlPopInput(xmlParserCtxtPtr ctxt) {
334 if (ctxt->inputNr == 1) return(0); /* End of main Input */
335 if (xmlParserDebugEntities)
336 xmlGenericError(xmlGenericErrorContext,
337 "Popping input %d\n", ctxt->inputNr);
338 xmlFreeInputStream(inputPop(ctxt));
339 if ((*ctxt->input->cur == 0) &&
340 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
341 return(xmlPopInput(ctxt));
342 return(CUR);
343}
344
345/**
346 * xmlPushInput:
347 * @ctxt: an XML parser context
348 * @input: an XML parser input fragment (entity, XML fragment ...).
349 *
350 * xmlPushInput: switch to a new input stream which is stacked on top
351 * of the previous one(s).
352 */
353void
354xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
355 if (input == NULL) return;
356
357 if (xmlParserDebugEntities) {
358 if ((ctxt->input != NULL) && (ctxt->input->filename))
359 xmlGenericError(xmlGenericErrorContext,
360 "%s(%d): ", ctxt->input->filename,
361 ctxt->input->line);
362 xmlGenericError(xmlGenericErrorContext,
363 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
364 }
365 inputPush(ctxt, input);
366 GROW;
367}
368
369/**
370 * xmlParseCharRef:
371 * @ctxt: an XML parser context
372 *
373 * parse Reference declarations
374 *
375 * [66] CharRef ::= '&#' [0-9]+ ';' |
376 * '&#x' [0-9a-fA-F]+ ';'
377 *
378 * [ WFC: Legal Character ]
379 * Characters referred to using character references must match the
380 * production for Char.
381 *
382 * Returns the value parsed (as an int), 0 in case of error
383 */
384int
385xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000386 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000387 int count = 0;
388
389 if (ctxt->token != 0) {
390 val = ctxt->token;
391 ctxt->token = 0;
392 return(val);
393 }
394 /*
395 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
396 */
397 if ((RAW == '&') && (NXT(1) == '#') &&
398 (NXT(2) == 'x')) {
399 SKIP(3);
400 GROW;
401 while (RAW != ';') { /* loop blocked by count */
402 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
403 val = val * 16 + (CUR - '0');
404 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
405 val = val * 16 + (CUR - 'a') + 10;
406 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
407 val = val * 16 + (CUR - 'A') + 10;
408 else {
409 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
411 ctxt->sax->error(ctxt->userData,
412 "xmlParseCharRef: invalid hexadecimal value\n");
413 ctxt->wellFormed = 0;
414 ctxt->disableSAX = 1;
415 val = 0;
416 break;
417 }
418 NEXT;
419 count++;
420 }
421 if (RAW == ';') {
422 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
423 ctxt->nbChars ++;
424 ctxt->input->cur++;
425 }
426 } else if ((RAW == '&') && (NXT(1) == '#')) {
427 SKIP(2);
428 GROW;
429 while (RAW != ';') { /* loop blocked by count */
430 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
431 val = val * 10 + (CUR - '0');
432 else {
433 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
435 ctxt->sax->error(ctxt->userData,
436 "xmlParseCharRef: invalid decimal value\n");
437 ctxt->wellFormed = 0;
438 ctxt->disableSAX = 1;
439 val = 0;
440 break;
441 }
442 NEXT;
443 count++;
444 }
445 if (RAW == ';') {
446 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
447 ctxt->nbChars ++;
448 ctxt->input->cur++;
449 }
450 } else {
451 ctxt->errNo = XML_ERR_INVALID_CHARREF;
452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
453 ctxt->sax->error(ctxt->userData,
454 "xmlParseCharRef: invalid value\n");
455 ctxt->wellFormed = 0;
456 ctxt->disableSAX = 1;
457 }
458
459 /*
460 * [ WFC: Legal Character ]
461 * Characters referred to using character references must match the
462 * production for Char.
463 */
464 if (IS_CHAR(val)) {
465 return(val);
466 } else {
467 ctxt->errNo = XML_ERR_INVALID_CHAR;
468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
469 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
470 val);
471 ctxt->wellFormed = 0;
472 ctxt->disableSAX = 1;
473 }
474 return(0);
475}
476
477/**
478 * xmlParseStringCharRef:
479 * @ctxt: an XML parser context
480 * @str: a pointer to an index in the string
481 *
482 * parse Reference declarations, variant parsing from a string rather
483 * than an an input flow.
484 *
485 * [66] CharRef ::= '&#' [0-9]+ ';' |
486 * '&#x' [0-9a-fA-F]+ ';'
487 *
488 * [ WFC: Legal Character ]
489 * Characters referred to using character references must match the
490 * production for Char.
491 *
492 * Returns the value parsed (as an int), 0 in case of error, str will be
493 * updated to the current value of the index
494 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000495static int
Owen Taylor3473f882001-02-23 17:55:21 +0000496xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
497 const xmlChar *ptr;
498 xmlChar cur;
499 int val = 0;
500
501 if ((str == NULL) || (*str == NULL)) return(0);
502 ptr = *str;
503 cur = *ptr;
504 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
505 ptr += 3;
506 cur = *ptr;
507 while (cur != ';') { /* Non input consuming loop */
508 if ((cur >= '0') && (cur <= '9'))
509 val = val * 16 + (cur - '0');
510 else if ((cur >= 'a') && (cur <= 'f'))
511 val = val * 16 + (cur - 'a') + 10;
512 else if ((cur >= 'A') && (cur <= 'F'))
513 val = val * 16 + (cur - 'A') + 10;
514 else {
515 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
517 ctxt->sax->error(ctxt->userData,
518 "xmlParseStringCharRef: invalid hexadecimal value\n");
519 ctxt->wellFormed = 0;
520 ctxt->disableSAX = 1;
521 val = 0;
522 break;
523 }
524 ptr++;
525 cur = *ptr;
526 }
527 if (cur == ';')
528 ptr++;
529 } else if ((cur == '&') && (ptr[1] == '#')){
530 ptr += 2;
531 cur = *ptr;
532 while (cur != ';') { /* Non input consuming loops */
533 if ((cur >= '0') && (cur <= '9'))
534 val = val * 10 + (cur - '0');
535 else {
536 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
538 ctxt->sax->error(ctxt->userData,
539 "xmlParseStringCharRef: invalid decimal value\n");
540 ctxt->wellFormed = 0;
541 ctxt->disableSAX = 1;
542 val = 0;
543 break;
544 }
545 ptr++;
546 cur = *ptr;
547 }
548 if (cur == ';')
549 ptr++;
550 } else {
551 ctxt->errNo = XML_ERR_INVALID_CHARREF;
552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
553 ctxt->sax->error(ctxt->userData,
554 "xmlParseCharRef: invalid value\n");
555 ctxt->wellFormed = 0;
556 ctxt->disableSAX = 1;
557 return(0);
558 }
559 *str = ptr;
560
561 /*
562 * [ WFC: Legal Character ]
563 * Characters referred to using character references must match the
564 * production for Char.
565 */
566 if (IS_CHAR(val)) {
567 return(val);
568 } else {
569 ctxt->errNo = XML_ERR_INVALID_CHAR;
570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
571 ctxt->sax->error(ctxt->userData,
572 "CharRef: invalid xmlChar value %d\n", val);
573 ctxt->wellFormed = 0;
574 ctxt->disableSAX = 1;
575 }
576 return(0);
577}
578
579/**
580 * xmlParserHandlePEReference:
581 * @ctxt: the parser context
582 *
583 * [69] PEReference ::= '%' Name ';'
584 *
585 * [ WFC: No Recursion ]
586 * A parsed entity must not contain a recursive
587 * reference to itself, either directly or indirectly.
588 *
589 * [ WFC: Entity Declared ]
590 * In a document without any DTD, a document with only an internal DTD
591 * subset which contains no parameter entity references, or a document
592 * with "standalone='yes'", ... ... The declaration of a parameter
593 * entity must precede any reference to it...
594 *
595 * [ VC: Entity Declared ]
596 * In a document with an external subset or external parameter entities
597 * with "standalone='no'", ... ... The declaration of a parameter entity
598 * must precede any reference to it...
599 *
600 * [ WFC: In DTD ]
601 * Parameter-entity references may only appear in the DTD.
602 * NOTE: misleading but this is handled.
603 *
604 * A PEReference may have been detected in the current input stream
605 * the handling is done accordingly to
606 * http://www.w3.org/TR/REC-xml#entproc
607 * i.e.
608 * - Included in literal in entity values
609 * - Included as Paraemeter Entity reference within DTDs
610 */
611void
612xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
613 xmlChar *name;
614 xmlEntityPtr entity = NULL;
615 xmlParserInputPtr input;
616
617 if (ctxt->token != 0) {
618 return;
619 }
620 if (RAW != '%') return;
621 switch(ctxt->instate) {
622 case XML_PARSER_CDATA_SECTION:
623 return;
624 case XML_PARSER_COMMENT:
625 return;
626 case XML_PARSER_START_TAG:
627 return;
628 case XML_PARSER_END_TAG:
629 return;
630 case XML_PARSER_EOF:
631 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
633 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 return;
637 case XML_PARSER_PROLOG:
638 case XML_PARSER_START:
639 case XML_PARSER_MISC:
640 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
642 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
643 ctxt->wellFormed = 0;
644 ctxt->disableSAX = 1;
645 return;
646 case XML_PARSER_ENTITY_DECL:
647 case XML_PARSER_CONTENT:
648 case XML_PARSER_ATTRIBUTE_VALUE:
649 case XML_PARSER_PI:
650 case XML_PARSER_SYSTEM_LITERAL:
651 /* we just ignore it there */
652 return;
653 case XML_PARSER_EPILOG:
654 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
656 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
657 ctxt->wellFormed = 0;
658 ctxt->disableSAX = 1;
659 return;
660 case XML_PARSER_ENTITY_VALUE:
661 /*
662 * NOTE: in the case of entity values, we don't do the
663 * substitution here since we need the literal
664 * entity value to be able to save the internal
665 * subset of the document.
666 * This will be handled by xmlStringDecodeEntities
667 */
668 return;
669 case XML_PARSER_DTD:
670 /*
671 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
672 * In the internal DTD subset, parameter-entity references
673 * can occur only where markup declarations can occur, not
674 * within markup declarations.
675 * In that case this is handled in xmlParseMarkupDecl
676 */
677 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
678 return;
679 break;
680 case XML_PARSER_IGNORE:
681 return;
682 }
683
684 NEXT;
685 name = xmlParseName(ctxt);
686 if (xmlParserDebugEntities)
687 xmlGenericError(xmlGenericErrorContext,
688 "PE Reference: %s\n", name);
689 if (name == NULL) {
690 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
692 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
693 ctxt->wellFormed = 0;
694 ctxt->disableSAX = 1;
695 } else {
696 if (RAW == ';') {
697 NEXT;
698 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
699 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
700 if (entity == NULL) {
701
702 /*
703 * [ WFC: Entity Declared ]
704 * In a document without any DTD, a document with only an
705 * internal DTD subset which contains no parameter entity
706 * references, or a document with "standalone='yes'", ...
707 * ... The declaration of a parameter entity must precede
708 * any reference to it...
709 */
710 if ((ctxt->standalone == 1) ||
711 ((ctxt->hasExternalSubset == 0) &&
712 (ctxt->hasPErefs == 0))) {
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
715 "PEReference: %%%s; not found\n", name);
716 ctxt->wellFormed = 0;
717 ctxt->disableSAX = 1;
718 } else {
719 /*
720 * [ VC: Entity Declared ]
721 * In a document with an external subset or external
722 * parameter entities with "standalone='no'", ...
723 * ... The declaration of a parameter entity must precede
724 * any reference to it...
725 */
726 if ((!ctxt->disableSAX) &&
727 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
728 ctxt->vctxt.error(ctxt->vctxt.userData,
729 "PEReference: %%%s; not found\n", name);
730 } else if ((!ctxt->disableSAX) &&
731 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
732 ctxt->sax->warning(ctxt->userData,
733 "PEReference: %%%s; not found\n", name);
734 ctxt->valid = 0;
735 }
736 } else {
737 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
738 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
739 /*
740 * handle the extra spaces added before and after
741 * c.f. http://www.w3.org/TR/REC-xml#as-PE
742 * this is done independantly.
743 */
744 input = xmlNewEntityInputStream(ctxt, entity);
745 xmlPushInput(ctxt, input);
746 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
747 (RAW == '<') && (NXT(1) == '?') &&
748 (NXT(2) == 'x') && (NXT(3) == 'm') &&
749 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
750 xmlParseTextDecl(ctxt);
751 }
752 if (ctxt->token == 0)
753 ctxt->token = ' ';
754 } else {
755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
756 ctxt->sax->error(ctxt->userData,
757 "xmlHandlePEReference: %s is not a parameter entity\n",
758 name);
759 ctxt->wellFormed = 0;
760 ctxt->disableSAX = 1;
761 }
762 }
763 } else {
764 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
766 ctxt->sax->error(ctxt->userData,
767 "xmlHandlePEReference: expecting ';'\n");
768 ctxt->wellFormed = 0;
769 ctxt->disableSAX = 1;
770 }
771 xmlFree(name);
772 }
773}
774
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the allocation of the xmlChar array "buffer"
 * whose size is held by the variable "buffer_size" (the name of the buffer
 * followed by _size). It must be used in a function returning a pointer:
 * if the reallocation fails, the failure is reported with perror(), the
 * old buffer is released so that it is not leaked, and the enclosing
 * function returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_grown = (xmlChar *)                                        \
            xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));    \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
}
787
788/**
789 * xmlStringDecodeEntities:
790 * @ctxt: the parser context
791 * @str: the input string
792 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
793 * @end: an end marker xmlChar, 0 if none
794 * @end2: an end marker xmlChar, 0 if none
795 * @end3: an end marker xmlChar, 0 if none
796 *
797 * Takes a entity string content and process to do the adequate subtitutions.
798 *
799 * [67] Reference ::= EntityRef | CharRef
800 *
801 * [69] PEReference ::= '%' Name ';'
802 *
803 * Returns A newly allocated string with the substitution done. The caller
804 * must deallocate it !
805 */
806xmlChar *
807xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
808 xmlChar end, xmlChar end2, xmlChar end3) {
809 xmlChar *buffer = NULL;
810 int buffer_size = 0;
811
812 xmlChar *current = NULL;
813 xmlEntityPtr ent;
814 int c,l;
815 int nbchars = 0;
816
817 if (str == NULL)
818 return(NULL);
819
820 if (ctxt->depth > 40) {
821 ctxt->errNo = XML_ERR_ENTITY_LOOP;
822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
823 ctxt->sax->error(ctxt->userData,
824 "Detected entity reference loop\n");
825 ctxt->wellFormed = 0;
826 ctxt->disableSAX = 1;
827 return(NULL);
828 }
829
830 /*
831 * allocate a translation buffer.
832 */
833 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
834 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
835 if (buffer == NULL) {
836 perror("xmlDecodeEntities: malloc failed");
837 return(NULL);
838 }
839
840 /*
841 * Ok loop until we reach one of the ending char or a size limit.
842 * we are operating on already parsed values.
843 */
844 c = CUR_SCHAR(str, l);
845 while ((c != 0) && (c != end) && /* non input consuming loop */
846 (c != end2) && (c != end3)) {
847
848 if (c == 0) break;
849 if ((c == '&') && (str[1] == '#')) {
850 int val = xmlParseStringCharRef(ctxt, &str);
851 if (val != 0) {
852 COPY_BUF(0,buffer,nbchars,val);
853 }
854 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
855 if (xmlParserDebugEntities)
856 xmlGenericError(xmlGenericErrorContext,
857 "String decoding Entity Reference: %.30s\n",
858 str);
859 ent = xmlParseStringEntityRef(ctxt, &str);
860 if ((ent != NULL) &&
861 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
862 if (ent->content != NULL) {
863 COPY_BUF(0,buffer,nbchars,ent->content[0]);
864 } else {
865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
866 ctxt->sax->error(ctxt->userData,
867 "internal error entity has no content\n");
868 }
869 } else if ((ent != NULL) && (ent->content != NULL)) {
870 xmlChar *rep;
871
872 ctxt->depth++;
873 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
874 0, 0, 0);
875 ctxt->depth--;
876 if (rep != NULL) {
877 current = rep;
878 while (*current != 0) { /* non input consuming loop */
879 buffer[nbchars++] = *current++;
880 if (nbchars >
881 buffer_size - XML_PARSER_BUFFER_SIZE) {
882 growBuffer(buffer);
883 }
884 }
885 xmlFree(rep);
886 }
887 } else if (ent != NULL) {
888 int i = xmlStrlen(ent->name);
889 const xmlChar *cur = ent->name;
890
891 buffer[nbchars++] = '&';
892 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
893 growBuffer(buffer);
894 }
895 for (;i > 0;i--)
896 buffer[nbchars++] = *cur++;
897 buffer[nbchars++] = ';';
898 }
899 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
900 if (xmlParserDebugEntities)
901 xmlGenericError(xmlGenericErrorContext,
902 "String decoding PE Reference: %.30s\n", str);
903 ent = xmlParseStringPEReference(ctxt, &str);
904 if (ent != NULL) {
905 xmlChar *rep;
906
907 ctxt->depth++;
908 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
909 0, 0, 0);
910 ctxt->depth--;
911 if (rep != NULL) {
912 current = rep;
913 while (*current != 0) { /* non input consuming loop */
914 buffer[nbchars++] = *current++;
915 if (nbchars >
916 buffer_size - XML_PARSER_BUFFER_SIZE) {
917 growBuffer(buffer);
918 }
919 }
920 xmlFree(rep);
921 }
922 }
923 } else {
924 COPY_BUF(l,buffer,nbchars,c);
925 str += l;
926 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
927 growBuffer(buffer);
928 }
929 }
930 c = CUR_SCHAR(str, l);
931 }
932 buffer[nbchars++] = 0;
933 return(buffer);
934}
935
936
937/************************************************************************
938 * *
939 * Commodity functions to handle xmlChars *
940 * *
941 ************************************************************************/
942
943/**
944 * xmlStrndup:
945 * @cur: the input xmlChar *
946 * @len: the len of @cur
947 *
948 * a strndup for array of xmlChar's
949 *
950 * Returns a new xmlChar * or NULL
951 */
952xmlChar *
953xmlStrndup(const xmlChar *cur, int len) {
954 xmlChar *ret;
955
956 if ((cur == NULL) || (len < 0)) return(NULL);
957 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
958 if (ret == NULL) {
959 xmlGenericError(xmlGenericErrorContext,
960 "malloc of %ld byte failed\n",
961 (len + 1) * (long)sizeof(xmlChar));
962 return(NULL);
963 }
964 memcpy(ret, cur, len * sizeof(xmlChar));
965 ret[len] = 0;
966 return(ret);
967}
968
969/**
970 * xmlStrdup:
971 * @cur: the input xmlChar *
972 *
973 * a strdup for array of xmlChar's. Since they are supposed to be
974 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
975 * a termination mark of '0'.
976 *
977 * Returns a new xmlChar * or NULL
978 */
979xmlChar *
980xmlStrdup(const xmlChar *cur) {
981 const xmlChar *p = cur;
982
983 if (cur == NULL) return(NULL);
984 while (*p != 0) p++; /* non input consuming */
985 return(xmlStrndup(cur, p - cur));
986}
987
988/**
989 * xmlCharStrndup:
990 * @cur: the input char *
991 * @len: the len of @cur
992 *
993 * a strndup for char's to xmlChar's
994 *
995 * Returns a new xmlChar * or NULL
996 */
997
998xmlChar *
999xmlCharStrndup(const char *cur, int len) {
1000 int i;
1001 xmlChar *ret;
1002
1003 if ((cur == NULL) || (len < 0)) return(NULL);
1004 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1005 if (ret == NULL) {
1006 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1007 (len + 1) * (long)sizeof(xmlChar));
1008 return(NULL);
1009 }
1010 for (i = 0;i < len;i++)
1011 ret[i] = (xmlChar) cur[i];
1012 ret[len] = 0;
1013 return(ret);
1014}
1015
1016/**
1017 * xmlCharStrdup:
1018 * @cur: the input char *
1019 * @len: the len of @cur
1020 *
1021 * a strdup for char's to xmlChar's
1022 *
1023 * Returns a new xmlChar * or NULL
1024 */
1025
1026xmlChar *
1027xmlCharStrdup(const char *cur) {
1028 const char *p = cur;
1029
1030 if (cur == NULL) return(NULL);
1031 while (*p != '\0') p++; /* non input consuming */
1032 return(xmlCharStrndup(cur, p - cur));
1033}
1034
1035/**
1036 * xmlStrcmp:
1037 * @str1: the first xmlChar *
1038 * @str2: the second xmlChar *
1039 *
1040 * a strcmp for xmlChar's
1041 *
1042 * Returns the integer result of the comparison
1043 */
1044
1045int
1046xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1047 register int tmp;
1048
1049 if (str1 == str2) return(0);
1050 if (str1 == NULL) return(-1);
1051 if (str2 == NULL) return(1);
1052 do {
1053 tmp = *str1++ - *str2;
1054 if (tmp != 0) return(tmp);
1055 } while (*str2++ != 0);
1056 return 0;
1057}
1058
1059/**
1060 * xmlStrEqual:
1061 * @str1: the first xmlChar *
1062 * @str2: the second xmlChar *
1063 *
1064 * Check if both string are equal of have same content
1065 * Should be a bit more readable and faster than xmlStrEqual()
1066 *
1067 * Returns 1 if they are equal, 0 if they are different
1068 */
1069
1070int
1071xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1072 if (str1 == str2) return(1);
1073 if (str1 == NULL) return(0);
1074 if (str2 == NULL) return(0);
1075 do {
1076 if (*str1++ != *str2) return(0);
1077 } while (*str2++);
1078 return(1);
1079}
1080
1081/**
1082 * xmlStrncmp:
1083 * @str1: the first xmlChar *
1084 * @str2: the second xmlChar *
1085 * @len: the max comparison length
1086 *
1087 * a strncmp for xmlChar's
1088 *
1089 * Returns the integer result of the comparison
1090 */
1091
1092int
1093xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1094 register int tmp;
1095
1096 if (len <= 0) return(0);
1097 if (str1 == str2) return(0);
1098 if (str1 == NULL) return(-1);
1099 if (str2 == NULL) return(1);
1100 do {
1101 tmp = *str1++ - *str2;
1102 if (tmp != 0 || --len == 0) return(tmp);
1103 } while (*str2++ != 0);
1104 return 0;
1105}
1106
1107static xmlChar casemap[256] = {
1108 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1109 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1110 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1111 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1112 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1113 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1114 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1115 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1116 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1117 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1118 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1119 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1120 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1121 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1122 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1123 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1124 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1125 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1126 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1127 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1128 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1129 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1130 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1131 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1132 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1133 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1134 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1135 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1136 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1137 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1138 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1139 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1140};
1141
1142/**
1143 * xmlStrcasecmp:
1144 * @str1: the first xmlChar *
1145 * @str2: the second xmlChar *
1146 *
1147 * a strcasecmp for xmlChar's
1148 *
1149 * Returns the integer result of the comparison
1150 */
1151
1152int
1153xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1154 register int tmp;
1155
1156 if (str1 == str2) return(0);
1157 if (str1 == NULL) return(-1);
1158 if (str2 == NULL) return(1);
1159 do {
1160 tmp = casemap[*str1++] - casemap[*str2];
1161 if (tmp != 0) return(tmp);
1162 } while (*str2++ != 0);
1163 return 0;
1164}
1165
1166/**
1167 * xmlStrncasecmp:
1168 * @str1: the first xmlChar *
1169 * @str2: the second xmlChar *
1170 * @len: the max comparison length
1171 *
1172 * a strncasecmp for xmlChar's
1173 *
1174 * Returns the integer result of the comparison
1175 */
1176
1177int
1178xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1179 register int tmp;
1180
1181 if (len <= 0) return(0);
1182 if (str1 == str2) return(0);
1183 if (str1 == NULL) return(-1);
1184 if (str2 == NULL) return(1);
1185 do {
1186 tmp = casemap[*str1++] - casemap[*str2];
1187 if (tmp != 0 || --len == 0) return(tmp);
1188 } while (*str2++ != 0);
1189 return 0;
1190}
1191
1192/**
1193 * xmlStrchr:
1194 * @str: the xmlChar * array
1195 * @val: the xmlChar to search
1196 *
1197 * a strchr for xmlChar's
1198 *
1199 * Returns the xmlChar * for the first occurence or NULL.
1200 */
1201
1202const xmlChar *
1203xmlStrchr(const xmlChar *str, xmlChar val) {
1204 if (str == NULL) return(NULL);
1205 while (*str != 0) { /* non input consuming */
1206 if (*str == val) return((xmlChar *) str);
1207 str++;
1208 }
1209 return(NULL);
1210}
1211
1212/**
1213 * xmlStrstr:
1214 * @str: the xmlChar * array (haystack)
1215 * @val: the xmlChar to search (needle)
1216 *
1217 * a strstr for xmlChar's
1218 *
1219 * Returns the xmlChar * for the first occurence or NULL.
1220 */
1221
1222const xmlChar *
1223xmlStrstr(const xmlChar *str, xmlChar *val) {
1224 int n;
1225
1226 if (str == NULL) return(NULL);
1227 if (val == NULL) return(NULL);
1228 n = xmlStrlen(val);
1229
1230 if (n == 0) return(str);
1231 while (*str != 0) { /* non input consuming */
1232 if (*str == *val) {
1233 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1234 }
1235 str++;
1236 }
1237 return(NULL);
1238}
1239
1240/**
1241 * xmlStrcasestr:
1242 * @str: the xmlChar * array (haystack)
1243 * @val: the xmlChar to search (needle)
1244 *
1245 * a case-ignoring strstr for xmlChar's
1246 *
1247 * Returns the xmlChar * for the first occurence or NULL.
1248 */
1249
1250const xmlChar *
1251xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1252 int n;
1253
1254 if (str == NULL) return(NULL);
1255 if (val == NULL) return(NULL);
1256 n = xmlStrlen(val);
1257
1258 if (n == 0) return(str);
1259 while (*str != 0) { /* non input consuming */
1260 if (casemap[*str] == casemap[*val])
1261 if (!xmlStrncasecmp(str, val, n)) return(str);
1262 str++;
1263 }
1264 return(NULL);
1265}
1266
1267/**
1268 * xmlStrsub:
1269 * @str: the xmlChar * array (haystack)
1270 * @start: the index of the first char (zero based)
1271 * @len: the length of the substring
1272 *
1273 * Extract a substring of a given string
1274 *
1275 * Returns the xmlChar * for the first occurence or NULL.
1276 */
1277
1278xmlChar *
1279xmlStrsub(const xmlChar *str, int start, int len) {
1280 int i;
1281
1282 if (str == NULL) return(NULL);
1283 if (start < 0) return(NULL);
1284 if (len < 0) return(NULL);
1285
1286 for (i = 0;i < start;i++) {
1287 if (*str == 0) return(NULL);
1288 str++;
1289 }
1290 if (*str == 0) return(NULL);
1291 return(xmlStrndup(str, len));
1292}
1293
1294/**
1295 * xmlStrlen:
1296 * @str: the xmlChar * array
1297 *
1298 * length of a xmlChar's string
1299 *
1300 * Returns the number of xmlChar contained in the ARRAY.
1301 */
1302
1303int
1304xmlStrlen(const xmlChar *str) {
1305 int len = 0;
1306
1307 if (str == NULL) return(0);
1308 while (*str != 0) { /* non input consuming */
1309 str++;
1310 len++;
1311 }
1312 return(len);
1313}
1314
1315/**
1316 * xmlStrncat:
1317 * @cur: the original xmlChar * array
1318 * @add: the xmlChar * array added
1319 * @len: the length of @add
1320 *
1321 * a strncat for array of xmlChar's, it will extend cur with the len
1322 * first bytes of @add.
1323 *
1324 * Returns a new xmlChar *, the original @cur is reallocated if needed
1325 * and should not be freed
1326 */
1327
1328xmlChar *
1329xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1330 int size;
1331 xmlChar *ret;
1332
1333 if ((add == NULL) || (len == 0))
1334 return(cur);
1335 if (cur == NULL)
1336 return(xmlStrndup(add, len));
1337
1338 size = xmlStrlen(cur);
1339 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1340 if (ret == NULL) {
1341 xmlGenericError(xmlGenericErrorContext,
1342 "xmlStrncat: realloc of %ld byte failed\n",
1343 (size + len + 1) * (long)sizeof(xmlChar));
1344 return(cur);
1345 }
1346 memcpy(&ret[size], add, len * sizeof(xmlChar));
1347 ret[size + len] = 0;
1348 return(ret);
1349}
1350
1351/**
1352 * xmlStrcat:
1353 * @cur: the original xmlChar * array
1354 * @add: the xmlChar * array added
1355 *
1356 * a strcat for array of xmlChar's. Since they are supposed to be
1357 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1358 * a termination mark of '0'.
1359 *
1360 * Returns a new xmlChar * containing the concatenated string.
1361 */
1362xmlChar *
1363xmlStrcat(xmlChar *cur, const xmlChar *add) {
1364 const xmlChar *p = add;
1365
1366 if (add == NULL) return(cur);
1367 if (cur == NULL)
1368 return(xmlStrdup(add));
1369
1370 while (*p != 0) p++; /* non input consuming */
1371 return(xmlStrncat(cur, add, p - add));
1372}
1373
1374/************************************************************************
1375 * *
1376 * Commodity functions, cleanup needed ? *
1377 * *
1378 ************************************************************************/
1379
1380/**
1381 * areBlanks:
1382 * @ctxt: an XML parser context
1383 * @str: a xmlChar *
1384 * @len: the size of @str
1385 *
1386 * Is this a sequence of blank chars that one can ignore ?
1387 *
1388 * Returns 1 if ignorable 0 otherwise.
1389 */
1390
1391static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1392 int i, ret;
1393 xmlNodePtr lastChild;
1394
Daniel Veillard2f362242001-03-02 17:36:21 +00001395 if (ctxt->keepBlanks)
1396 return(0);
1397
Owen Taylor3473f882001-02-23 17:55:21 +00001398 /*
1399 * Check for xml:space value.
1400 */
1401 if (*(ctxt->space) == 1)
1402 return(0);
1403
1404 /*
1405 * Check that the string is made of blanks
1406 */
1407 for (i = 0;i < len;i++)
1408 if (!(IS_BLANK(str[i]))) return(0);
1409
1410 /*
1411 * Look if the element is mixed content in the Dtd if available
1412 */
1413 if (ctxt->myDoc != NULL) {
1414 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1415 if (ret == 0) return(1);
1416 if (ret == 1) return(0);
1417 }
1418
1419 /*
1420 * Otherwise, heuristic :-\
1421 */
Owen Taylor3473f882001-02-23 17:55:21 +00001422 if (RAW != '<') return(0);
1423 if (ctxt->node == NULL) return(0);
1424 if ((ctxt->node->children == NULL) &&
1425 (RAW == '<') && (NXT(1) == '/')) return(0);
1426
1427 lastChild = xmlGetLastChild(ctxt->node);
1428 if (lastChild == NULL) {
1429 if (ctxt->node->content != NULL) return(0);
1430 } else if (xmlNodeIsText(lastChild))
1431 return(0);
1432 else if ((ctxt->node->children != NULL) &&
1433 (xmlNodeIsText(ctxt->node->children)))
1434 return(0);
1435 return(1);
1436}
1437
1438/*
 * Forward declarations, needed because of the recursive behaviour.
1440 */
1441void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1442void xmlParseReference(xmlParserCtxtPtr ctxt);
1443
1444/************************************************************************
1445 * *
1446 * Extra stuff for namespace support *
1447 * Relates to http://www.w3.org/TR/WD-xml-names *
1448 * *
1449 ************************************************************************/
1450
1451/**
1452 * xmlSplitQName:
1453 * @ctxt: an XML parser context
1454 * @name: an XML parser context
1455 * @prefix: a xmlChar **
1456 *
1457 * parse an UTF8 encoded XML qualified name string
1458 *
1459 * [NS 5] QName ::= (Prefix ':')? LocalPart
1460 *
1461 * [NS 6] Prefix ::= NCName
1462 *
1463 * [NS 7] LocalPart ::= NCName
1464 *
1465 * Returns the local part, and prefix is updated
1466 * to get the Prefix if any.
1467 */
1468
1469xmlChar *
1470xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1471 xmlChar buf[XML_MAX_NAMELEN + 5];
1472 xmlChar *buffer = NULL;
1473 int len = 0;
1474 int max = XML_MAX_NAMELEN;
1475 xmlChar *ret = NULL;
1476 const xmlChar *cur = name;
1477 int c;
1478
1479 *prefix = NULL;
1480
1481#ifndef XML_XML_NAMESPACE
1482 /* xml: prefix is not really a namespace */
1483 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1484 (cur[2] == 'l') && (cur[3] == ':'))
1485 return(xmlStrdup(name));
1486#endif
1487
1488 /* nasty but valid */
1489 if (cur[0] == ':')
1490 return(xmlStrdup(name));
1491
1492 c = *cur++;
1493 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1494 buf[len++] = c;
1495 c = *cur++;
1496 }
1497 if (len >= max) {
1498 /*
1499 * Okay someone managed to make a huge name, so he's ready to pay
1500 * for the processing speed.
1501 */
1502 max = len * 2;
1503
1504 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1505 if (buffer == NULL) {
1506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1507 ctxt->sax->error(ctxt->userData,
1508 "xmlSplitQName: out of memory\n");
1509 return(NULL);
1510 }
1511 memcpy(buffer, buf, len);
1512 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1513 if (len + 10 > max) {
1514 max *= 2;
1515 buffer = (xmlChar *) xmlRealloc(buffer,
1516 max * sizeof(xmlChar));
1517 if (buffer == NULL) {
1518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1519 ctxt->sax->error(ctxt->userData,
1520 "xmlSplitQName: out of memory\n");
1521 return(NULL);
1522 }
1523 }
1524 buffer[len++] = c;
1525 c = *cur++;
1526 }
1527 buffer[len] = 0;
1528 }
1529
1530 if (buffer == NULL)
1531 ret = xmlStrndup(buf, len);
1532 else {
1533 ret = buffer;
1534 buffer = NULL;
1535 max = XML_MAX_NAMELEN;
1536 }
1537
1538
1539 if (c == ':') {
1540 c = *cur++;
1541 if (c == 0) return(ret);
1542 *prefix = ret;
1543 len = 0;
1544
1545 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1546 buf[len++] = c;
1547 c = *cur++;
1548 }
1549 if (len >= max) {
1550 /*
1551 * Okay someone managed to make a huge name, so he's ready to pay
1552 * for the processing speed.
1553 */
1554 max = len * 2;
1555
1556 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1557 if (buffer == NULL) {
1558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1559 ctxt->sax->error(ctxt->userData,
1560 "xmlSplitQName: out of memory\n");
1561 return(NULL);
1562 }
1563 memcpy(buffer, buf, len);
1564 while (c != 0) { /* tested bigname2.xml */
1565 if (len + 10 > max) {
1566 max *= 2;
1567 buffer = (xmlChar *) xmlRealloc(buffer,
1568 max * sizeof(xmlChar));
1569 if (buffer == NULL) {
1570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571 ctxt->sax->error(ctxt->userData,
1572 "xmlSplitQName: out of memory\n");
1573 return(NULL);
1574 }
1575 }
1576 buffer[len++] = c;
1577 c = *cur++;
1578 }
1579 buffer[len] = 0;
1580 }
1581
1582 if (buffer == NULL)
1583 ret = xmlStrndup(buf, len);
1584 else {
1585 ret = buffer;
1586 }
1587 }
1588
1589 return(ret);
1590}
1591
1592/************************************************************************
1593 * *
1594 * The parser itself *
1595 * Relates to http://www.w3.org/TR/REC-xml *
1596 * *
1597 ************************************************************************/
1598
Daniel Veillard21a0f912001-02-25 19:54:14 +00001599xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001600/**
1601 * xmlParseName:
1602 * @ctxt: an XML parser context
1603 *
1604 * parse an XML name.
1605 *
1606 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1607 * CombiningChar | Extender
1608 *
1609 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1610 *
1611 * [6] Names ::= Name (S Name)*
1612 *
1613 * Returns the Name parsed or NULL
1614 */
1615
1616xmlChar *
1617xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001618 const xmlChar *in;
1619 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001620 int count = 0;
1621
1622 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001623
1624 /*
1625 * Accelerator for simple ASCII names
1626 */
1627 in = ctxt->input->cur;
1628 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1629 ((*in >= 0x41) && (*in <= 0x5A)) ||
1630 (*in == '_') || (*in == ':')) {
1631 in++;
1632 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1633 ((*in >= 0x41) && (*in <= 0x5A)) ||
1634 ((*in >= 0x30) && (*in <= 0x39)) ||
1635 (*in == '_') || (*in == ':'))
1636 in++;
1637 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1638 count = in - ctxt->input->cur;
1639 ret = xmlStrndup(ctxt->input->cur, count);
1640 ctxt->input->cur = in;
1641 return(ret);
1642 }
1643 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001644 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001645}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001646
Daniel Veillard21a0f912001-02-25 19:54:14 +00001647xmlChar *
1648xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1649 xmlChar buf[XML_MAX_NAMELEN + 5];
1650 int len = 0, l;
1651 int c;
1652 int count = 0;
1653
1654 /*
1655 * Handler for more complex cases
1656 */
1657 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001658 c = CUR_CHAR(l);
1659 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1660 (!IS_LETTER(c) && (c != '_') &&
1661 (c != ':'))) {
1662 return(NULL);
1663 }
1664
1665 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1666 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1667 (c == '.') || (c == '-') ||
1668 (c == '_') || (c == ':') ||
1669 (IS_COMBINING(c)) ||
1670 (IS_EXTENDER(c)))) {
1671 if (count++ > 100) {
1672 count = 0;
1673 GROW;
1674 }
1675 COPY_BUF(l,buf,len,c);
1676 NEXTL(l);
1677 c = CUR_CHAR(l);
1678 if (len >= XML_MAX_NAMELEN) {
1679 /*
1680 * Okay someone managed to make a huge name, so he's ready to pay
1681 * for the processing speed.
1682 */
1683 xmlChar *buffer;
1684 int max = len * 2;
1685
1686 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1687 if (buffer == NULL) {
1688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1689 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001690 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001691 return(NULL);
1692 }
1693 memcpy(buffer, buf, len);
1694 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1695 (c == '.') || (c == '-') ||
1696 (c == '_') || (c == ':') ||
1697 (IS_COMBINING(c)) ||
1698 (IS_EXTENDER(c))) {
1699 if (count++ > 100) {
1700 count = 0;
1701 GROW;
1702 }
1703 if (len + 10 > max) {
1704 max *= 2;
1705 buffer = (xmlChar *) xmlRealloc(buffer,
1706 max * sizeof(xmlChar));
1707 if (buffer == NULL) {
1708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001710 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001711 return(NULL);
1712 }
1713 }
1714 COPY_BUF(l,buffer,len,c);
1715 NEXTL(l);
1716 c = CUR_CHAR(l);
1717 }
1718 buffer[len] = 0;
1719 return(buffer);
1720 }
1721 }
1722 return(xmlStrndup(buf, len));
1723}
1724
1725/**
1726 * xmlParseStringName:
1727 * @ctxt: an XML parser context
1728 * @str: a pointer to the string pointer (IN/OUT)
1729 *
1730 * parse an XML name.
1731 *
1732 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1733 * CombiningChar | Extender
1734 *
1735 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1736 *
1737 * [6] Names ::= Name (S Name)*
1738 *
1739 * Returns the Name parsed or NULL. The str pointer
1740 * is updated to the current location in the string.
1741 */
1742
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001743static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001744xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1745 xmlChar buf[XML_MAX_NAMELEN + 5];
1746 const xmlChar *cur = *str;
1747 int len = 0, l;
1748 int c;
1749
1750 c = CUR_SCHAR(cur, l);
1751 if (!IS_LETTER(c) && (c != '_') &&
1752 (c != ':')) {
1753 return(NULL);
1754 }
1755
1756 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1757 (c == '.') || (c == '-') ||
1758 (c == '_') || (c == ':') ||
1759 (IS_COMBINING(c)) ||
1760 (IS_EXTENDER(c))) {
1761 COPY_BUF(l,buf,len,c);
1762 cur += l;
1763 c = CUR_SCHAR(cur, l);
1764 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1765 /*
1766 * Okay someone managed to make a huge name, so he's ready to pay
1767 * for the processing speed.
1768 */
1769 xmlChar *buffer;
1770 int max = len * 2;
1771
1772 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1773 if (buffer == NULL) {
1774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1775 ctxt->sax->error(ctxt->userData,
1776 "xmlParseStringName: out of memory\n");
1777 return(NULL);
1778 }
1779 memcpy(buffer, buf, len);
1780 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1781 (c == '.') || (c == '-') ||
1782 (c == '_') || (c == ':') ||
1783 (IS_COMBINING(c)) ||
1784 (IS_EXTENDER(c))) {
1785 if (len + 10 > max) {
1786 max *= 2;
1787 buffer = (xmlChar *) xmlRealloc(buffer,
1788 max * sizeof(xmlChar));
1789 if (buffer == NULL) {
1790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791 ctxt->sax->error(ctxt->userData,
1792 "xmlParseStringName: out of memory\n");
1793 return(NULL);
1794 }
1795 }
1796 COPY_BUF(l,buffer,len,c);
1797 cur += l;
1798 c = CUR_SCHAR(cur, l);
1799 }
1800 buffer[len] = 0;
1801 *str = cur;
1802 return(buffer);
1803 }
1804 }
1805 *str = cur;
1806 return(xmlStrndup(buf, len));
1807}
1808
1809/**
1810 * xmlParseNmtoken:
1811 * @ctxt: an XML parser context
1812 *
1813 * parse an XML Nmtoken.
1814 *
1815 * [7] Nmtoken ::= (NameChar)+
1816 *
1817 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1818 *
1819 * Returns the Nmtoken parsed or NULL
1820 */
1821
1822xmlChar *
1823xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1824 xmlChar buf[XML_MAX_NAMELEN + 5];
1825 int len = 0, l;
1826 int c;
1827 int count = 0;
1828
1829 GROW;
1830 c = CUR_CHAR(l);
1831
1832 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1833 (c == '.') || (c == '-') ||
1834 (c == '_') || (c == ':') ||
1835 (IS_COMBINING(c)) ||
1836 (IS_EXTENDER(c))) {
1837 if (count++ > 100) {
1838 count = 0;
1839 GROW;
1840 }
1841 COPY_BUF(l,buf,len,c);
1842 NEXTL(l);
1843 c = CUR_CHAR(l);
1844 if (len >= XML_MAX_NAMELEN) {
1845 /*
1846 * Okay someone managed to make a huge token, so he's ready to pay
1847 * for the processing speed.
1848 */
1849 xmlChar *buffer;
1850 int max = len * 2;
1851
1852 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1853 if (buffer == NULL) {
1854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1855 ctxt->sax->error(ctxt->userData,
1856 "xmlParseNmtoken: out of memory\n");
1857 return(NULL);
1858 }
1859 memcpy(buffer, buf, len);
1860 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1861 (c == '.') || (c == '-') ||
1862 (c == '_') || (c == ':') ||
1863 (IS_COMBINING(c)) ||
1864 (IS_EXTENDER(c))) {
1865 if (count++ > 100) {
1866 count = 0;
1867 GROW;
1868 }
1869 if (len + 10 > max) {
1870 max *= 2;
1871 buffer = (xmlChar *) xmlRealloc(buffer,
1872 max * sizeof(xmlChar));
1873 if (buffer == NULL) {
1874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1875 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001876 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001877 return(NULL);
1878 }
1879 }
1880 COPY_BUF(l,buffer,len,c);
1881 NEXTL(l);
1882 c = CUR_CHAR(l);
1883 }
1884 buffer[len] = 0;
1885 return(buffer);
1886 }
1887 }
1888 if (len == 0)
1889 return(NULL);
1890 return(xmlStrndup(buf, len));
1891}
1892
1893/**
1894 * xmlParseEntityValue:
1895 * @ctxt: an XML parser context
1896 * @orig: if non-NULL store a copy of the original entity value
1897 *
1898 * parse a value for ENTITY declarations
1899 *
1900 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1901 * "'" ([^%&'] | PEReference | Reference)* "'"
1902 *
1903 * Returns the EntityValue parsed with reference substitued or NULL
1904 */
1905
1906xmlChar *
1907xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1908 xmlChar *buf = NULL;
1909 int len = 0;
1910 int size = XML_PARSER_BUFFER_SIZE;
1911 int c, l;
1912 xmlChar stop;
1913 xmlChar *ret = NULL;
1914 const xmlChar *cur = NULL;
1915 xmlParserInputPtr input;
1916
1917 if (RAW == '"') stop = '"';
1918 else if (RAW == '\'') stop = '\'';
1919 else {
1920 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1922 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1923 ctxt->wellFormed = 0;
1924 ctxt->disableSAX = 1;
1925 return(NULL);
1926 }
1927 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1928 if (buf == NULL) {
1929 xmlGenericError(xmlGenericErrorContext,
1930 "malloc of %d byte failed\n", size);
1931 return(NULL);
1932 }
1933
1934 /*
1935 * The content of the entity definition is copied in a buffer.
1936 */
1937
1938 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1939 input = ctxt->input;
1940 GROW;
1941 NEXT;
1942 c = CUR_CHAR(l);
1943 /*
1944 * NOTE: 4.4.5 Included in Literal
1945 * When a parameter entity reference appears in a literal entity
1946 * value, ... a single or double quote character in the replacement
1947 * text is always treated as a normal data character and will not
1948 * terminate the literal.
1949 * In practice it means we stop the loop only when back at parsing
1950 * the initial entity and the quote is found
1951 */
1952 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1953 (ctxt->input != input))) {
1954 if (len + 5 >= size) {
1955 size *= 2;
1956 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1957 if (buf == NULL) {
1958 xmlGenericError(xmlGenericErrorContext,
1959 "realloc of %d byte failed\n", size);
1960 return(NULL);
1961 }
1962 }
1963 COPY_BUF(l,buf,len,c);
1964 NEXTL(l);
1965 /*
1966 * Pop-up of finished entities.
1967 */
1968 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1969 xmlPopInput(ctxt);
1970
1971 GROW;
1972 c = CUR_CHAR(l);
1973 if (c == 0) {
1974 GROW;
1975 c = CUR_CHAR(l);
1976 }
1977 }
1978 buf[len] = 0;
1979
1980 /*
1981 * Raise problem w.r.t. '&' and '%' being used in non-entities
1982 * reference constructs. Note Charref will be handled in
1983 * xmlStringDecodeEntities()
1984 */
1985 cur = buf;
1986 while (*cur != 0) { /* non input consuming */
1987 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1988 xmlChar *name;
1989 xmlChar tmp = *cur;
1990
1991 cur++;
1992 name = xmlParseStringName(ctxt, &cur);
1993 if ((name == NULL) || (*cur != ';')) {
1994 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1996 ctxt->sax->error(ctxt->userData,
1997 "EntityValue: '%c' forbidden except for entities references\n",
1998 tmp);
1999 ctxt->wellFormed = 0;
2000 ctxt->disableSAX = 1;
2001 }
2002 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2003 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005 ctxt->sax->error(ctxt->userData,
2006 "EntityValue: PEReferences forbidden in internal subset\n",
2007 tmp);
2008 ctxt->wellFormed = 0;
2009 ctxt->disableSAX = 1;
2010 }
2011 if (name != NULL)
2012 xmlFree(name);
2013 }
2014 cur++;
2015 }
2016
2017 /*
2018 * Then PEReference entities are substituted.
2019 */
2020 if (c != stop) {
2021 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2023 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2024 ctxt->wellFormed = 0;
2025 ctxt->disableSAX = 1;
2026 xmlFree(buf);
2027 } else {
2028 NEXT;
2029 /*
2030 * NOTE: 4.4.7 Bypassed
2031 * When a general entity reference appears in the EntityValue in
2032 * an entity declaration, it is bypassed and left as is.
2033 * so XML_SUBSTITUTE_REF is not set here.
2034 */
2035 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2036 0, 0, 0);
2037 if (orig != NULL)
2038 *orig = buf;
2039 else
2040 xmlFree(buf);
2041 }
2042
2043 return(ret);
2044}
2045
2046/**
2047 * xmlParseAttValue:
2048 * @ctxt: an XML parser context
2049 *
2050 * parse a value for an attribute
2051 * Note: the parser won't do substitution of entities here, this
2052 * will be handled later in xmlStringGetNodeList
2053 *
2054 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2055 * "'" ([^<&'] | Reference)* "'"
2056 *
2057 * 3.3.3 Attribute-Value Normalization:
2058 * Before the value of an attribute is passed to the application or
2059 * checked for validity, the XML processor must normalize it as follows:
2060 * - a character reference is processed by appending the referenced
2061 * character to the attribute value
2062 * - an entity reference is processed by recursively processing the
2063 * replacement text of the entity
2064 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2065 * appending #x20 to the normalized value, except that only a single
2066 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2067 * parsed entity or the literal entity value of an internal parsed entity
2068 * - other characters are processed by appending them to the normalized value
2069 * If the declared value is not CDATA, then the XML processor must further
2070 * process the normalized attribute value by discarding any leading and
2071 * trailing space (#x20) characters, and by replacing sequences of space
2072 * (#x20) characters by a single space (#x20) character.
2073 * All attributes for which no declaration has been read should be treated
2074 * by a non-validating parser as if declared CDATA.
2075 *
2076 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2077 */
2078
2079xmlChar *
2080xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2081 xmlChar limit = 0;
2082 xmlChar *buf = NULL;
2083 int len = 0;
2084 int buf_size = 0;
2085 int c, l;
2086 xmlChar *current = NULL;
2087 xmlEntityPtr ent;
2088
2089
2090 SHRINK;
2091 if (NXT(0) == '"') {
2092 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2093 limit = '"';
2094 NEXT;
2095 } else if (NXT(0) == '\'') {
2096 limit = '\'';
2097 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2098 NEXT;
2099 } else {
2100 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2102 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2103 ctxt->wellFormed = 0;
2104 ctxt->disableSAX = 1;
2105 return(NULL);
2106 }
2107
2108 /*
2109 * allocate a translation buffer.
2110 */
2111 buf_size = XML_PARSER_BUFFER_SIZE;
2112 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2113 if (buf == NULL) {
2114 perror("xmlParseAttValue: malloc failed");
2115 return(NULL);
2116 }
2117
2118 /*
2119 * Ok loop until we reach one of the ending char or a size limit.
2120 */
2121 c = CUR_CHAR(l);
2122 while (((NXT(0) != limit) && /* checked */
2123 (c != '<')) || (ctxt->token != 0)) {
2124 if (c == 0) break;
2125 if (ctxt->token == '&') {
2126 /*
2127 * The reparsing will be done in xmlStringGetNodeList()
2128 * called by the attribute() function in SAX.c
2129 */
2130 static xmlChar buffer[6] = "&#38;";
2131
2132 if (len > buf_size - 10) {
2133 growBuffer(buf);
2134 }
2135 current = &buffer[0];
2136 while (*current != 0) { /* non input consuming */
2137 buf[len++] = *current++;
2138 }
2139 ctxt->token = 0;
2140 } else if (c == '&') {
2141 if (NXT(1) == '#') {
2142 int val = xmlParseCharRef(ctxt);
2143 if (val == '&') {
2144 /*
2145 * The reparsing will be done in xmlStringGetNodeList()
2146 * called by the attribute() function in SAX.c
2147 */
2148 static xmlChar buffer[6] = "&#38;";
2149
2150 if (len > buf_size - 10) {
2151 growBuffer(buf);
2152 }
2153 current = &buffer[0];
2154 while (*current != 0) { /* non input consuming */
2155 buf[len++] = *current++;
2156 }
2157 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002158 if (len > buf_size - 10) {
2159 growBuffer(buf);
2160 }
Owen Taylor3473f882001-02-23 17:55:21 +00002161 len += xmlCopyChar(0, &buf[len], val);
2162 }
2163 } else {
2164 ent = xmlParseEntityRef(ctxt);
2165 if ((ent != NULL) &&
2166 (ctxt->replaceEntities != 0)) {
2167 xmlChar *rep;
2168
2169 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2170 rep = xmlStringDecodeEntities(ctxt, ent->content,
2171 XML_SUBSTITUTE_REF, 0, 0, 0);
2172 if (rep != NULL) {
2173 current = rep;
2174 while (*current != 0) { /* non input consuming */
2175 buf[len++] = *current++;
2176 if (len > buf_size - 10) {
2177 growBuffer(buf);
2178 }
2179 }
2180 xmlFree(rep);
2181 }
2182 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002183 if (len > buf_size - 10) {
2184 growBuffer(buf);
2185 }
Owen Taylor3473f882001-02-23 17:55:21 +00002186 if (ent->content != NULL)
2187 buf[len++] = ent->content[0];
2188 }
2189 } else if (ent != NULL) {
2190 int i = xmlStrlen(ent->name);
2191 const xmlChar *cur = ent->name;
2192
2193 /*
2194 * This may look absurd but is needed to detect
2195 * entities problems
2196 */
2197 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2198 (ent->content != NULL)) {
2199 xmlChar *rep;
2200 rep = xmlStringDecodeEntities(ctxt, ent->content,
2201 XML_SUBSTITUTE_REF, 0, 0, 0);
2202 if (rep != NULL)
2203 xmlFree(rep);
2204 }
2205
2206 /*
2207 * Just output the reference
2208 */
2209 buf[len++] = '&';
2210 if (len > buf_size - i - 10) {
2211 growBuffer(buf);
2212 }
2213 for (;i > 0;i--)
2214 buf[len++] = *cur++;
2215 buf[len++] = ';';
2216 }
2217 }
2218 } else {
2219 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2220 COPY_BUF(l,buf,len,0x20);
2221 if (len > buf_size - 10) {
2222 growBuffer(buf);
2223 }
2224 } else {
2225 COPY_BUF(l,buf,len,c);
2226 if (len > buf_size - 10) {
2227 growBuffer(buf);
2228 }
2229 }
2230 NEXTL(l);
2231 }
2232 GROW;
2233 c = CUR_CHAR(l);
2234 }
2235 buf[len++] = 0;
2236 if (RAW == '<') {
2237 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2239 ctxt->sax->error(ctxt->userData,
2240 "Unescaped '<' not allowed in attributes values\n");
2241 ctxt->wellFormed = 0;
2242 ctxt->disableSAX = 1;
2243 } else if (RAW != limit) {
2244 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2246 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2247 ctxt->wellFormed = 0;
2248 ctxt->disableSAX = 1;
2249 } else
2250 NEXT;
2251 return(buf);
2252}
2253
2254/**
2255 * xmlParseSystemLiteral:
2256 * @ctxt: an XML parser context
2257 *
2258 * parse an XML Literal
2259 *
2260 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2261 *
2262 * Returns the SystemLiteral parsed or NULL
2263 */
2264
2265xmlChar *
2266xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2267 xmlChar *buf = NULL;
2268 int len = 0;
2269 int size = XML_PARSER_BUFFER_SIZE;
2270 int cur, l;
2271 xmlChar stop;
2272 int state = ctxt->instate;
2273 int count = 0;
2274
2275 SHRINK;
2276 if (RAW == '"') {
2277 NEXT;
2278 stop = '"';
2279 } else if (RAW == '\'') {
2280 NEXT;
2281 stop = '\'';
2282 } else {
2283 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2285 ctxt->sax->error(ctxt->userData,
2286 "SystemLiteral \" or ' expected\n");
2287 ctxt->wellFormed = 0;
2288 ctxt->disableSAX = 1;
2289 return(NULL);
2290 }
2291
2292 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2293 if (buf == NULL) {
2294 xmlGenericError(xmlGenericErrorContext,
2295 "malloc of %d byte failed\n", size);
2296 return(NULL);
2297 }
2298 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2299 cur = CUR_CHAR(l);
2300 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2301 if (len + 5 >= size) {
2302 size *= 2;
2303 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2304 if (buf == NULL) {
2305 xmlGenericError(xmlGenericErrorContext,
2306 "realloc of %d byte failed\n", size);
2307 ctxt->instate = (xmlParserInputState) state;
2308 return(NULL);
2309 }
2310 }
2311 count++;
2312 if (count > 50) {
2313 GROW;
2314 count = 0;
2315 }
2316 COPY_BUF(l,buf,len,cur);
2317 NEXTL(l);
2318 cur = CUR_CHAR(l);
2319 if (cur == 0) {
2320 GROW;
2321 SHRINK;
2322 cur = CUR_CHAR(l);
2323 }
2324 }
2325 buf[len] = 0;
2326 ctxt->instate = (xmlParserInputState) state;
2327 if (!IS_CHAR(cur)) {
2328 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2330 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2331 ctxt->wellFormed = 0;
2332 ctxt->disableSAX = 1;
2333 } else {
2334 NEXT;
2335 }
2336 return(buf);
2337}
2338
2339/**
2340 * xmlParsePubidLiteral:
2341 * @ctxt: an XML parser context
2342 *
2343 * parse an XML public literal
2344 *
2345 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2346 *
2347 * Returns the PubidLiteral parsed or NULL.
2348 */
2349
2350xmlChar *
2351xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2352 xmlChar *buf = NULL;
2353 int len = 0;
2354 int size = XML_PARSER_BUFFER_SIZE;
2355 xmlChar cur;
2356 xmlChar stop;
2357 int count = 0;
2358
2359 SHRINK;
2360 if (RAW == '"') {
2361 NEXT;
2362 stop = '"';
2363 } else if (RAW == '\'') {
2364 NEXT;
2365 stop = '\'';
2366 } else {
2367 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2369 ctxt->sax->error(ctxt->userData,
2370 "SystemLiteral \" or ' expected\n");
2371 ctxt->wellFormed = 0;
2372 ctxt->disableSAX = 1;
2373 return(NULL);
2374 }
2375 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2376 if (buf == NULL) {
2377 xmlGenericError(xmlGenericErrorContext,
2378 "malloc of %d byte failed\n", size);
2379 return(NULL);
2380 }
2381 cur = CUR;
2382 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2383 if (len + 1 >= size) {
2384 size *= 2;
2385 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2386 if (buf == NULL) {
2387 xmlGenericError(xmlGenericErrorContext,
2388 "realloc of %d byte failed\n", size);
2389 return(NULL);
2390 }
2391 }
2392 buf[len++] = cur;
2393 count++;
2394 if (count > 50) {
2395 GROW;
2396 count = 0;
2397 }
2398 NEXT;
2399 cur = CUR;
2400 if (cur == 0) {
2401 GROW;
2402 SHRINK;
2403 cur = CUR;
2404 }
2405 }
2406 buf[len] = 0;
2407 if (cur != stop) {
2408 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2410 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2411 ctxt->wellFormed = 0;
2412 ctxt->disableSAX = 1;
2413 } else {
2414 NEXT;
2415 }
2416 return(buf);
2417}
2418
Daniel Veillard48b2f892001-02-25 16:11:03 +00002419void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002420/**
2421 * xmlParseCharData:
2422 * @ctxt: an XML parser context
2423 * @cdata: int indicating whether we are within a CDATA section
2424 *
2425 * parse a CharData section.
2426 * if we are within a CDATA section ']]>' marks an end of section.
2427 *
2428 * The right angle bracket (>) may be represented using the string "&gt;",
2429 * and must, for compatibility, be escaped using "&gt;" or a character
2430 * reference when it appears in the string "]]>" in content, when that
2431 * string is not marking the end of a CDATA section.
2432 *
2433 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2434 */
2435
2436void
2437xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002438 const xmlChar *in;
2439 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002440 int line = ctxt->input->line;
2441 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002442
2443 SHRINK;
2444 GROW;
2445 /*
2446 * Accelerated common case where input don't need to be
2447 * modified before passing it to the handler.
2448 */
2449 if ((ctxt->token == 0) && (!cdata)) {
2450 in = ctxt->input->cur;
2451 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002452get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002453 while (((*in >= 0x20) && (*in != '<') &&
2454 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2455 in++;
2456 if (*in == 0xA) {
2457 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002458 in++;
2459 while (*in == 0xA) {
2460 ctxt->input->line++;
2461 in++;
2462 }
2463 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002464 }
2465 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002466 if (nbchar > 0) {
2467 if (IS_BLANK(*ctxt->input->cur) &&
2468 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2469 if (ctxt->sax->ignorableWhitespace != NULL)
2470 ctxt->sax->ignorableWhitespace(ctxt->userData,
2471 ctxt->input->cur, nbchar);
2472 } else {
2473 if (ctxt->sax->characters != NULL)
2474 ctxt->sax->characters(ctxt->userData,
2475 ctxt->input->cur, nbchar);
2476 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002477 }
2478 ctxt->input->cur = in;
2479 if (*in == 0xD) {
2480 in++;
2481 if (*in == 0xA) {
2482 ctxt->input->cur = in;
2483 in++;
2484 ctxt->input->line++;
2485 continue; /* while */
2486 }
2487 in--;
2488 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002489 if (*in == '<') {
2490 return;
2491 }
2492 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002493 return;
2494 }
2495 SHRINK;
2496 GROW;
2497 in = ctxt->input->cur;
2498 } while ((*in >= 0x20) && (*in <= 0x7F));
2499 nbchar = 0;
2500 }
Daniel Veillard50582112001-03-26 22:52:16 +00002501 ctxt->input->line = line;
2502 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002503 xmlParseCharDataComplex(ctxt, cdata);
2504}
2505
2506void
2507xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002508 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2509 int nbchar = 0;
2510 int cur, l;
2511 int count = 0;
2512
2513 SHRINK;
2514 GROW;
2515 cur = CUR_CHAR(l);
2516 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2517 ((cur != '&') || (ctxt->token == '&')) &&
2518 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2519 if ((cur == ']') && (NXT(1) == ']') &&
2520 (NXT(2) == '>')) {
2521 if (cdata) break;
2522 else {
2523 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2525 ctxt->sax->error(ctxt->userData,
2526 "Sequence ']]>' not allowed in content\n");
2527 /* Should this be relaxed ??? I see a "must here */
2528 ctxt->wellFormed = 0;
2529 ctxt->disableSAX = 1;
2530 }
2531 }
2532 COPY_BUF(l,buf,nbchar,cur);
2533 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2534 /*
2535 * Ok the segment is to be consumed as chars.
2536 */
2537 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2538 if (areBlanks(ctxt, buf, nbchar)) {
2539 if (ctxt->sax->ignorableWhitespace != NULL)
2540 ctxt->sax->ignorableWhitespace(ctxt->userData,
2541 buf, nbchar);
2542 } else {
2543 if (ctxt->sax->characters != NULL)
2544 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2545 }
2546 }
2547 nbchar = 0;
2548 }
2549 count++;
2550 if (count > 50) {
2551 GROW;
2552 count = 0;
2553 }
2554 NEXTL(l);
2555 cur = CUR_CHAR(l);
2556 }
2557 if (nbchar != 0) {
2558 /*
2559 * Ok the segment is to be consumed as chars.
2560 */
2561 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2562 if (areBlanks(ctxt, buf, nbchar)) {
2563 if (ctxt->sax->ignorableWhitespace != NULL)
2564 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2565 } else {
2566 if (ctxt->sax->characters != NULL)
2567 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2568 }
2569 }
2570 }
2571}
2572
2573/**
2574 * xmlParseExternalID:
2575 * @ctxt: an XML parser context
2576 * @publicID: a xmlChar** receiving PubidLiteral
2577 * @strict: indicate whether we should restrict parsing to only
2578 * production [75], see NOTE below
2579 *
2580 * Parse an External ID or a Public ID
2581 *
2582 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2583 * 'PUBLIC' S PubidLiteral S SystemLiteral
2584 *
2585 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2586 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2587 *
2588 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2589 *
2590 * Returns the function returns SystemLiteral and in the second
2591 * case publicID receives PubidLiteral, is strict is off
2592 * it is possible to return NULL and have publicID set.
2593 */
2594
2595xmlChar *
2596xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2597 xmlChar *URI = NULL;
2598
2599 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002600
2601 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002602 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2603 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2604 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2605 SKIP(6);
2606 if (!IS_BLANK(CUR)) {
2607 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2609 ctxt->sax->error(ctxt->userData,
2610 "Space required after 'SYSTEM'\n");
2611 ctxt->wellFormed = 0;
2612 ctxt->disableSAX = 1;
2613 }
2614 SKIP_BLANKS;
2615 URI = xmlParseSystemLiteral(ctxt);
2616 if (URI == NULL) {
2617 ctxt->errNo = XML_ERR_URI_REQUIRED;
2618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2619 ctxt->sax->error(ctxt->userData,
2620 "xmlParseExternalID: SYSTEM, no URI\n");
2621 ctxt->wellFormed = 0;
2622 ctxt->disableSAX = 1;
2623 }
2624 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2625 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2626 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2627 SKIP(6);
2628 if (!IS_BLANK(CUR)) {
2629 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2631 ctxt->sax->error(ctxt->userData,
2632 "Space required after 'PUBLIC'\n");
2633 ctxt->wellFormed = 0;
2634 ctxt->disableSAX = 1;
2635 }
2636 SKIP_BLANKS;
2637 *publicID = xmlParsePubidLiteral(ctxt);
2638 if (*publicID == NULL) {
2639 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2641 ctxt->sax->error(ctxt->userData,
2642 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2643 ctxt->wellFormed = 0;
2644 ctxt->disableSAX = 1;
2645 }
2646 if (strict) {
2647 /*
2648 * We don't handle [83] so "S SystemLiteral" is required.
2649 */
2650 if (!IS_BLANK(CUR)) {
2651 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2653 ctxt->sax->error(ctxt->userData,
2654 "Space required after the Public Identifier\n");
2655 ctxt->wellFormed = 0;
2656 ctxt->disableSAX = 1;
2657 }
2658 } else {
2659 /*
2660 * We handle [83] so we return immediately, if
2661 * "S SystemLiteral" is not detected. From a purely parsing
2662 * point of view that's a nice mess.
2663 */
2664 const xmlChar *ptr;
2665 GROW;
2666
2667 ptr = CUR_PTR;
2668 if (!IS_BLANK(*ptr)) return(NULL);
2669
2670 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2671 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2672 }
2673 SKIP_BLANKS;
2674 URI = xmlParseSystemLiteral(ctxt);
2675 if (URI == NULL) {
2676 ctxt->errNo = XML_ERR_URI_REQUIRED;
2677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2678 ctxt->sax->error(ctxt->userData,
2679 "xmlParseExternalID: PUBLIC, no URI\n");
2680 ctxt->wellFormed = 0;
2681 ctxt->disableSAX = 1;
2682 }
2683 }
2684 return(URI);
2685}
2686
2687/**
2688 * xmlParseComment:
2689 * @ctxt: an XML parser context
2690 *
2691 * Skip an XML (SGML) comment <!-- .... -->
2692 * The spec says that "For compatibility, the string "--" (double-hyphen)
2693 * must not occur within comments. "
2694 *
2695 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2696 */
2697void
2698xmlParseComment(xmlParserCtxtPtr ctxt) {
2699 xmlChar *buf = NULL;
2700 int len;
2701 int size = XML_PARSER_BUFFER_SIZE;
2702 int q, ql;
2703 int r, rl;
2704 int cur, l;
2705 xmlParserInputState state;
2706 xmlParserInputPtr input = ctxt->input;
2707 int count = 0;
2708
2709 /*
2710 * Check that there is a comment right here.
2711 */
2712 if ((RAW != '<') || (NXT(1) != '!') ||
2713 (NXT(2) != '-') || (NXT(3) != '-')) return;
2714
2715 state = ctxt->instate;
2716 ctxt->instate = XML_PARSER_COMMENT;
2717 SHRINK;
2718 SKIP(4);
2719 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2720 if (buf == NULL) {
2721 xmlGenericError(xmlGenericErrorContext,
2722 "malloc of %d byte failed\n", size);
2723 ctxt->instate = state;
2724 return;
2725 }
2726 q = CUR_CHAR(ql);
2727 NEXTL(ql);
2728 r = CUR_CHAR(rl);
2729 NEXTL(rl);
2730 cur = CUR_CHAR(l);
2731 len = 0;
2732 while (IS_CHAR(cur) && /* checked */
2733 ((cur != '>') ||
2734 (r != '-') || (q != '-'))) {
2735 if ((r == '-') && (q == '-') && (len > 1)) {
2736 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2738 ctxt->sax->error(ctxt->userData,
2739 "Comment must not contain '--' (double-hyphen)`\n");
2740 ctxt->wellFormed = 0;
2741 ctxt->disableSAX = 1;
2742 }
2743 if (len + 5 >= size) {
2744 size *= 2;
2745 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2746 if (buf == NULL) {
2747 xmlGenericError(xmlGenericErrorContext,
2748 "realloc of %d byte failed\n", size);
2749 ctxt->instate = state;
2750 return;
2751 }
2752 }
2753 COPY_BUF(ql,buf,len,q);
2754 q = r;
2755 ql = rl;
2756 r = cur;
2757 rl = l;
2758
2759 count++;
2760 if (count > 50) {
2761 GROW;
2762 count = 0;
2763 }
2764 NEXTL(l);
2765 cur = CUR_CHAR(l);
2766 if (cur == 0) {
2767 SHRINK;
2768 GROW;
2769 cur = CUR_CHAR(l);
2770 }
2771 }
2772 buf[len] = 0;
2773 if (!IS_CHAR(cur)) {
2774 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData,
2777 "Comment not terminated \n<!--%.50s\n", buf);
2778 ctxt->wellFormed = 0;
2779 ctxt->disableSAX = 1;
2780 xmlFree(buf);
2781 } else {
2782 if (input != ctxt->input) {
2783 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2785 ctxt->sax->error(ctxt->userData,
2786"Comment doesn't start and stop in the same entity\n");
2787 ctxt->wellFormed = 0;
2788 ctxt->disableSAX = 1;
2789 }
2790 NEXT;
2791 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2792 (!ctxt->disableSAX))
2793 ctxt->sax->comment(ctxt->userData, buf);
2794 xmlFree(buf);
2795 }
2796 ctxt->instate = state;
2797}
2798
2799/**
2800 * xmlParsePITarget:
2801 * @ctxt: an XML parser context
2802 *
2803 * parse the name of a PI
2804 *
2805 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2806 *
2807 * Returns the PITarget name or NULL
2808 */
2809
2810xmlChar *
2811xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2812 xmlChar *name;
2813
2814 name = xmlParseName(ctxt);
2815 if ((name != NULL) &&
2816 ((name[0] == 'x') || (name[0] == 'X')) &&
2817 ((name[1] == 'm') || (name[1] == 'M')) &&
2818 ((name[2] == 'l') || (name[2] == 'L'))) {
2819 int i;
2820 if ((name[0] == 'x') && (name[1] == 'm') &&
2821 (name[2] == 'l') && (name[3] == 0)) {
2822 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2824 ctxt->sax->error(ctxt->userData,
2825 "XML declaration allowed only at the start of the document\n");
2826 ctxt->wellFormed = 0;
2827 ctxt->disableSAX = 1;
2828 return(name);
2829 } else if (name[3] == 0) {
2830 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2832 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2833 ctxt->wellFormed = 0;
2834 ctxt->disableSAX = 1;
2835 return(name);
2836 }
2837 for (i = 0;;i++) {
2838 if (xmlW3CPIs[i] == NULL) break;
2839 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2840 return(name);
2841 }
2842 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2843 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2844 ctxt->sax->warning(ctxt->userData,
2845 "xmlParsePItarget: invalid name prefix 'xml'\n");
2846 }
2847 }
2848 return(name);
2849}
2850
2851/**
2852 * xmlParsePI:
2853 * @ctxt: an XML parser context
2854 *
2855 * parse an XML Processing Instruction.
2856 *
2857 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2858 *
2859 * The processing is transfered to SAX once parsed.
2860 */
2861
2862void
2863xmlParsePI(xmlParserCtxtPtr ctxt) {
2864 xmlChar *buf = NULL;
2865 int len = 0;
2866 int size = XML_PARSER_BUFFER_SIZE;
2867 int cur, l;
2868 xmlChar *target;
2869 xmlParserInputState state;
2870 int count = 0;
2871
2872 if ((RAW == '<') && (NXT(1) == '?')) {
2873 xmlParserInputPtr input = ctxt->input;
2874 state = ctxt->instate;
2875 ctxt->instate = XML_PARSER_PI;
2876 /*
2877 * this is a Processing Instruction.
2878 */
2879 SKIP(2);
2880 SHRINK;
2881
2882 /*
2883 * Parse the target name and check for special support like
2884 * namespace.
2885 */
2886 target = xmlParsePITarget(ctxt);
2887 if (target != NULL) {
2888 if ((RAW == '?') && (NXT(1) == '>')) {
2889 if (input != ctxt->input) {
2890 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2892 ctxt->sax->error(ctxt->userData,
2893 "PI declaration doesn't start and stop in the same entity\n");
2894 ctxt->wellFormed = 0;
2895 ctxt->disableSAX = 1;
2896 }
2897 SKIP(2);
2898
2899 /*
2900 * SAX: PI detected.
2901 */
2902 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2903 (ctxt->sax->processingInstruction != NULL))
2904 ctxt->sax->processingInstruction(ctxt->userData,
2905 target, NULL);
2906 ctxt->instate = state;
2907 xmlFree(target);
2908 return;
2909 }
2910 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2911 if (buf == NULL) {
2912 xmlGenericError(xmlGenericErrorContext,
2913 "malloc of %d byte failed\n", size);
2914 ctxt->instate = state;
2915 return;
2916 }
2917 cur = CUR;
2918 if (!IS_BLANK(cur)) {
2919 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2921 ctxt->sax->error(ctxt->userData,
2922 "xmlParsePI: PI %s space expected\n", target);
2923 ctxt->wellFormed = 0;
2924 ctxt->disableSAX = 1;
2925 }
2926 SKIP_BLANKS;
2927 cur = CUR_CHAR(l);
2928 while (IS_CHAR(cur) && /* checked */
2929 ((cur != '?') || (NXT(1) != '>'))) {
2930 if (len + 5 >= size) {
2931 size *= 2;
2932 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2933 if (buf == NULL) {
2934 xmlGenericError(xmlGenericErrorContext,
2935 "realloc of %d byte failed\n", size);
2936 ctxt->instate = state;
2937 return;
2938 }
2939 }
2940 count++;
2941 if (count > 50) {
2942 GROW;
2943 count = 0;
2944 }
2945 COPY_BUF(l,buf,len,cur);
2946 NEXTL(l);
2947 cur = CUR_CHAR(l);
2948 if (cur == 0) {
2949 SHRINK;
2950 GROW;
2951 cur = CUR_CHAR(l);
2952 }
2953 }
2954 buf[len] = 0;
2955 if (cur != '?') {
2956 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2958 ctxt->sax->error(ctxt->userData,
2959 "xmlParsePI: PI %s never end ...\n", target);
2960 ctxt->wellFormed = 0;
2961 ctxt->disableSAX = 1;
2962 } else {
2963 if (input != ctxt->input) {
2964 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2966 ctxt->sax->error(ctxt->userData,
2967 "PI declaration doesn't start and stop in the same entity\n");
2968 ctxt->wellFormed = 0;
2969 ctxt->disableSAX = 1;
2970 }
2971 SKIP(2);
2972
2973 /*
2974 * SAX: PI detected.
2975 */
2976 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2977 (ctxt->sax->processingInstruction != NULL))
2978 ctxt->sax->processingInstruction(ctxt->userData,
2979 target, buf);
2980 }
2981 xmlFree(buf);
2982 xmlFree(target);
2983 } else {
2984 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2986 ctxt->sax->error(ctxt->userData,
2987 "xmlParsePI : no target name\n");
2988 ctxt->wellFormed = 0;
2989 ctxt->disableSAX = 1;
2990 }
2991 ctxt->instate = state;
2992 }
2993}
2994
2995/**
2996 * xmlParseNotationDecl:
2997 * @ctxt: an XML parser context
2998 *
2999 * parse a notation declaration
3000 *
3001 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3002 *
3003 * Hence there is actually 3 choices:
3004 * 'PUBLIC' S PubidLiteral
3005 * 'PUBLIC' S PubidLiteral S SystemLiteral
3006 * and 'SYSTEM' S SystemLiteral
3007 *
3008 * See the NOTE on xmlParseExternalID().
3009 */
3010
3011void
3012xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3013 xmlChar *name;
3014 xmlChar *Pubid;
3015 xmlChar *Systemid;
3016
3017 if ((RAW == '<') && (NXT(1) == '!') &&
3018 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3019 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3020 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3021 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3022 xmlParserInputPtr input = ctxt->input;
3023 SHRINK;
3024 SKIP(10);
3025 if (!IS_BLANK(CUR)) {
3026 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3028 ctxt->sax->error(ctxt->userData,
3029 "Space required after '<!NOTATION'\n");
3030 ctxt->wellFormed = 0;
3031 ctxt->disableSAX = 1;
3032 return;
3033 }
3034 SKIP_BLANKS;
3035
Daniel Veillard29631a82001-03-05 09:49:20 +00003036 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003037 if (name == NULL) {
3038 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3040 ctxt->sax->error(ctxt->userData,
3041 "NOTATION: Name expected here\n");
3042 ctxt->wellFormed = 0;
3043 ctxt->disableSAX = 1;
3044 return;
3045 }
3046 if (!IS_BLANK(CUR)) {
3047 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3049 ctxt->sax->error(ctxt->userData,
3050 "Space required after the NOTATION name'\n");
3051 ctxt->wellFormed = 0;
3052 ctxt->disableSAX = 1;
3053 return;
3054 }
3055 SKIP_BLANKS;
3056
3057 /*
3058 * Parse the IDs.
3059 */
3060 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3061 SKIP_BLANKS;
3062
3063 if (RAW == '>') {
3064 if (input != ctxt->input) {
3065 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3067 ctxt->sax->error(ctxt->userData,
3068"Notation declaration doesn't start and stop in the same entity\n");
3069 ctxt->wellFormed = 0;
3070 ctxt->disableSAX = 1;
3071 }
3072 NEXT;
3073 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3074 (ctxt->sax->notationDecl != NULL))
3075 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3076 } else {
3077 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3079 ctxt->sax->error(ctxt->userData,
3080 "'>' required to close NOTATION declaration\n");
3081 ctxt->wellFormed = 0;
3082 ctxt->disableSAX = 1;
3083 }
3084 xmlFree(name);
3085 if (Systemid != NULL) xmlFree(Systemid);
3086 if (Pubid != NULL) xmlFree(Pubid);
3087 }
3088}
3089
3090/**
3091 * xmlParseEntityDecl:
3092 * @ctxt: an XML parser context
3093 *
3094 * parse <!ENTITY declarations
3095 *
3096 * [70] EntityDecl ::= GEDecl | PEDecl
3097 *
3098 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3099 *
3100 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3101 *
3102 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3103 *
3104 * [74] PEDef ::= EntityValue | ExternalID
3105 *
3106 * [76] NDataDecl ::= S 'NDATA' S Name
3107 *
3108 * [ VC: Notation Declared ]
3109 * The Name must match the declared name of a notation.
3110 */
3111
3112void
3113xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3114 xmlChar *name = NULL;
3115 xmlChar *value = NULL;
3116 xmlChar *URI = NULL, *literal = NULL;
3117 xmlChar *ndata = NULL;
3118 int isParameter = 0;
3119 xmlChar *orig = NULL;
3120
3121 GROW;
3122 if ((RAW == '<') && (NXT(1) == '!') &&
3123 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3124 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3125 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3126 xmlParserInputPtr input = ctxt->input;
3127 ctxt->instate = XML_PARSER_ENTITY_DECL;
3128 SHRINK;
3129 SKIP(8);
3130 if (!IS_BLANK(CUR)) {
3131 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3133 ctxt->sax->error(ctxt->userData,
3134 "Space required after '<!ENTITY'\n");
3135 ctxt->wellFormed = 0;
3136 ctxt->disableSAX = 1;
3137 }
3138 SKIP_BLANKS;
3139
3140 if (RAW == '%') {
3141 NEXT;
3142 if (!IS_BLANK(CUR)) {
3143 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3145 ctxt->sax->error(ctxt->userData,
3146 "Space required after '%'\n");
3147 ctxt->wellFormed = 0;
3148 ctxt->disableSAX = 1;
3149 }
3150 SKIP_BLANKS;
3151 isParameter = 1;
3152 }
3153
Daniel Veillard29631a82001-03-05 09:49:20 +00003154 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003155 if (name == NULL) {
3156 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3158 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3159 ctxt->wellFormed = 0;
3160 ctxt->disableSAX = 1;
3161 return;
3162 }
3163 if (!IS_BLANK(CUR)) {
3164 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3166 ctxt->sax->error(ctxt->userData,
3167 "Space required after the entity name\n");
3168 ctxt->wellFormed = 0;
3169 ctxt->disableSAX = 1;
3170 }
3171 SKIP_BLANKS;
3172
3173 /*
3174 * handle the various case of definitions...
3175 */
3176 if (isParameter) {
3177 if ((RAW == '"') || (RAW == '\'')) {
3178 value = xmlParseEntityValue(ctxt, &orig);
3179 if (value) {
3180 if ((ctxt->sax != NULL) &&
3181 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3182 ctxt->sax->entityDecl(ctxt->userData, name,
3183 XML_INTERNAL_PARAMETER_ENTITY,
3184 NULL, NULL, value);
3185 }
3186 } else {
3187 URI = xmlParseExternalID(ctxt, &literal, 1);
3188 if ((URI == NULL) && (literal == NULL)) {
3189 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3191 ctxt->sax->error(ctxt->userData,
3192 "Entity value required\n");
3193 ctxt->wellFormed = 0;
3194 ctxt->disableSAX = 1;
3195 }
3196 if (URI) {
3197 xmlURIPtr uri;
3198
3199 uri = xmlParseURI((const char *) URI);
3200 if (uri == NULL) {
3201 ctxt->errNo = XML_ERR_INVALID_URI;
3202 if ((ctxt->sax != NULL) &&
3203 (!ctxt->disableSAX) &&
3204 (ctxt->sax->error != NULL))
3205 ctxt->sax->error(ctxt->userData,
3206 "Invalid URI: %s\n", URI);
3207 ctxt->wellFormed = 0;
3208 } else {
3209 if (uri->fragment != NULL) {
3210 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3211 if ((ctxt->sax != NULL) &&
3212 (!ctxt->disableSAX) &&
3213 (ctxt->sax->error != NULL))
3214 ctxt->sax->error(ctxt->userData,
3215 "Fragment not allowed: %s\n", URI);
3216 ctxt->wellFormed = 0;
3217 } else {
3218 if ((ctxt->sax != NULL) &&
3219 (!ctxt->disableSAX) &&
3220 (ctxt->sax->entityDecl != NULL))
3221 ctxt->sax->entityDecl(ctxt->userData, name,
3222 XML_EXTERNAL_PARAMETER_ENTITY,
3223 literal, URI, NULL);
3224 }
3225 xmlFreeURI(uri);
3226 }
3227 }
3228 }
3229 } else {
3230 if ((RAW == '"') || (RAW == '\'')) {
3231 value = xmlParseEntityValue(ctxt, &orig);
3232 if ((ctxt->sax != NULL) &&
3233 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3234 ctxt->sax->entityDecl(ctxt->userData, name,
3235 XML_INTERNAL_GENERAL_ENTITY,
3236 NULL, NULL, value);
3237 } else {
3238 URI = xmlParseExternalID(ctxt, &literal, 1);
3239 if ((URI == NULL) && (literal == NULL)) {
3240 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3242 ctxt->sax->error(ctxt->userData,
3243 "Entity value required\n");
3244 ctxt->wellFormed = 0;
3245 ctxt->disableSAX = 1;
3246 }
3247 if (URI) {
3248 xmlURIPtr uri;
3249
3250 uri = xmlParseURI((const char *)URI);
3251 if (uri == NULL) {
3252 ctxt->errNo = XML_ERR_INVALID_URI;
3253 if ((ctxt->sax != NULL) &&
3254 (!ctxt->disableSAX) &&
3255 (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData,
3257 "Invalid URI: %s\n", URI);
3258 ctxt->wellFormed = 0;
3259 } else {
3260 if (uri->fragment != NULL) {
3261 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3262 if ((ctxt->sax != NULL) &&
3263 (!ctxt->disableSAX) &&
3264 (ctxt->sax->error != NULL))
3265 ctxt->sax->error(ctxt->userData,
3266 "Fragment not allowed: %s\n", URI);
3267 ctxt->wellFormed = 0;
3268 }
3269 xmlFreeURI(uri);
3270 }
3271 }
3272 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3273 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3275 ctxt->sax->error(ctxt->userData,
3276 "Space required before 'NDATA'\n");
3277 ctxt->wellFormed = 0;
3278 ctxt->disableSAX = 1;
3279 }
3280 SKIP_BLANKS;
3281 if ((RAW == 'N') && (NXT(1) == 'D') &&
3282 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3283 (NXT(4) == 'A')) {
3284 SKIP(5);
3285 if (!IS_BLANK(CUR)) {
3286 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3288 ctxt->sax->error(ctxt->userData,
3289 "Space required after 'NDATA'\n");
3290 ctxt->wellFormed = 0;
3291 ctxt->disableSAX = 1;
3292 }
3293 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003294 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003295 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3296 (ctxt->sax->unparsedEntityDecl != NULL))
3297 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3298 literal, URI, ndata);
3299 } else {
3300 if ((ctxt->sax != NULL) &&
3301 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3302 ctxt->sax->entityDecl(ctxt->userData, name,
3303 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3304 literal, URI, NULL);
3305 }
3306 }
3307 }
3308 SKIP_BLANKS;
3309 if (RAW != '>') {
3310 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3312 ctxt->sax->error(ctxt->userData,
3313 "xmlParseEntityDecl: entity %s not terminated\n", name);
3314 ctxt->wellFormed = 0;
3315 ctxt->disableSAX = 1;
3316 } else {
3317 if (input != ctxt->input) {
3318 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3320 ctxt->sax->error(ctxt->userData,
3321"Entity declaration doesn't start and stop in the same entity\n");
3322 ctxt->wellFormed = 0;
3323 ctxt->disableSAX = 1;
3324 }
3325 NEXT;
3326 }
3327 if (orig != NULL) {
3328 /*
3329 * Ugly mechanism to save the raw entity value.
3330 */
3331 xmlEntityPtr cur = NULL;
3332
3333 if (isParameter) {
3334 if ((ctxt->sax != NULL) &&
3335 (ctxt->sax->getParameterEntity != NULL))
3336 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3337 } else {
3338 if ((ctxt->sax != NULL) &&
3339 (ctxt->sax->getEntity != NULL))
3340 cur = ctxt->sax->getEntity(ctxt->userData, name);
3341 }
3342 if (cur != NULL) {
3343 if (cur->orig != NULL)
3344 xmlFree(orig);
3345 else
3346 cur->orig = orig;
3347 } else
3348 xmlFree(orig);
3349 }
3350 if (name != NULL) xmlFree(name);
3351 if (value != NULL) xmlFree(value);
3352 if (URI != NULL) xmlFree(URI);
3353 if (literal != NULL) xmlFree(literal);
3354 if (ndata != NULL) xmlFree(ndata);
3355 }
3356}
3357
3358/**
3359 * xmlParseDefaultDecl:
3360 * @ctxt: an XML parser context
3361 * @value: Receive a possible fixed default value for the attribute
3362 *
3363 * Parse an attribute default declaration
3364 *
3365 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3366 *
3367 * [ VC: Required Attribute ]
3368 * if the default declaration is the keyword #REQUIRED, then the
3369 * attribute must be specified for all elements of the type in the
3370 * attribute-list declaration.
3371 *
3372 * [ VC: Attribute Default Legal ]
3373 * The declared default value must meet the lexical constraints of
3374 * the declared attribute type c.f. xmlValidateAttributeDecl()
3375 *
3376 * [ VC: Fixed Attribute Default ]
3377 * if an attribute has a default value declared with the #FIXED
3378 * keyword, instances of that attribute must match the default value.
3379 *
3380 * [ WFC: No < in Attribute Values ]
3381 * handled in xmlParseAttValue()
3382 *
3383 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3384 * or XML_ATTRIBUTE_FIXED.
3385 */
3386
3387int
3388xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3389 int val;
3390 xmlChar *ret;
3391
3392 *value = NULL;
3393 if ((RAW == '#') && (NXT(1) == 'R') &&
3394 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3395 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3396 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3397 (NXT(8) == 'D')) {
3398 SKIP(9);
3399 return(XML_ATTRIBUTE_REQUIRED);
3400 }
3401 if ((RAW == '#') && (NXT(1) == 'I') &&
3402 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3403 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3404 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3405 SKIP(8);
3406 return(XML_ATTRIBUTE_IMPLIED);
3407 }
3408 val = XML_ATTRIBUTE_NONE;
3409 if ((RAW == '#') && (NXT(1) == 'F') &&
3410 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3411 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3412 SKIP(6);
3413 val = XML_ATTRIBUTE_FIXED;
3414 if (!IS_BLANK(CUR)) {
3415 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3417 ctxt->sax->error(ctxt->userData,
3418 "Space required after '#FIXED'\n");
3419 ctxt->wellFormed = 0;
3420 ctxt->disableSAX = 1;
3421 }
3422 SKIP_BLANKS;
3423 }
3424 ret = xmlParseAttValue(ctxt);
3425 ctxt->instate = XML_PARSER_DTD;
3426 if (ret == NULL) {
3427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3428 ctxt->sax->error(ctxt->userData,
3429 "Attribute default value declaration error\n");
3430 ctxt->wellFormed = 0;
3431 ctxt->disableSAX = 1;
3432 } else
3433 *value = ret;
3434 return(val);
3435}
3436
3437/**
3438 * xmlParseNotationType:
3439 * @ctxt: an XML parser context
3440 *
3441 * parse an Notation attribute type.
3442 *
3443 * Note: the leading 'NOTATION' S part has already being parsed...
3444 *
3445 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3446 *
3447 * [ VC: Notation Attributes ]
3448 * Values of this type must match one of the notation names included
3449 * in the declaration; all notation names in the declaration must be declared.
3450 *
3451 * Returns: the notation attribute tree built while parsing
3452 */
3453
3454xmlEnumerationPtr
3455xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3456 xmlChar *name;
3457 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3458
3459 if (RAW != '(') {
3460 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3462 ctxt->sax->error(ctxt->userData,
3463 "'(' required to start 'NOTATION'\n");
3464 ctxt->wellFormed = 0;
3465 ctxt->disableSAX = 1;
3466 return(NULL);
3467 }
3468 SHRINK;
3469 do {
3470 NEXT;
3471 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003472 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003473 if (name == NULL) {
3474 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3476 ctxt->sax->error(ctxt->userData,
3477 "Name expected in NOTATION declaration\n");
3478 ctxt->wellFormed = 0;
3479 ctxt->disableSAX = 1;
3480 return(ret);
3481 }
3482 cur = xmlCreateEnumeration(name);
3483 xmlFree(name);
3484 if (cur == NULL) return(ret);
3485 if (last == NULL) ret = last = cur;
3486 else {
3487 last->next = cur;
3488 last = cur;
3489 }
3490 SKIP_BLANKS;
3491 } while (RAW == '|');
3492 if (RAW != ')') {
3493 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3495 ctxt->sax->error(ctxt->userData,
3496 "')' required to finish NOTATION declaration\n");
3497 ctxt->wellFormed = 0;
3498 ctxt->disableSAX = 1;
3499 if ((last != NULL) && (last != ret))
3500 xmlFreeEnumeration(last);
3501 return(ret);
3502 }
3503 NEXT;
3504 return(ret);
3505}
3506
3507/**
3508 * xmlParseEnumerationType:
3509 * @ctxt: an XML parser context
3510 *
3511 * parse an Enumeration attribute type.
3512 *
3513 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3514 *
3515 * [ VC: Enumeration ]
3516 * Values of this type must match one of the Nmtoken tokens in
3517 * the declaration
3518 *
3519 * Returns: the enumeration attribute tree built while parsing
3520 */
3521
3522xmlEnumerationPtr
3523xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3524 xmlChar *name;
3525 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3526
3527 if (RAW != '(') {
3528 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3530 ctxt->sax->error(ctxt->userData,
3531 "'(' required to start ATTLIST enumeration\n");
3532 ctxt->wellFormed = 0;
3533 ctxt->disableSAX = 1;
3534 return(NULL);
3535 }
3536 SHRINK;
3537 do {
3538 NEXT;
3539 SKIP_BLANKS;
3540 name = xmlParseNmtoken(ctxt);
3541 if (name == NULL) {
3542 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3544 ctxt->sax->error(ctxt->userData,
3545 "NmToken expected in ATTLIST enumeration\n");
3546 ctxt->wellFormed = 0;
3547 ctxt->disableSAX = 1;
3548 return(ret);
3549 }
3550 cur = xmlCreateEnumeration(name);
3551 xmlFree(name);
3552 if (cur == NULL) return(ret);
3553 if (last == NULL) ret = last = cur;
3554 else {
3555 last->next = cur;
3556 last = cur;
3557 }
3558 SKIP_BLANKS;
3559 } while (RAW == '|');
3560 if (RAW != ')') {
3561 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3563 ctxt->sax->error(ctxt->userData,
3564 "')' required to finish ATTLIST enumeration\n");
3565 ctxt->wellFormed = 0;
3566 ctxt->disableSAX = 1;
3567 return(ret);
3568 }
3569 NEXT;
3570 return(ret);
3571}
3572
3573/**
3574 * xmlParseEnumeratedType:
3575 * @ctxt: an XML parser context
3576 * @tree: the enumeration tree built while parsing
3577 *
3578 * parse an Enumerated attribute type.
3579 *
3580 * [57] EnumeratedType ::= NotationType | Enumeration
3581 *
3582 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3583 *
3584 *
3585 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3586 */
3587
3588int
3589xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3590 if ((RAW == 'N') && (NXT(1) == 'O') &&
3591 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3592 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3593 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3594 SKIP(8);
3595 if (!IS_BLANK(CUR)) {
3596 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3598 ctxt->sax->error(ctxt->userData,
3599 "Space required after 'NOTATION'\n");
3600 ctxt->wellFormed = 0;
3601 ctxt->disableSAX = 1;
3602 return(0);
3603 }
3604 SKIP_BLANKS;
3605 *tree = xmlParseNotationType(ctxt);
3606 if (*tree == NULL) return(0);
3607 return(XML_ATTRIBUTE_NOTATION);
3608 }
3609 *tree = xmlParseEnumerationType(ctxt);
3610 if (*tree == NULL) return(0);
3611 return(XML_ATTRIBUTE_ENUMERATION);
3612}
3613
3614/**
3615 * xmlParseAttributeType:
3616 * @ctxt: an XML parser context
3617 * @tree: the enumeration tree built while parsing
3618 *
3619 * parse the Attribute list def for an element
3620 *
3621 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3622 *
3623 * [55] StringType ::= 'CDATA'
3624 *
3625 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3626 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3627 *
3628 * Validity constraints for attribute values syntax are checked in
3629 * xmlValidateAttributeValue()
3630 *
3631 * [ VC: ID ]
3632 * Values of type ID must match the Name production. A name must not
3633 * appear more than once in an XML document as a value of this type;
3634 * i.e., ID values must uniquely identify the elements which bear them.
3635 *
3636 * [ VC: One ID per Element Type ]
3637 * No element type may have more than one ID attribute specified.
3638 *
3639 * [ VC: ID Attribute Default ]
3640 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3641 *
3642 * [ VC: IDREF ]
3643 * Values of type IDREF must match the Name production, and values
3644 * of type IDREFS must match Names; each IDREF Name must match the value
3645 * of an ID attribute on some element in the XML document; i.e. IDREF
3646 * values must match the value of some ID attribute.
3647 *
3648 * [ VC: Entity Name ]
3649 * Values of type ENTITY must match the Name production, values
3650 * of type ENTITIES must match Names; each Entity Name must match the
3651 * name of an unparsed entity declared in the DTD.
3652 *
3653 * [ VC: Name Token ]
3654 * Values of type NMTOKEN must match the Nmtoken production; values
3655 * of type NMTOKENS must match Nmtokens.
3656 *
3657 * Returns the attribute type
3658 */
3659int
3660xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3661 SHRINK;
3662 if ((RAW == 'C') && (NXT(1) == 'D') &&
3663 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3664 (NXT(4) == 'A')) {
3665 SKIP(5);
3666 return(XML_ATTRIBUTE_CDATA);
3667 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3668 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3669 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3670 SKIP(6);
3671 return(XML_ATTRIBUTE_IDREFS);
3672 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3673 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3674 (NXT(4) == 'F')) {
3675 SKIP(5);
3676 return(XML_ATTRIBUTE_IDREF);
3677 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3678 SKIP(2);
3679 return(XML_ATTRIBUTE_ID);
3680 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3681 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3682 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3683 SKIP(6);
3684 return(XML_ATTRIBUTE_ENTITY);
3685 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3686 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3687 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3688 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3689 SKIP(8);
3690 return(XML_ATTRIBUTE_ENTITIES);
3691 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3692 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3693 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3694 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3695 SKIP(8);
3696 return(XML_ATTRIBUTE_NMTOKENS);
3697 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3698 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3699 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3700 (NXT(6) == 'N')) {
3701 SKIP(7);
3702 return(XML_ATTRIBUTE_NMTOKEN);
3703 }
3704 return(xmlParseEnumeratedType(ctxt, tree));
3705}
3706
3707/**
3708 * xmlParseAttributeListDecl:
3709 * @ctxt: an XML parser context
3710 *
3711 * : parse the Attribute list def for an element
3712 *
3713 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3714 *
3715 * [53] AttDef ::= S Name S AttType S DefaultDecl
3716 *
3717 */
3718void
3719xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3720 xmlChar *elemName;
3721 xmlChar *attrName;
3722 xmlEnumerationPtr tree;
3723
3724 if ((RAW == '<') && (NXT(1) == '!') &&
3725 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3726 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3727 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3728 (NXT(8) == 'T')) {
3729 xmlParserInputPtr input = ctxt->input;
3730
3731 SKIP(9);
3732 if (!IS_BLANK(CUR)) {
3733 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3735 ctxt->sax->error(ctxt->userData,
3736 "Space required after '<!ATTLIST'\n");
3737 ctxt->wellFormed = 0;
3738 ctxt->disableSAX = 1;
3739 }
3740 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003741 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003742 if (elemName == NULL) {
3743 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3745 ctxt->sax->error(ctxt->userData,
3746 "ATTLIST: no name for Element\n");
3747 ctxt->wellFormed = 0;
3748 ctxt->disableSAX = 1;
3749 return;
3750 }
3751 SKIP_BLANKS;
3752 GROW;
3753 while (RAW != '>') {
3754 const xmlChar *check = CUR_PTR;
3755 int type;
3756 int def;
3757 xmlChar *defaultValue = NULL;
3758
3759 GROW;
3760 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003761 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003762 if (attrName == NULL) {
3763 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3765 ctxt->sax->error(ctxt->userData,
3766 "ATTLIST: no name for Attribute\n");
3767 ctxt->wellFormed = 0;
3768 ctxt->disableSAX = 1;
3769 break;
3770 }
3771 GROW;
3772 if (!IS_BLANK(CUR)) {
3773 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3775 ctxt->sax->error(ctxt->userData,
3776 "Space required after the attribute name\n");
3777 ctxt->wellFormed = 0;
3778 ctxt->disableSAX = 1;
3779 if (attrName != NULL)
3780 xmlFree(attrName);
3781 if (defaultValue != NULL)
3782 xmlFree(defaultValue);
3783 break;
3784 }
3785 SKIP_BLANKS;
3786
3787 type = xmlParseAttributeType(ctxt, &tree);
3788 if (type <= 0) {
3789 if (attrName != NULL)
3790 xmlFree(attrName);
3791 if (defaultValue != NULL)
3792 xmlFree(defaultValue);
3793 break;
3794 }
3795
3796 GROW;
3797 if (!IS_BLANK(CUR)) {
3798 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3800 ctxt->sax->error(ctxt->userData,
3801 "Space required after the attribute type\n");
3802 ctxt->wellFormed = 0;
3803 ctxt->disableSAX = 1;
3804 if (attrName != NULL)
3805 xmlFree(attrName);
3806 if (defaultValue != NULL)
3807 xmlFree(defaultValue);
3808 if (tree != NULL)
3809 xmlFreeEnumeration(tree);
3810 break;
3811 }
3812 SKIP_BLANKS;
3813
3814 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3815 if (def <= 0) {
3816 if (attrName != NULL)
3817 xmlFree(attrName);
3818 if (defaultValue != NULL)
3819 xmlFree(defaultValue);
3820 if (tree != NULL)
3821 xmlFreeEnumeration(tree);
3822 break;
3823 }
3824
3825 GROW;
3826 if (RAW != '>') {
3827 if (!IS_BLANK(CUR)) {
3828 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3830 ctxt->sax->error(ctxt->userData,
3831 "Space required after the attribute default value\n");
3832 ctxt->wellFormed = 0;
3833 ctxt->disableSAX = 1;
3834 if (attrName != NULL)
3835 xmlFree(attrName);
3836 if (defaultValue != NULL)
3837 xmlFree(defaultValue);
3838 if (tree != NULL)
3839 xmlFreeEnumeration(tree);
3840 break;
3841 }
3842 SKIP_BLANKS;
3843 }
3844 if (check == CUR_PTR) {
3845 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3847 ctxt->sax->error(ctxt->userData,
3848 "xmlParseAttributeListDecl: detected internal error\n");
3849 if (attrName != NULL)
3850 xmlFree(attrName);
3851 if (defaultValue != NULL)
3852 xmlFree(defaultValue);
3853 if (tree != NULL)
3854 xmlFreeEnumeration(tree);
3855 break;
3856 }
3857 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3858 (ctxt->sax->attributeDecl != NULL))
3859 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3860 type, def, defaultValue, tree);
3861 if (attrName != NULL)
3862 xmlFree(attrName);
3863 if (defaultValue != NULL)
3864 xmlFree(defaultValue);
3865 GROW;
3866 }
3867 if (RAW == '>') {
3868 if (input != ctxt->input) {
3869 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3871 ctxt->sax->error(ctxt->userData,
3872"Attribute list declaration doesn't start and stop in the same entity\n");
3873 ctxt->wellFormed = 0;
3874 ctxt->disableSAX = 1;
3875 }
3876 NEXT;
3877 }
3878
3879 xmlFree(elemName);
3880 }
3881}
3882
3883/**
3884 * xmlParseElementMixedContentDecl:
3885 * @ctxt: an XML parser context
3886 *
3887 * parse the declaration for a Mixed Element content
3888 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3889 *
3890 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3891 * '(' S? '#PCDATA' S? ')'
3892 *
3893 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3894 *
3895 * [ VC: No Duplicate Types ]
3896 * The same name must not appear more than once in a single
3897 * mixed-content declaration.
3898 *
3899 * returns: the list of the xmlElementContentPtr describing the element choices
3900 */
3901xmlElementContentPtr
3902xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3903 xmlElementContentPtr ret = NULL, cur = NULL, n;
3904 xmlChar *elem = NULL;
3905
3906 GROW;
3907 if ((RAW == '#') && (NXT(1) == 'P') &&
3908 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3909 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3910 (NXT(6) == 'A')) {
3911 SKIP(7);
3912 SKIP_BLANKS;
3913 SHRINK;
3914 if (RAW == ')') {
3915 ctxt->entity = ctxt->input;
3916 NEXT;
3917 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3918 if (RAW == '*') {
3919 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3920 NEXT;
3921 }
3922 return(ret);
3923 }
3924 if ((RAW == '(') || (RAW == '|')) {
3925 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3926 if (ret == NULL) return(NULL);
3927 }
3928 while (RAW == '|') {
3929 NEXT;
3930 if (elem == NULL) {
3931 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3932 if (ret == NULL) return(NULL);
3933 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003934 if (cur != NULL)
3935 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003936 cur = ret;
3937 } else {
3938 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3939 if (n == NULL) return(NULL);
3940 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003941 if (n->c1 != NULL)
3942 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00003943 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003944 if (n != NULL)
3945 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003946 cur = n;
3947 xmlFree(elem);
3948 }
3949 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003950 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003951 if (elem == NULL) {
3952 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3954 ctxt->sax->error(ctxt->userData,
3955 "xmlParseElementMixedContentDecl : Name expected\n");
3956 ctxt->wellFormed = 0;
3957 ctxt->disableSAX = 1;
3958 xmlFreeElementContent(cur);
3959 return(NULL);
3960 }
3961 SKIP_BLANKS;
3962 GROW;
3963 }
3964 if ((RAW == ')') && (NXT(1) == '*')) {
3965 if (elem != NULL) {
3966 cur->c2 = xmlNewElementContent(elem,
3967 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003968 if (cur->c2 != NULL)
3969 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003970 xmlFree(elem);
3971 }
3972 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3973 ctxt->entity = ctxt->input;
3974 SKIP(2);
3975 } else {
3976 if (elem != NULL) xmlFree(elem);
3977 xmlFreeElementContent(ret);
3978 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3980 ctxt->sax->error(ctxt->userData,
3981 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3982 ctxt->wellFormed = 0;
3983 ctxt->disableSAX = 1;
3984 return(NULL);
3985 }
3986
3987 } else {
3988 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3990 ctxt->sax->error(ctxt->userData,
3991 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3992 ctxt->wellFormed = 0;
3993 ctxt->disableSAX = 1;
3994 }
3995 return(ret);
3996}
3997
3998/**
3999 * xmlParseElementChildrenContentDecl:
4000 * @ctxt: an XML parser context
4001 *
4002 * parse the declaration for a Mixed Element content
4003 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4004 *
4005 *
4006 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4007 *
4008 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4009 *
4010 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4011 *
4012 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4013 *
4014 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4015 * TODO Parameter-entity replacement text must be properly nested
4016 * with parenthetized groups. That is to say, if either of the
4017 * opening or closing parentheses in a choice, seq, or Mixed
4018 * construct is contained in the replacement text for a parameter
4019 * entity, both must be contained in the same replacement text. For
4020 * interoperability, if a parameter-entity reference appears in a
4021 * choice, seq, or Mixed construct, its replacement text should not
4022 * be empty, and neither the first nor last non-blank character of
4023 * the replacement text should be a connector (| or ,).
4024 *
4025 * returns: the tree of xmlElementContentPtr describing the element
4026 * hierarchy.
4027 */
4028xmlElementContentPtr
4029#ifdef VMS
4030xmlParseElementChildrenContentD
4031#else
4032xmlParseElementChildrenContentDecl
4033#endif
4034(xmlParserCtxtPtr ctxt) {
4035 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4036 xmlChar *elem;
4037 xmlChar type = 0;
4038
4039 SKIP_BLANKS;
4040 GROW;
4041 if (RAW == '(') {
4042 /* Recurse on first child */
4043 NEXT;
4044 SKIP_BLANKS;
4045 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4046 SKIP_BLANKS;
4047 GROW;
4048 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004049 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004050 if (elem == NULL) {
4051 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4053 ctxt->sax->error(ctxt->userData,
4054 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4055 ctxt->wellFormed = 0;
4056 ctxt->disableSAX = 1;
4057 return(NULL);
4058 }
4059 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4060 GROW;
4061 if (RAW == '?') {
4062 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4063 NEXT;
4064 } else if (RAW == '*') {
4065 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4066 NEXT;
4067 } else if (RAW == '+') {
4068 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4069 NEXT;
4070 } else {
4071 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4072 }
4073 xmlFree(elem);
4074 GROW;
4075 }
4076 SKIP_BLANKS;
4077 SHRINK;
4078 while (RAW != ')') {
4079 /*
4080 * Each loop we parse one separator and one element.
4081 */
4082 if (RAW == ',') {
4083 if (type == 0) type = CUR;
4084
4085 /*
4086 * Detect "Name | Name , Name" error
4087 */
4088 else if (type != CUR) {
4089 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4091 ctxt->sax->error(ctxt->userData,
4092 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4093 type);
4094 ctxt->wellFormed = 0;
4095 ctxt->disableSAX = 1;
4096 if ((op != NULL) && (op != ret))
4097 xmlFreeElementContent(op);
4098 if ((last != NULL) && (last != ret) &&
4099 (last != ret->c1) && (last != ret->c2))
4100 xmlFreeElementContent(last);
4101 if (ret != NULL)
4102 xmlFreeElementContent(ret);
4103 return(NULL);
4104 }
4105 NEXT;
4106
4107 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4108 if (op == NULL) {
4109 xmlFreeElementContent(ret);
4110 return(NULL);
4111 }
4112 if (last == NULL) {
4113 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004114 if (ret != NULL)
4115 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004116 ret = cur = op;
4117 } else {
4118 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004119 if (op != NULL)
4120 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004121 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004122 if (last != NULL)
4123 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004124 cur =op;
4125 last = NULL;
4126 }
4127 } else if (RAW == '|') {
4128 if (type == 0) type = CUR;
4129
4130 /*
4131 * Detect "Name , Name | Name" error
4132 */
4133 else if (type != CUR) {
4134 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4136 ctxt->sax->error(ctxt->userData,
4137 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4138 type);
4139 ctxt->wellFormed = 0;
4140 ctxt->disableSAX = 1;
4141 if ((op != NULL) && (op != ret) && (op != last))
4142 xmlFreeElementContent(op);
4143 if ((last != NULL) && (last != ret) &&
4144 (last != ret->c1) && (last != ret->c2))
4145 xmlFreeElementContent(last);
4146 if (ret != NULL)
4147 xmlFreeElementContent(ret);
4148 return(NULL);
4149 }
4150 NEXT;
4151
4152 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4153 if (op == NULL) {
4154 if ((op != NULL) && (op != ret))
4155 xmlFreeElementContent(op);
4156 if ((last != NULL) && (last != ret) &&
4157 (last != ret->c1) && (last != ret->c2))
4158 xmlFreeElementContent(last);
4159 if (ret != NULL)
4160 xmlFreeElementContent(ret);
4161 return(NULL);
4162 }
4163 if (last == NULL) {
4164 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004165 if (ret != NULL)
4166 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004167 ret = cur = op;
4168 } else {
4169 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004170 if (op != NULL)
4171 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004172 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004173 if (last != NULL)
4174 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004175 cur =op;
4176 last = NULL;
4177 }
4178 } else {
4179 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4181 ctxt->sax->error(ctxt->userData,
4182 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4183 ctxt->wellFormed = 0;
4184 ctxt->disableSAX = 1;
4185 if ((op != NULL) && (op != ret))
4186 xmlFreeElementContent(op);
4187 if ((last != NULL) && (last != ret) &&
4188 (last != ret->c1) && (last != ret->c2))
4189 xmlFreeElementContent(last);
4190 if (ret != NULL)
4191 xmlFreeElementContent(ret);
4192 return(NULL);
4193 }
4194 GROW;
4195 SKIP_BLANKS;
4196 GROW;
4197 if (RAW == '(') {
4198 /* Recurse on second child */
4199 NEXT;
4200 SKIP_BLANKS;
4201 last = xmlParseElementChildrenContentDecl(ctxt);
4202 SKIP_BLANKS;
4203 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004204 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004205 if (elem == NULL) {
4206 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4208 ctxt->sax->error(ctxt->userData,
4209 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4210 ctxt->wellFormed = 0;
4211 ctxt->disableSAX = 1;
4212 if ((op != NULL) && (op != ret))
4213 xmlFreeElementContent(op);
4214 if ((last != NULL) && (last != ret) &&
4215 (last != ret->c1) && (last != ret->c2))
4216 xmlFreeElementContent(last);
4217 if (ret != NULL)
4218 xmlFreeElementContent(ret);
4219 return(NULL);
4220 }
4221 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4222 xmlFree(elem);
4223 if (RAW == '?') {
4224 last->ocur = XML_ELEMENT_CONTENT_OPT;
4225 NEXT;
4226 } else if (RAW == '*') {
4227 last->ocur = XML_ELEMENT_CONTENT_MULT;
4228 NEXT;
4229 } else if (RAW == '+') {
4230 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4231 NEXT;
4232 } else {
4233 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4234 }
4235 }
4236 SKIP_BLANKS;
4237 GROW;
4238 }
4239 if ((cur != NULL) && (last != NULL)) {
4240 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004241 if (last != NULL)
4242 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004243 }
4244 ctxt->entity = ctxt->input;
4245 NEXT;
4246 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004247 if (ret != NULL)
4248 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004249 NEXT;
4250 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004251 if (ret != NULL)
4252 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 NEXT;
4254 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004255 if (ret != NULL)
4256 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004257 NEXT;
4258 }
4259 return(ret);
4260}
4261
4262/**
4263 * xmlParseElementContentDecl:
4264 * @ctxt: an XML parser context
4265 * @name: the name of the element being defined.
4266 * @result: the Element Content pointer will be stored here if any
4267 *
4268 * parse the declaration for an Element content either Mixed or Children,
4269 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4270 *
4271 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4272 *
4273 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4274 */
4275
4276int
4277xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4278 xmlElementContentPtr *result) {
4279
4280 xmlElementContentPtr tree = NULL;
4281 xmlParserInputPtr input = ctxt->input;
4282 int res;
4283
4284 *result = NULL;
4285
4286 if (RAW != '(') {
4287 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4289 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004290 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004291 ctxt->wellFormed = 0;
4292 ctxt->disableSAX = 1;
4293 return(-1);
4294 }
4295 NEXT;
4296 GROW;
4297 SKIP_BLANKS;
4298 if ((RAW == '#') && (NXT(1) == 'P') &&
4299 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4300 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4301 (NXT(6) == 'A')) {
4302 tree = xmlParseElementMixedContentDecl(ctxt);
4303 res = XML_ELEMENT_TYPE_MIXED;
4304 } else {
4305 tree = xmlParseElementChildrenContentDecl(ctxt);
4306 res = XML_ELEMENT_TYPE_ELEMENT;
4307 }
4308 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4309 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4311 ctxt->sax->error(ctxt->userData,
4312"Element content declaration doesn't start and stop in the same entity\n");
4313 ctxt->wellFormed = 0;
4314 ctxt->disableSAX = 1;
4315 }
4316 SKIP_BLANKS;
4317 *result = tree;
4318 return(res);
4319}
4320
4321/**
4322 * xmlParseElementDecl:
4323 * @ctxt: an XML parser context
4324 *
4325 * parse an Element declaration.
4326 *
4327 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4328 *
4329 * [ VC: Unique Element Type Declaration ]
4330 * No element type may be declared more than once
4331 *
4332 * Returns the type of the element, or -1 in case of error
4333 */
4334int
4335xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4336 xmlChar *name;
4337 int ret = -1;
4338 xmlElementContentPtr content = NULL;
4339
4340 GROW;
4341 if ((RAW == '<') && (NXT(1) == '!') &&
4342 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4343 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4344 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4345 (NXT(8) == 'T')) {
4346 xmlParserInputPtr input = ctxt->input;
4347
4348 SKIP(9);
4349 if (!IS_BLANK(CUR)) {
4350 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4352 ctxt->sax->error(ctxt->userData,
4353 "Space required after 'ELEMENT'\n");
4354 ctxt->wellFormed = 0;
4355 ctxt->disableSAX = 1;
4356 }
4357 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004358 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004359 if (name == NULL) {
4360 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4362 ctxt->sax->error(ctxt->userData,
4363 "xmlParseElementDecl: no name for Element\n");
4364 ctxt->wellFormed = 0;
4365 ctxt->disableSAX = 1;
4366 return(-1);
4367 }
4368 while ((RAW == 0) && (ctxt->inputNr > 1))
4369 xmlPopInput(ctxt);
4370 if (!IS_BLANK(CUR)) {
4371 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4373 ctxt->sax->error(ctxt->userData,
4374 "Space required after the element name\n");
4375 ctxt->wellFormed = 0;
4376 ctxt->disableSAX = 1;
4377 }
4378 SKIP_BLANKS;
4379 if ((RAW == 'E') && (NXT(1) == 'M') &&
4380 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4381 (NXT(4) == 'Y')) {
4382 SKIP(5);
4383 /*
4384 * Element must always be empty.
4385 */
4386 ret = XML_ELEMENT_TYPE_EMPTY;
4387 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4388 (NXT(2) == 'Y')) {
4389 SKIP(3);
4390 /*
4391 * Element is a generic container.
4392 */
4393 ret = XML_ELEMENT_TYPE_ANY;
4394 } else if (RAW == '(') {
4395 ret = xmlParseElementContentDecl(ctxt, name, &content);
4396 } else {
4397 /*
4398 * [ WFC: PEs in Internal Subset ] error handling.
4399 */
4400 if ((RAW == '%') && (ctxt->external == 0) &&
4401 (ctxt->inputNr == 1)) {
4402 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4404 ctxt->sax->error(ctxt->userData,
4405 "PEReference: forbidden within markup decl in internal subset\n");
4406 } else {
4407 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4409 ctxt->sax->error(ctxt->userData,
4410 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4411 }
4412 ctxt->wellFormed = 0;
4413 ctxt->disableSAX = 1;
4414 if (name != NULL) xmlFree(name);
4415 return(-1);
4416 }
4417
4418 SKIP_BLANKS;
4419 /*
4420 * Pop-up of finished entities.
4421 */
4422 while ((RAW == 0) && (ctxt->inputNr > 1))
4423 xmlPopInput(ctxt);
4424 SKIP_BLANKS;
4425
4426 if (RAW != '>') {
4427 ctxt->errNo = XML_ERR_GT_REQUIRED;
4428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4429 ctxt->sax->error(ctxt->userData,
4430 "xmlParseElementDecl: expected '>' at the end\n");
4431 ctxt->wellFormed = 0;
4432 ctxt->disableSAX = 1;
4433 } else {
4434 if (input != ctxt->input) {
4435 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4437 ctxt->sax->error(ctxt->userData,
4438"Element declaration doesn't start and stop in the same entity\n");
4439 ctxt->wellFormed = 0;
4440 ctxt->disableSAX = 1;
4441 }
4442
4443 NEXT;
4444 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4445 (ctxt->sax->elementDecl != NULL))
4446 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4447 content);
4448 }
4449 if (content != NULL) {
4450 xmlFreeElementContent(content);
4451 }
4452 if (name != NULL) {
4453 xmlFree(name);
4454 }
4455 }
4456 return(ret);
4457}
4458
4459/**
4460 * xmlParseMarkupDecl:
4461 * @ctxt: an XML parser context
4462 *
4463 * parse Markup declarations
4464 *
4465 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4466 * NotationDecl | PI | Comment
4467 *
4468 * [ VC: Proper Declaration/PE Nesting ]
4469 * Parameter-entity replacement text must be properly nested with
4470 * markup declarations. That is to say, if either the first character
4471 * or the last character of a markup declaration (markupdecl above) is
4472 * contained in the replacement text for a parameter-entity reference,
4473 * both must be contained in the same replacement text.
4474 *
4475 * [ WFC: PEs in Internal Subset ]
4476 * In the internal DTD subset, parameter-entity references can occur
4477 * only where markup declarations can occur, not within markup declarations.
4478 * (This does not apply to references that occur in external parameter
4479 * entities or to the external subset.)
4480 */
4481void
4482xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4483 GROW;
4484 xmlParseElementDecl(ctxt);
4485 xmlParseAttributeListDecl(ctxt);
4486 xmlParseEntityDecl(ctxt);
4487 xmlParseNotationDecl(ctxt);
4488 xmlParsePI(ctxt);
4489 xmlParseComment(ctxt);
4490 /*
4491 * This is only for internal subset. On external entities,
4492 * the replacement is done before parsing stage
4493 */
4494 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4495 xmlParsePEReference(ctxt);
4496 ctxt->instate = XML_PARSER_DTD;
4497}
4498
4499/**
4500 * xmlParseTextDecl:
4501 * @ctxt: an XML parser context
4502 *
4503 * parse an XML declaration header for external entities
4504 *
4505 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4506 *
4507 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4508 */
4509
4510void
4511xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4512 xmlChar *version;
4513
4514 /*
4515 * We know that '<?xml' is here.
4516 */
4517 if ((RAW == '<') && (NXT(1) == '?') &&
4518 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4519 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4520 SKIP(5);
4521 } else {
4522 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4524 ctxt->sax->error(ctxt->userData,
4525 "Text declaration '<?xml' required\n");
4526 ctxt->wellFormed = 0;
4527 ctxt->disableSAX = 1;
4528
4529 return;
4530 }
4531
4532 if (!IS_BLANK(CUR)) {
4533 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4535 ctxt->sax->error(ctxt->userData,
4536 "Space needed after '<?xml'\n");
4537 ctxt->wellFormed = 0;
4538 ctxt->disableSAX = 1;
4539 }
4540 SKIP_BLANKS;
4541
4542 /*
4543 * We may have the VersionInfo here.
4544 */
4545 version = xmlParseVersionInfo(ctxt);
4546 if (version == NULL)
4547 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4548 ctxt->input->version = version;
4549
4550 /*
4551 * We must have the encoding declaration
4552 */
4553 if (!IS_BLANK(CUR)) {
4554 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4556 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4557 ctxt->wellFormed = 0;
4558 ctxt->disableSAX = 1;
4559 }
4560 xmlParseEncodingDecl(ctxt);
4561 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4562 /*
4563 * The XML REC instructs us to stop parsing right here
4564 */
4565 return;
4566 }
4567
4568 SKIP_BLANKS;
4569 if ((RAW == '?') && (NXT(1) == '>')) {
4570 SKIP(2);
4571 } else if (RAW == '>') {
4572 /* Deprecated old WD ... */
4573 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4575 ctxt->sax->error(ctxt->userData,
4576 "XML declaration must end-up with '?>'\n");
4577 ctxt->wellFormed = 0;
4578 ctxt->disableSAX = 1;
4579 NEXT;
4580 } else {
4581 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4583 ctxt->sax->error(ctxt->userData,
4584 "parsing XML declaration: '?>' expected\n");
4585 ctxt->wellFormed = 0;
4586 ctxt->disableSAX = 1;
4587 MOVETO_ENDTAG(CUR_PTR);
4588 NEXT;
4589 }
4590}
4591
4592/*
4593 * xmlParseConditionalSections
4594 * @ctxt: an XML parser context
4595 *
4596 * [61] conditionalSect ::= includeSect | ignoreSect
4597 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4598 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4599 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4600 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4601 */
4602
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004603static void
Owen Taylor3473f882001-02-23 17:55:21 +00004604xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4605 SKIP(3);
4606 SKIP_BLANKS;
4607 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4608 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4609 (NXT(6) == 'E')) {
4610 SKIP(7);
4611 SKIP_BLANKS;
4612 if (RAW != '[') {
4613 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4615 ctxt->sax->error(ctxt->userData,
4616 "XML conditional section '[' expected\n");
4617 ctxt->wellFormed = 0;
4618 ctxt->disableSAX = 1;
4619 } else {
4620 NEXT;
4621 }
4622 if (xmlParserDebugEntities) {
4623 if ((ctxt->input != NULL) && (ctxt->input->filename))
4624 xmlGenericError(xmlGenericErrorContext,
4625 "%s(%d): ", ctxt->input->filename,
4626 ctxt->input->line);
4627 xmlGenericError(xmlGenericErrorContext,
4628 "Entering INCLUDE Conditional Section\n");
4629 }
4630
4631 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4632 (NXT(2) != '>'))) {
4633 const xmlChar *check = CUR_PTR;
4634 int cons = ctxt->input->consumed;
4635 int tok = ctxt->token;
4636
4637 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4638 xmlParseConditionalSections(ctxt);
4639 } else if (IS_BLANK(CUR)) {
4640 NEXT;
4641 } else if (RAW == '%') {
4642 xmlParsePEReference(ctxt);
4643 } else
4644 xmlParseMarkupDecl(ctxt);
4645
4646 /*
4647 * Pop-up of finished entities.
4648 */
4649 while ((RAW == 0) && (ctxt->inputNr > 1))
4650 xmlPopInput(ctxt);
4651
4652 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4653 (tok == ctxt->token)) {
4654 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4656 ctxt->sax->error(ctxt->userData,
4657 "Content error in the external subset\n");
4658 ctxt->wellFormed = 0;
4659 ctxt->disableSAX = 1;
4660 break;
4661 }
4662 }
4663 if (xmlParserDebugEntities) {
4664 if ((ctxt->input != NULL) && (ctxt->input->filename))
4665 xmlGenericError(xmlGenericErrorContext,
4666 "%s(%d): ", ctxt->input->filename,
4667 ctxt->input->line);
4668 xmlGenericError(xmlGenericErrorContext,
4669 "Leaving INCLUDE Conditional Section\n");
4670 }
4671
4672 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4673 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4674 int state;
4675 int instate;
4676 int depth = 0;
4677
4678 SKIP(6);
4679 SKIP_BLANKS;
4680 if (RAW != '[') {
4681 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4683 ctxt->sax->error(ctxt->userData,
4684 "XML conditional section '[' expected\n");
4685 ctxt->wellFormed = 0;
4686 ctxt->disableSAX = 1;
4687 } else {
4688 NEXT;
4689 }
4690 if (xmlParserDebugEntities) {
4691 if ((ctxt->input != NULL) && (ctxt->input->filename))
4692 xmlGenericError(xmlGenericErrorContext,
4693 "%s(%d): ", ctxt->input->filename,
4694 ctxt->input->line);
4695 xmlGenericError(xmlGenericErrorContext,
4696 "Entering IGNORE Conditional Section\n");
4697 }
4698
4699 /*
4700 * Parse up to the end of the conditionnal section
4701 * But disable SAX event generating DTD building in the meantime
4702 */
4703 state = ctxt->disableSAX;
4704 instate = ctxt->instate;
4705 ctxt->disableSAX = 1;
4706 ctxt->instate = XML_PARSER_IGNORE;
4707
4708 while (depth >= 0) {
4709 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4710 depth++;
4711 SKIP(3);
4712 continue;
4713 }
4714 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4715 if (--depth >= 0) SKIP(3);
4716 continue;
4717 }
4718 NEXT;
4719 continue;
4720 }
4721
4722 ctxt->disableSAX = state;
4723 ctxt->instate = instate;
4724
4725 if (xmlParserDebugEntities) {
4726 if ((ctxt->input != NULL) && (ctxt->input->filename))
4727 xmlGenericError(xmlGenericErrorContext,
4728 "%s(%d): ", ctxt->input->filename,
4729 ctxt->input->line);
4730 xmlGenericError(xmlGenericErrorContext,
4731 "Leaving IGNORE Conditional Section\n");
4732 }
4733
4734 } else {
4735 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4737 ctxt->sax->error(ctxt->userData,
4738 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4739 ctxt->wellFormed = 0;
4740 ctxt->disableSAX = 1;
4741 }
4742
4743 if (RAW == 0)
4744 SHRINK;
4745
4746 if (RAW == 0) {
4747 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4749 ctxt->sax->error(ctxt->userData,
4750 "XML conditional section not closed\n");
4751 ctxt->wellFormed = 0;
4752 ctxt->disableSAX = 1;
4753 } else {
4754 SKIP(3);
4755 }
4756}
4757
4758/**
4759 * xmlParseExternalSubset:
4760 * @ctxt: an XML parser context
4761 * @ExternalID: the external identifier
4762 * @SystemID: the system identifier (or URL)
4763 *
4764 * parse Markup declarations from an external subset
4765 *
4766 * [30] extSubset ::= textDecl? extSubsetDecl
4767 *
4768 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4769 */
4770void
4771xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4772 const xmlChar *SystemID) {
4773 GROW;
4774 if ((RAW == '<') && (NXT(1) == '?') &&
4775 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4776 (NXT(4) == 'l')) {
4777 xmlParseTextDecl(ctxt);
4778 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4779 /*
4780 * The XML REC instructs us to stop parsing right here
4781 */
4782 ctxt->instate = XML_PARSER_EOF;
4783 return;
4784 }
4785 }
4786 if (ctxt->myDoc == NULL) {
4787 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4788 }
4789 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4790 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4791
4792 ctxt->instate = XML_PARSER_DTD;
4793 ctxt->external = 1;
4794 while (((RAW == '<') && (NXT(1) == '?')) ||
4795 ((RAW == '<') && (NXT(1) == '!')) ||
4796 IS_BLANK(CUR)) {
4797 const xmlChar *check = CUR_PTR;
4798 int cons = ctxt->input->consumed;
4799 int tok = ctxt->token;
4800
4801 GROW;
4802 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4803 xmlParseConditionalSections(ctxt);
4804 } else if (IS_BLANK(CUR)) {
4805 NEXT;
4806 } else if (RAW == '%') {
4807 xmlParsePEReference(ctxt);
4808 } else
4809 xmlParseMarkupDecl(ctxt);
4810
4811 /*
4812 * Pop-up of finished entities.
4813 */
4814 while ((RAW == 0) && (ctxt->inputNr > 1))
4815 xmlPopInput(ctxt);
4816
4817 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4818 (tok == ctxt->token)) {
4819 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4821 ctxt->sax->error(ctxt->userData,
4822 "Content error in the external subset\n");
4823 ctxt->wellFormed = 0;
4824 ctxt->disableSAX = 1;
4825 break;
4826 }
4827 }
4828
4829 if (RAW != 0) {
4830 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4832 ctxt->sax->error(ctxt->userData,
4833 "Extra content at the end of the document\n");
4834 ctxt->wellFormed = 0;
4835 ctxt->disableSAX = 1;
4836 }
4837
4838}
4839
4840/**
4841 * xmlParseReference:
4842 * @ctxt: an XML parser context
4843 *
4844 * parse and handle entity references in content, depending on the SAX
4845 * interface, this may end-up in a call to character() if this is a
4846 * CharRef, a predefined entity, if there is no reference() callback.
4847 * or if the parser was asked to switch to that mode.
4848 *
4849 * [67] Reference ::= EntityRef | CharRef
4850 */
4851void
4852xmlParseReference(xmlParserCtxtPtr ctxt) {
4853 xmlEntityPtr ent;
4854 xmlChar *val;
4855 if (RAW != '&') return;
4856
4857 if (NXT(1) == '#') {
4858 int i = 0;
4859 xmlChar out[10];
4860 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004861 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004862
4863 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4864 /*
4865 * So we are using non-UTF-8 buffers
4866 * Check that the char fit on 8bits, if not
4867 * generate a CharRef.
4868 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004869 if (value <= 0xFF) {
4870 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004871 out[1] = 0;
4872 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4873 (!ctxt->disableSAX))
4874 ctxt->sax->characters(ctxt->userData, out, 1);
4875 } else {
4876 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004877 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004878 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004879 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004880 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4881 (!ctxt->disableSAX))
4882 ctxt->sax->reference(ctxt->userData, out);
4883 }
4884 } else {
4885 /*
4886 * Just encode the value in UTF-8
4887 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004888 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004889 out[i] = 0;
4890 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4891 (!ctxt->disableSAX))
4892 ctxt->sax->characters(ctxt->userData, out, i);
4893 }
4894 } else {
4895 ent = xmlParseEntityRef(ctxt);
4896 if (ent == NULL) return;
4897 if ((ent->name != NULL) &&
4898 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4899 xmlNodePtr list = NULL;
4900 int ret;
4901
4902
4903 /*
4904 * The first reference to the entity trigger a parsing phase
4905 * where the ent->children is filled with the result from
4906 * the parsing.
4907 */
4908 if (ent->children == NULL) {
4909 xmlChar *value;
4910 value = ent->content;
4911
4912 /*
4913 * Check that this entity is well formed
4914 */
4915 if ((value != NULL) &&
4916 (value[1] == 0) && (value[0] == '<') &&
4917 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4918 /*
4919 * DONE: get definite answer on this !!!
4920 * Lots of entity decls are used to declare a single
4921 * char
4922 * <!ENTITY lt "<">
4923 * Which seems to be valid since
4924 * 2.4: The ampersand character (&) and the left angle
4925 * bracket (<) may appear in their literal form only
4926 * when used ... They are also legal within the literal
4927 * entity value of an internal entity declaration;i
4928 * see "4.3.2 Well-Formed Parsed Entities".
4929 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4930 * Looking at the OASIS test suite and James Clark
4931 * tests, this is broken. However the XML REC uses
4932 * it. Is the XML REC not well-formed ????
4933 * This is a hack to avoid this problem
4934 *
4935 * ANSWER: since lt gt amp .. are already defined,
4936 * this is a redefinition and hence the fact that the
4937 * contentis not well balanced is not a Wf error, this
4938 * is lousy but acceptable.
4939 */
4940 list = xmlNewDocText(ctxt->myDoc, value);
4941 if (list != NULL) {
4942 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4943 (ent->children == NULL)) {
4944 ent->children = list;
4945 ent->last = list;
4946 list->parent = (xmlNodePtr) ent;
4947 } else {
4948 xmlFreeNodeList(list);
4949 }
4950 } else if (list != NULL) {
4951 xmlFreeNodeList(list);
4952 }
4953 } else {
4954 /*
4955 * 4.3.2: An internal general parsed entity is well-formed
4956 * if its replacement text matches the production labeled
4957 * content.
4958 */
4959 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4960 ctxt->depth++;
4961 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4962 ctxt->sax, NULL, ctxt->depth,
4963 value, &list);
4964 ctxt->depth--;
4965 } else if (ent->etype ==
4966 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4967 ctxt->depth++;
4968 ret = xmlParseExternalEntity(ctxt->myDoc,
4969 ctxt->sax, NULL, ctxt->depth,
4970 ent->URI, ent->ExternalID, &list);
4971 ctxt->depth--;
4972 } else {
4973 ret = -1;
4974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4975 ctxt->sax->error(ctxt->userData,
4976 "Internal: invalid entity type\n");
4977 }
4978 if (ret == XML_ERR_ENTITY_LOOP) {
4979 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4981 ctxt->sax->error(ctxt->userData,
4982 "Detected entity reference loop\n");
4983 ctxt->wellFormed = 0;
4984 ctxt->disableSAX = 1;
4985 } else if ((ret == 0) && (list != NULL)) {
4986 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4987 (ent->children == NULL)) {
4988 ent->children = list;
4989 while (list != NULL) {
4990 list->parent = (xmlNodePtr) ent;
4991 if (list->next == NULL)
4992 ent->last = list;
4993 list = list->next;
4994 }
4995 } else {
4996 xmlFreeNodeList(list);
4997 }
4998 } else if (ret > 0) {
4999 ctxt->errNo = ret;
5000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5001 ctxt->sax->error(ctxt->userData,
5002 "Entity value required\n");
5003 ctxt->wellFormed = 0;
5004 ctxt->disableSAX = 1;
5005 } else if (list != NULL) {
5006 xmlFreeNodeList(list);
5007 }
5008 }
5009 }
5010 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5011 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5012 /*
5013 * Create a node.
5014 */
5015 ctxt->sax->reference(ctxt->userData, ent->name);
5016 return;
5017 } else if (ctxt->replaceEntities) {
5018 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5019 /*
5020 * Seems we are generating the DOM content, do
5021 * a simple tree copy
5022 */
5023 xmlNodePtr new;
5024 new = xmlCopyNodeList(ent->children);
5025
5026 xmlAddChildList(ctxt->node, new);
5027 /*
5028 * This is to avoid a nasty side effect, see
5029 * characters() in SAX.c
5030 */
5031 ctxt->nodemem = 0;
5032 ctxt->nodelen = 0;
5033 return;
5034 } else {
5035 /*
5036 * Probably running in SAX mode
5037 */
5038 xmlParserInputPtr input;
5039
5040 input = xmlNewEntityInputStream(ctxt, ent);
5041 xmlPushInput(ctxt, input);
5042 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5043 (RAW == '<') && (NXT(1) == '?') &&
5044 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5045 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5046 xmlParseTextDecl(ctxt);
5047 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5048 /*
5049 * The XML REC instructs us to stop parsing right here
5050 */
5051 ctxt->instate = XML_PARSER_EOF;
5052 return;
5053 }
5054 if (input->standalone == 1) {
5055 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5057 ctxt->sax->error(ctxt->userData,
5058 "external parsed entities cannot be standalone\n");
5059 ctxt->wellFormed = 0;
5060 ctxt->disableSAX = 1;
5061 }
5062 }
5063 return;
5064 }
5065 }
5066 } else {
5067 val = ent->content;
5068 if (val == NULL) return;
5069 /*
5070 * inline the entity.
5071 */
5072 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5073 (!ctxt->disableSAX))
5074 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5075 }
5076 }
5077}
5078
5079/**
5080 * xmlParseEntityRef:
5081 * @ctxt: an XML parser context
5082 *
5083 * parse ENTITY references declarations
5084 *
5085 * [68] EntityRef ::= '&' Name ';'
5086 *
5087 * [ WFC: Entity Declared ]
5088 * In a document without any DTD, a document with only an internal DTD
5089 * subset which contains no parameter entity references, or a document
5090 * with "standalone='yes'", the Name given in the entity reference
5091 * must match that in an entity declaration, except that well-formed
5092 * documents need not declare any of the following entities: amp, lt,
5093 * gt, apos, quot. The declaration of a parameter entity must precede
5094 * any reference to it. Similarly, the declaration of a general entity
5095 * must precede any reference to it which appears in a default value in an
5096 * attribute-list declaration. Note that if entities are declared in the
5097 * external subset or in external parameter entities, a non-validating
5098 * processor is not obligated to read and process their declarations;
5099 * for such documents, the rule that an entity must be declared is a
5100 * well-formedness constraint only if standalone='yes'.
5101 *
5102 * [ WFC: Parsed Entity ]
5103 * An entity reference must not contain the name of an unparsed entity
5104 *
5105 * Returns the xmlEntityPtr if found, or NULL otherwise.
5106 */
5107xmlEntityPtr
5108xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5109 xmlChar *name;
5110 xmlEntityPtr ent = NULL;
5111
5112 GROW;
5113
5114 if (RAW == '&') {
5115 NEXT;
5116 name = xmlParseName(ctxt);
5117 if (name == NULL) {
5118 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5120 ctxt->sax->error(ctxt->userData,
5121 "xmlParseEntityRef: no name\n");
5122 ctxt->wellFormed = 0;
5123 ctxt->disableSAX = 1;
5124 } else {
5125 if (RAW == ';') {
5126 NEXT;
5127 /*
5128 * Ask first SAX for entity resolution, otherwise try the
5129 * predefined set.
5130 */
5131 if (ctxt->sax != NULL) {
5132 if (ctxt->sax->getEntity != NULL)
5133 ent = ctxt->sax->getEntity(ctxt->userData, name);
5134 if (ent == NULL)
5135 ent = xmlGetPredefinedEntity(name);
5136 }
5137 /*
5138 * [ WFC: Entity Declared ]
5139 * In a document without any DTD, a document with only an
5140 * internal DTD subset which contains no parameter entity
5141 * references, or a document with "standalone='yes'", the
5142 * Name given in the entity reference must match that in an
5143 * entity declaration, except that well-formed documents
5144 * need not declare any of the following entities: amp, lt,
5145 * gt, apos, quot.
5146 * The declaration of a parameter entity must precede any
5147 * reference to it.
5148 * Similarly, the declaration of a general entity must
5149 * precede any reference to it which appears in a default
5150 * value in an attribute-list declaration. Note that if
5151 * entities are declared in the external subset or in
5152 * external parameter entities, a non-validating processor
5153 * is not obligated to read and process their declarations;
5154 * for such documents, the rule that an entity must be
5155 * declared is a well-formedness constraint only if
5156 * standalone='yes'.
5157 */
5158 if (ent == NULL) {
5159 if ((ctxt->standalone == 1) ||
5160 ((ctxt->hasExternalSubset == 0) &&
5161 (ctxt->hasPErefs == 0))) {
5162 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5164 ctxt->sax->error(ctxt->userData,
5165 "Entity '%s' not defined\n", name);
5166 ctxt->wellFormed = 0;
5167 ctxt->disableSAX = 1;
5168 } else {
5169 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5170 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5171 ctxt->sax->warning(ctxt->userData,
5172 "Entity '%s' not defined\n", name);
5173 }
5174 }
5175
5176 /*
5177 * [ WFC: Parsed Entity ]
5178 * An entity reference must not contain the name of an
5179 * unparsed entity
5180 */
5181 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5182 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5184 ctxt->sax->error(ctxt->userData,
5185 "Entity reference to unparsed entity %s\n", name);
5186 ctxt->wellFormed = 0;
5187 ctxt->disableSAX = 1;
5188 }
5189
5190 /*
5191 * [ WFC: No External Entity References ]
5192 * Attribute values cannot contain direct or indirect
5193 * entity references to external entities.
5194 */
5195 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5196 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5197 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5199 ctxt->sax->error(ctxt->userData,
5200 "Attribute references external entity '%s'\n", name);
5201 ctxt->wellFormed = 0;
5202 ctxt->disableSAX = 1;
5203 }
5204 /*
5205 * [ WFC: No < in Attribute Values ]
5206 * The replacement text of any entity referred to directly or
5207 * indirectly in an attribute value (other than "&lt;") must
5208 * not contain a <.
5209 */
5210 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5211 (ent != NULL) &&
5212 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5213 (ent->content != NULL) &&
5214 (xmlStrchr(ent->content, '<'))) {
5215 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5217 ctxt->sax->error(ctxt->userData,
5218 "'<' in entity '%s' is not allowed in attributes values\n", name);
5219 ctxt->wellFormed = 0;
5220 ctxt->disableSAX = 1;
5221 }
5222
5223 /*
5224 * Internal check, no parameter entities here ...
5225 */
5226 else {
5227 switch (ent->etype) {
5228 case XML_INTERNAL_PARAMETER_ENTITY:
5229 case XML_EXTERNAL_PARAMETER_ENTITY:
5230 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5232 ctxt->sax->error(ctxt->userData,
5233 "Attempt to reference the parameter entity '%s'\n", name);
5234 ctxt->wellFormed = 0;
5235 ctxt->disableSAX = 1;
5236 break;
5237 default:
5238 break;
5239 }
5240 }
5241
5242 /*
5243 * [ WFC: No Recursion ]
5244 * A parsed entity must not contain a recursive reference
5245 * to itself, either directly or indirectly.
5246 * Done somewhere else
5247 */
5248
5249 } else {
5250 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5252 ctxt->sax->error(ctxt->userData,
5253 "xmlParseEntityRef: expecting ';'\n");
5254 ctxt->wellFormed = 0;
5255 ctxt->disableSAX = 1;
5256 }
5257 xmlFree(name);
5258 }
5259 }
5260 return(ent);
5261}
5262
5263/**
5264 * xmlParseStringEntityRef:
5265 * @ctxt: an XML parser context
5266 * @str: a pointer to an index in the string
5267 *
5268 * parse ENTITY references declarations, but this version parses it from
5269 * a string value.
5270 *
5271 * [68] EntityRef ::= '&' Name ';'
5272 *
5273 * [ WFC: Entity Declared ]
5274 * In a document without any DTD, a document with only an internal DTD
5275 * subset which contains no parameter entity references, or a document
5276 * with "standalone='yes'", the Name given in the entity reference
5277 * must match that in an entity declaration, except that well-formed
5278 * documents need not declare any of the following entities: amp, lt,
5279 * gt, apos, quot. The declaration of a parameter entity must precede
5280 * any reference to it. Similarly, the declaration of a general entity
5281 * must precede any reference to it which appears in a default value in an
5282 * attribute-list declaration. Note that if entities are declared in the
5283 * external subset or in external parameter entities, a non-validating
5284 * processor is not obligated to read and process their declarations;
5285 * for such documents, the rule that an entity must be declared is a
5286 * well-formedness constraint only if standalone='yes'.
5287 *
5288 * [ WFC: Parsed Entity ]
5289 * An entity reference must not contain the name of an unparsed entity
5290 *
5291 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5292 * is updated to the current location in the string.
5293 */
5294xmlEntityPtr
5295xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5296 xmlChar *name;
5297 const xmlChar *ptr;
5298 xmlChar cur;
5299 xmlEntityPtr ent = NULL;
5300
5301 if ((str == NULL) || (*str == NULL))
5302 return(NULL);
5303 ptr = *str;
5304 cur = *ptr;
5305 if (cur == '&') {
5306 ptr++;
5307 cur = *ptr;
5308 name = xmlParseStringName(ctxt, &ptr);
5309 if (name == NULL) {
5310 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5312 ctxt->sax->error(ctxt->userData,
5313 "xmlParseEntityRef: no name\n");
5314 ctxt->wellFormed = 0;
5315 ctxt->disableSAX = 1;
5316 } else {
5317 if (*ptr == ';') {
5318 ptr++;
5319 /*
5320 * Ask first SAX for entity resolution, otherwise try the
5321 * predefined set.
5322 */
5323 if (ctxt->sax != NULL) {
5324 if (ctxt->sax->getEntity != NULL)
5325 ent = ctxt->sax->getEntity(ctxt->userData, name);
5326 if (ent == NULL)
5327 ent = xmlGetPredefinedEntity(name);
5328 }
5329 /*
5330 * [ WFC: Entity Declared ]
5331 * In a document without any DTD, a document with only an
5332 * internal DTD subset which contains no parameter entity
5333 * references, or a document with "standalone='yes'", the
5334 * Name given in the entity reference must match that in an
5335 * entity declaration, except that well-formed documents
5336 * need not declare any of the following entities: amp, lt,
5337 * gt, apos, quot.
5338 * The declaration of a parameter entity must precede any
5339 * reference to it.
5340 * Similarly, the declaration of a general entity must
5341 * precede any reference to it which appears in a default
5342 * value in an attribute-list declaration. Note that if
5343 * entities are declared in the external subset or in
5344 * external parameter entities, a non-validating processor
5345 * is not obligated to read and process their declarations;
5346 * for such documents, the rule that an entity must be
5347 * declared is a well-formedness constraint only if
5348 * standalone='yes'.
5349 */
5350 if (ent == NULL) {
5351 if ((ctxt->standalone == 1) ||
5352 ((ctxt->hasExternalSubset == 0) &&
5353 (ctxt->hasPErefs == 0))) {
5354 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5356 ctxt->sax->error(ctxt->userData,
5357 "Entity '%s' not defined\n", name);
5358 ctxt->wellFormed = 0;
5359 ctxt->disableSAX = 1;
5360 } else {
5361 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5362 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5363 ctxt->sax->warning(ctxt->userData,
5364 "Entity '%s' not defined\n", name);
5365 }
5366 }
5367
5368 /*
5369 * [ WFC: Parsed Entity ]
5370 * An entity reference must not contain the name of an
5371 * unparsed entity
5372 */
5373 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5374 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5376 ctxt->sax->error(ctxt->userData,
5377 "Entity reference to unparsed entity %s\n", name);
5378 ctxt->wellFormed = 0;
5379 ctxt->disableSAX = 1;
5380 }
5381
5382 /*
5383 * [ WFC: No External Entity References ]
5384 * Attribute values cannot contain direct or indirect
5385 * entity references to external entities.
5386 */
5387 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5388 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5389 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5391 ctxt->sax->error(ctxt->userData,
5392 "Attribute references external entity '%s'\n", name);
5393 ctxt->wellFormed = 0;
5394 ctxt->disableSAX = 1;
5395 }
5396 /*
5397 * [ WFC: No < in Attribute Values ]
5398 * The replacement text of any entity referred to directly or
5399 * indirectly in an attribute value (other than "&lt;") must
5400 * not contain a <.
5401 */
5402 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5403 (ent != NULL) &&
5404 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5405 (ent->content != NULL) &&
5406 (xmlStrchr(ent->content, '<'))) {
5407 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5409 ctxt->sax->error(ctxt->userData,
5410 "'<' in entity '%s' is not allowed in attributes values\n", name);
5411 ctxt->wellFormed = 0;
5412 ctxt->disableSAX = 1;
5413 }
5414
5415 /*
5416 * Internal check, no parameter entities here ...
5417 */
5418 else {
5419 switch (ent->etype) {
5420 case XML_INTERNAL_PARAMETER_ENTITY:
5421 case XML_EXTERNAL_PARAMETER_ENTITY:
5422 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5424 ctxt->sax->error(ctxt->userData,
5425 "Attempt to reference the parameter entity '%s'\n", name);
5426 ctxt->wellFormed = 0;
5427 ctxt->disableSAX = 1;
5428 break;
5429 default:
5430 break;
5431 }
5432 }
5433
5434 /*
5435 * [ WFC: No Recursion ]
5436 * A parsed entity must not contain a recursive reference
5437 * to itself, either directly or indirectly.
5438 * Done somewhwere else
5439 */
5440
5441 } else {
5442 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5444 ctxt->sax->error(ctxt->userData,
5445 "xmlParseEntityRef: expecting ';'\n");
5446 ctxt->wellFormed = 0;
5447 ctxt->disableSAX = 1;
5448 }
5449 xmlFree(name);
5450 }
5451 }
5452 *str = ptr;
5453 return(ent);
5454}
5455
5456/**
5457 * xmlParsePEReference:
5458 * @ctxt: an XML parser context
5459 *
5460 * parse PEReference declarations
5461 * The entity content is handled directly by pushing it's content as
5462 * a new input stream.
5463 *
5464 * [69] PEReference ::= '%' Name ';'
5465 *
5466 * [ WFC: No Recursion ]
5467 * A parsed entity must not contain a recursive
5468 * reference to itself, either directly or indirectly.
5469 *
5470 * [ WFC: Entity Declared ]
5471 * In a document without any DTD, a document with only an internal DTD
5472 * subset which contains no parameter entity references, or a document
5473 * with "standalone='yes'", ... ... The declaration of a parameter
5474 * entity must precede any reference to it...
5475 *
5476 * [ VC: Entity Declared ]
5477 * In a document with an external subset or external parameter entities
5478 * with "standalone='no'", ... ... The declaration of a parameter entity
5479 * must precede any reference to it...
5480 *
5481 * [ WFC: In DTD ]
5482 * Parameter-entity references may only appear in the DTD.
5483 * NOTE: misleading but this is handled.
5484 */
5485void
5486xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5487 xmlChar *name;
5488 xmlEntityPtr entity = NULL;
5489 xmlParserInputPtr input;
5490
5491 if (RAW == '%') {
5492 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005493 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005494 if (name == NULL) {
5495 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5497 ctxt->sax->error(ctxt->userData,
5498 "xmlParsePEReference: no name\n");
5499 ctxt->wellFormed = 0;
5500 ctxt->disableSAX = 1;
5501 } else {
5502 if (RAW == ';') {
5503 NEXT;
5504 if ((ctxt->sax != NULL) &&
5505 (ctxt->sax->getParameterEntity != NULL))
5506 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5507 name);
5508 if (entity == NULL) {
5509 /*
5510 * [ WFC: Entity Declared ]
5511 * In a document without any DTD, a document with only an
5512 * internal DTD subset which contains no parameter entity
5513 * references, or a document with "standalone='yes'", ...
5514 * ... The declaration of a parameter entity must precede
5515 * any reference to it...
5516 */
5517 if ((ctxt->standalone == 1) ||
5518 ((ctxt->hasExternalSubset == 0) &&
5519 (ctxt->hasPErefs == 0))) {
5520 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5521 if ((!ctxt->disableSAX) &&
5522 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5523 ctxt->sax->error(ctxt->userData,
5524 "PEReference: %%%s; not found\n", name);
5525 ctxt->wellFormed = 0;
5526 ctxt->disableSAX = 1;
5527 } else {
5528 /*
5529 * [ VC: Entity Declared ]
5530 * In a document with an external subset or external
5531 * parameter entities with "standalone='no'", ...
5532 * ... The declaration of a parameter entity must precede
5533 * any reference to it...
5534 */
5535 if ((!ctxt->disableSAX) &&
5536 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5537 ctxt->sax->warning(ctxt->userData,
5538 "PEReference: %%%s; not found\n", name);
5539 ctxt->valid = 0;
5540 }
5541 } else {
5542 /*
5543 * Internal checking in case the entity quest barfed
5544 */
5545 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5546 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5547 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5548 ctxt->sax->warning(ctxt->userData,
5549 "Internal: %%%s; is not a parameter entity\n", name);
5550 } else {
5551 /*
5552 * TODO !!!
5553 * handle the extra spaces added before and after
5554 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5555 */
5556 input = xmlNewEntityInputStream(ctxt, entity);
5557 xmlPushInput(ctxt, input);
5558 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5559 (RAW == '<') && (NXT(1) == '?') &&
5560 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5561 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5562 xmlParseTextDecl(ctxt);
5563 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5564 /*
5565 * The XML REC instructs us to stop parsing
5566 * right here
5567 */
5568 ctxt->instate = XML_PARSER_EOF;
5569 xmlFree(name);
5570 return;
5571 }
5572 }
5573 if (ctxt->token == 0)
5574 ctxt->token = ' ';
5575 }
5576 }
5577 ctxt->hasPErefs = 1;
5578 } else {
5579 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5581 ctxt->sax->error(ctxt->userData,
5582 "xmlParsePEReference: expecting ';'\n");
5583 ctxt->wellFormed = 0;
5584 ctxt->disableSAX = 1;
5585 }
5586 xmlFree(name);
5587 }
5588 }
5589}
5590
5591/**
5592 * xmlParseStringPEReference:
5593 * @ctxt: an XML parser context
5594 * @str: a pointer to an index in the string
5595 *
5596 * parse PEReference declarations
5597 *
5598 * [69] PEReference ::= '%' Name ';'
5599 *
5600 * [ WFC: No Recursion ]
5601 * A parsed entity must not contain a recursive
5602 * reference to itself, either directly or indirectly.
5603 *
5604 * [ WFC: Entity Declared ]
5605 * In a document without any DTD, a document with only an internal DTD
5606 * subset which contains no parameter entity references, or a document
5607 * with "standalone='yes'", ... ... The declaration of a parameter
5608 * entity must precede any reference to it...
5609 *
5610 * [ VC: Entity Declared ]
5611 * In a document with an external subset or external parameter entities
5612 * with "standalone='no'", ... ... The declaration of a parameter entity
5613 * must precede any reference to it...
5614 *
5615 * [ WFC: In DTD ]
5616 * Parameter-entity references may only appear in the DTD.
5617 * NOTE: misleading but this is handled.
5618 *
5619 * Returns the string of the entity content.
5620 * str is updated to the current value of the index
5621 */
5622xmlEntityPtr
5623xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5624 const xmlChar *ptr;
5625 xmlChar cur;
5626 xmlChar *name;
5627 xmlEntityPtr entity = NULL;
5628
5629 if ((str == NULL) || (*str == NULL)) return(NULL);
5630 ptr = *str;
5631 cur = *ptr;
5632 if (cur == '%') {
5633 ptr++;
5634 cur = *ptr;
5635 name = xmlParseStringName(ctxt, &ptr);
5636 if (name == NULL) {
5637 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5639 ctxt->sax->error(ctxt->userData,
5640 "xmlParseStringPEReference: no name\n");
5641 ctxt->wellFormed = 0;
5642 ctxt->disableSAX = 1;
5643 } else {
5644 cur = *ptr;
5645 if (cur == ';') {
5646 ptr++;
5647 cur = *ptr;
5648 if ((ctxt->sax != NULL) &&
5649 (ctxt->sax->getParameterEntity != NULL))
5650 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5651 name);
5652 if (entity == NULL) {
5653 /*
5654 * [ WFC: Entity Declared ]
5655 * In a document without any DTD, a document with only an
5656 * internal DTD subset which contains no parameter entity
5657 * references, or a document with "standalone='yes'", ...
5658 * ... The declaration of a parameter entity must precede
5659 * any reference to it...
5660 */
5661 if ((ctxt->standalone == 1) ||
5662 ((ctxt->hasExternalSubset == 0) &&
5663 (ctxt->hasPErefs == 0))) {
5664 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5666 ctxt->sax->error(ctxt->userData,
5667 "PEReference: %%%s; not found\n", name);
5668 ctxt->wellFormed = 0;
5669 ctxt->disableSAX = 1;
5670 } else {
5671 /*
5672 * [ VC: Entity Declared ]
5673 * In a document with an external subset or external
5674 * parameter entities with "standalone='no'", ...
5675 * ... The declaration of a parameter entity must
5676 * precede any reference to it...
5677 */
5678 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5679 ctxt->sax->warning(ctxt->userData,
5680 "PEReference: %%%s; not found\n", name);
5681 ctxt->valid = 0;
5682 }
5683 } else {
5684 /*
5685 * Internal checking in case the entity quest barfed
5686 */
5687 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5688 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5689 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5690 ctxt->sax->warning(ctxt->userData,
5691 "Internal: %%%s; is not a parameter entity\n", name);
5692 }
5693 }
5694 ctxt->hasPErefs = 1;
5695 } else {
5696 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5698 ctxt->sax->error(ctxt->userData,
5699 "xmlParseStringPEReference: expecting ';'\n");
5700 ctxt->wellFormed = 0;
5701 ctxt->disableSAX = 1;
5702 }
5703 xmlFree(name);
5704 }
5705 }
5706 *str = ptr;
5707 return(entity);
5708}
5709
5710/**
5711 * xmlParseDocTypeDecl:
5712 * @ctxt: an XML parser context
5713 *
5714 * parse a DOCTYPE declaration
5715 *
5716 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5717 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5718 *
5719 * [ VC: Root Element Type ]
5720 * The Name in the document type declaration must match the element
5721 * type of the root element.
5722 */
5723
5724void
5725xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5726 xmlChar *name = NULL;
5727 xmlChar *ExternalID = NULL;
5728 xmlChar *URI = NULL;
5729
5730 /*
5731 * We know that '<!DOCTYPE' has been detected.
5732 */
5733 SKIP(9);
5734
5735 SKIP_BLANKS;
5736
5737 /*
5738 * Parse the DOCTYPE name.
5739 */
5740 name = xmlParseName(ctxt);
5741 if (name == NULL) {
5742 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5744 ctxt->sax->error(ctxt->userData,
5745 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5746 ctxt->wellFormed = 0;
5747 ctxt->disableSAX = 1;
5748 }
5749 ctxt->intSubName = name;
5750
5751 SKIP_BLANKS;
5752
5753 /*
5754 * Check for SystemID and ExternalID
5755 */
5756 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5757
5758 if ((URI != NULL) || (ExternalID != NULL)) {
5759 ctxt->hasExternalSubset = 1;
5760 }
5761 ctxt->extSubURI = URI;
5762 ctxt->extSubSystem = ExternalID;
5763
5764 SKIP_BLANKS;
5765
5766 /*
5767 * Create and update the internal subset.
5768 */
5769 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5770 (!ctxt->disableSAX))
5771 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5772
5773 /*
5774 * Is there any internal subset declarations ?
5775 * they are handled separately in xmlParseInternalSubset()
5776 */
5777 if (RAW == '[')
5778 return;
5779
5780 /*
5781 * We should be at the end of the DOCTYPE declaration.
5782 */
5783 if (RAW != '>') {
5784 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5786 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5787 ctxt->wellFormed = 0;
5788 ctxt->disableSAX = 1;
5789 }
5790 NEXT;
5791}
5792
5793/**
5794 * xmlParseInternalsubset:
5795 * @ctxt: an XML parser context
5796 *
5797 * parse the internal subset declaration
5798 *
5799 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5800 */
5801
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005802static void
Owen Taylor3473f882001-02-23 17:55:21 +00005803xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5804 /*
5805 * Is there any DTD definition ?
5806 */
5807 if (RAW == '[') {
5808 ctxt->instate = XML_PARSER_DTD;
5809 NEXT;
5810 /*
5811 * Parse the succession of Markup declarations and
5812 * PEReferences.
5813 * Subsequence (markupdecl | PEReference | S)*
5814 */
5815 while (RAW != ']') {
5816 const xmlChar *check = CUR_PTR;
5817 int cons = ctxt->input->consumed;
5818
5819 SKIP_BLANKS;
5820 xmlParseMarkupDecl(ctxt);
5821 xmlParsePEReference(ctxt);
5822
5823 /*
5824 * Pop-up of finished entities.
5825 */
5826 while ((RAW == 0) && (ctxt->inputNr > 1))
5827 xmlPopInput(ctxt);
5828
5829 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5830 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5832 ctxt->sax->error(ctxt->userData,
5833 "xmlParseInternalSubset: error detected in Markup declaration\n");
5834 ctxt->wellFormed = 0;
5835 ctxt->disableSAX = 1;
5836 break;
5837 }
5838 }
5839 if (RAW == ']') {
5840 NEXT;
5841 SKIP_BLANKS;
5842 }
5843 }
5844
5845 /*
5846 * We should be at the end of the DOCTYPE declaration.
5847 */
5848 if (RAW != '>') {
5849 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5851 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5852 ctxt->wellFormed = 0;
5853 ctxt->disableSAX = 1;
5854 }
5855 NEXT;
5856}
5857
5858/**
5859 * xmlParseAttribute:
5860 * @ctxt: an XML parser context
5861 * @value: a xmlChar ** used to store the value of the attribute
5862 *
5863 * parse an attribute
5864 *
5865 * [41] Attribute ::= Name Eq AttValue
5866 *
5867 * [ WFC: No External Entity References ]
5868 * Attribute values cannot contain direct or indirect entity references
5869 * to external entities.
5870 *
5871 * [ WFC: No < in Attribute Values ]
5872 * The replacement text of any entity referred to directly or indirectly in
5873 * an attribute value (other than "&lt;") must not contain a <.
5874 *
5875 * [ VC: Attribute Value Type ]
5876 * The attribute must have been declared; the value must be of the type
5877 * declared for it.
5878 *
5879 * [25] Eq ::= S? '=' S?
5880 *
5881 * With namespace:
5882 *
5883 * [NS 11] Attribute ::= QName Eq AttValue
5884 *
5885 * Also the case QName == xmlns:??? is handled independently as a namespace
5886 * definition.
5887 *
5888 * Returns the attribute name, and the value in *value.
5889 */
5890
5891xmlChar *
5892xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5893 xmlChar *name, *val;
5894
5895 *value = NULL;
5896 name = xmlParseName(ctxt);
5897 if (name == NULL) {
5898 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5900 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5901 ctxt->wellFormed = 0;
5902 ctxt->disableSAX = 1;
5903 return(NULL);
5904 }
5905
5906 /*
5907 * read the value
5908 */
5909 SKIP_BLANKS;
5910 if (RAW == '=') {
5911 NEXT;
5912 SKIP_BLANKS;
5913 val = xmlParseAttValue(ctxt);
5914 ctxt->instate = XML_PARSER_CONTENT;
5915 } else {
5916 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5918 ctxt->sax->error(ctxt->userData,
5919 "Specification mandate value for attribute %s\n", name);
5920 ctxt->wellFormed = 0;
5921 ctxt->disableSAX = 1;
5922 xmlFree(name);
5923 return(NULL);
5924 }
5925
5926 /*
5927 * Check that xml:lang conforms to the specification
5928 * No more registered as an error, just generate a warning now
5929 * since this was deprecated in XML second edition
5930 */
5931 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5932 if (!xmlCheckLanguageID(val)) {
5933 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5934 ctxt->sax->warning(ctxt->userData,
5935 "Malformed value for xml:lang : %s\n", val);
5936 }
5937 }
5938
5939 /*
5940 * Check that xml:space conforms to the specification
5941 */
5942 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5943 if (xmlStrEqual(val, BAD_CAST "default"))
5944 *(ctxt->space) = 0;
5945 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5946 *(ctxt->space) = 1;
5947 else {
5948 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5950 ctxt->sax->error(ctxt->userData,
5951"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5952 val);
5953 ctxt->wellFormed = 0;
5954 ctxt->disableSAX = 1;
5955 }
5956 }
5957
5958 *value = val;
5959 return(name);
5960}
5961
5962/**
5963 * xmlParseStartTag:
5964 * @ctxt: an XML parser context
5965 *
5966 * parse a start of tag either for rule element or
5967 * EmptyElement. In both case we don't parse the tag closing chars.
5968 *
5969 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5970 *
5971 * [ WFC: Unique Att Spec ]
5972 * No attribute name may appear more than once in the same start-tag or
5973 * empty-element tag.
5974 *
5975 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5976 *
5977 * [ WFC: Unique Att Spec ]
5978 * No attribute name may appear more than once in the same start-tag or
5979 * empty-element tag.
5980 *
5981 * With namespace:
5982 *
5983 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5984 *
5985 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5986 *
5987 * Returns the element name parsed
5988 */
5989
5990xmlChar *
5991xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5992 xmlChar *name;
5993 xmlChar *attname;
5994 xmlChar *attvalue;
5995 const xmlChar **atts = NULL;
5996 int nbatts = 0;
5997 int maxatts = 0;
5998 int i;
5999
6000 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006001 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006002
6003 name = xmlParseName(ctxt);
6004 if (name == NULL) {
6005 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6007 ctxt->sax->error(ctxt->userData,
6008 "xmlParseStartTag: invalid element name\n");
6009 ctxt->wellFormed = 0;
6010 ctxt->disableSAX = 1;
6011 return(NULL);
6012 }
6013
6014 /*
6015 * Now parse the attributes, it ends up with the ending
6016 *
6017 * (S Attribute)* S?
6018 */
6019 SKIP_BLANKS;
6020 GROW;
6021
Daniel Veillard21a0f912001-02-25 19:54:14 +00006022 while ((RAW != '>') &&
6023 ((RAW != '/') || (NXT(1) != '>')) &&
6024 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006025 const xmlChar *q = CUR_PTR;
6026 int cons = ctxt->input->consumed;
6027
6028 attname = xmlParseAttribute(ctxt, &attvalue);
6029 if ((attname != NULL) && (attvalue != NULL)) {
6030 /*
6031 * [ WFC: Unique Att Spec ]
6032 * No attribute name may appear more than once in the same
6033 * start-tag or empty-element tag.
6034 */
6035 for (i = 0; i < nbatts;i += 2) {
6036 if (xmlStrEqual(atts[i], attname)) {
6037 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6039 ctxt->sax->error(ctxt->userData,
6040 "Attribute %s redefined\n",
6041 attname);
6042 ctxt->wellFormed = 0;
6043 ctxt->disableSAX = 1;
6044 xmlFree(attname);
6045 xmlFree(attvalue);
6046 goto failed;
6047 }
6048 }
6049
6050 /*
6051 * Add the pair to atts
6052 */
6053 if (atts == NULL) {
6054 maxatts = 10;
6055 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6056 if (atts == NULL) {
6057 xmlGenericError(xmlGenericErrorContext,
6058 "malloc of %ld byte failed\n",
6059 maxatts * (long)sizeof(xmlChar *));
6060 return(NULL);
6061 }
6062 } else if (nbatts + 4 > maxatts) {
6063 maxatts *= 2;
6064 atts = (const xmlChar **) xmlRealloc((void *) atts,
6065 maxatts * sizeof(xmlChar *));
6066 if (atts == NULL) {
6067 xmlGenericError(xmlGenericErrorContext,
6068 "realloc of %ld byte failed\n",
6069 maxatts * (long)sizeof(xmlChar *));
6070 return(NULL);
6071 }
6072 }
6073 atts[nbatts++] = attname;
6074 atts[nbatts++] = attvalue;
6075 atts[nbatts] = NULL;
6076 atts[nbatts + 1] = NULL;
6077 } else {
6078 if (attname != NULL)
6079 xmlFree(attname);
6080 if (attvalue != NULL)
6081 xmlFree(attvalue);
6082 }
6083
6084failed:
6085
6086 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6087 break;
6088 if (!IS_BLANK(RAW)) {
6089 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6091 ctxt->sax->error(ctxt->userData,
6092 "attributes construct error\n");
6093 ctxt->wellFormed = 0;
6094 ctxt->disableSAX = 1;
6095 }
6096 SKIP_BLANKS;
6097 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6098 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6100 ctxt->sax->error(ctxt->userData,
6101 "xmlParseStartTag: problem parsing attributes\n");
6102 ctxt->wellFormed = 0;
6103 ctxt->disableSAX = 1;
6104 break;
6105 }
6106 GROW;
6107 }
6108
6109 /*
6110 * SAX: Start of Element !
6111 */
6112 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6113 (!ctxt->disableSAX))
6114 ctxt->sax->startElement(ctxt->userData, name, atts);
6115
6116 if (atts != NULL) {
6117 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6118 xmlFree((void *) atts);
6119 }
6120 return(name);
6121}
6122
6123/**
6124 * xmlParseEndTag:
6125 * @ctxt: an XML parser context
6126 *
6127 * parse an end of tag
6128 *
6129 * [42] ETag ::= '</' Name S? '>'
6130 *
6131 * With namespace
6132 *
6133 * [NS 9] ETag ::= '</' QName S? '>'
6134 */
6135
6136void
6137xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6138 xmlChar *name;
6139 xmlChar *oldname;
6140
6141 GROW;
6142 if ((RAW != '<') || (NXT(1) != '/')) {
6143 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6145 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6146 ctxt->wellFormed = 0;
6147 ctxt->disableSAX = 1;
6148 return;
6149 }
6150 SKIP(2);
6151
6152 name = xmlParseName(ctxt);
6153
6154 /*
6155 * We should definitely be at the ending "S? '>'" part
6156 */
6157 GROW;
6158 SKIP_BLANKS;
6159 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6160 ctxt->errNo = XML_ERR_GT_REQUIRED;
6161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6162 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6163 ctxt->wellFormed = 0;
6164 ctxt->disableSAX = 1;
6165 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006166 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006167
6168 /*
6169 * [ WFC: Element Type Match ]
6170 * The Name in an element's end-tag must match the element type in the
6171 * start-tag.
6172 *
6173 */
6174 if ((name == NULL) || (ctxt->name == NULL) ||
6175 (!xmlStrEqual(name, ctxt->name))) {
6176 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6178 if ((name != NULL) && (ctxt->name != NULL)) {
6179 ctxt->sax->error(ctxt->userData,
6180 "Opening and ending tag mismatch: %s and %s\n",
6181 ctxt->name, name);
6182 } else if (ctxt->name != NULL) {
6183 ctxt->sax->error(ctxt->userData,
6184 "Ending tag eror for: %s\n", ctxt->name);
6185 } else {
6186 ctxt->sax->error(ctxt->userData,
6187 "Ending tag error: internal error ???\n");
6188 }
6189
6190 }
6191 ctxt->wellFormed = 0;
6192 ctxt->disableSAX = 1;
6193 }
6194
6195 /*
6196 * SAX: End of Tag
6197 */
6198 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6199 (!ctxt->disableSAX))
6200 ctxt->sax->endElement(ctxt->userData, name);
6201
6202 if (name != NULL)
6203 xmlFree(name);
6204 oldname = namePop(ctxt);
6205 spacePop(ctxt);
6206 if (oldname != NULL) {
6207#ifdef DEBUG_STACK
6208 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6209#endif
6210 xmlFree(oldname);
6211 }
6212 return;
6213}
6214
6215/**
6216 * xmlParseCDSect:
6217 * @ctxt: an XML parser context
6218 *
6219 * Parse escaped pure raw content.
6220 *
6221 * [18] CDSect ::= CDStart CData CDEnd
6222 *
6223 * [19] CDStart ::= '<![CDATA['
6224 *
6225 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6226 *
6227 * [21] CDEnd ::= ']]>'
6228 */
6229void
6230xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6231 xmlChar *buf = NULL;
6232 int len = 0;
6233 int size = XML_PARSER_BUFFER_SIZE;
6234 int r, rl;
6235 int s, sl;
6236 int cur, l;
6237 int count = 0;
6238
6239 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6240 (NXT(2) == '[') && (NXT(3) == 'C') &&
6241 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6242 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6243 (NXT(8) == '[')) {
6244 SKIP(9);
6245 } else
6246 return;
6247
6248 ctxt->instate = XML_PARSER_CDATA_SECTION;
6249 r = CUR_CHAR(rl);
6250 if (!IS_CHAR(r)) {
6251 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6253 ctxt->sax->error(ctxt->userData,
6254 "CData section not finished\n");
6255 ctxt->wellFormed = 0;
6256 ctxt->disableSAX = 1;
6257 ctxt->instate = XML_PARSER_CONTENT;
6258 return;
6259 }
6260 NEXTL(rl);
6261 s = CUR_CHAR(sl);
6262 if (!IS_CHAR(s)) {
6263 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6265 ctxt->sax->error(ctxt->userData,
6266 "CData section not finished\n");
6267 ctxt->wellFormed = 0;
6268 ctxt->disableSAX = 1;
6269 ctxt->instate = XML_PARSER_CONTENT;
6270 return;
6271 }
6272 NEXTL(sl);
6273 cur = CUR_CHAR(l);
6274 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6275 if (buf == NULL) {
6276 xmlGenericError(xmlGenericErrorContext,
6277 "malloc of %d byte failed\n", size);
6278 return;
6279 }
6280 while (IS_CHAR(cur) &&
6281 ((r != ']') || (s != ']') || (cur != '>'))) {
6282 if (len + 5 >= size) {
6283 size *= 2;
6284 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6285 if (buf == NULL) {
6286 xmlGenericError(xmlGenericErrorContext,
6287 "realloc of %d byte failed\n", size);
6288 return;
6289 }
6290 }
6291 COPY_BUF(rl,buf,len,r);
6292 r = s;
6293 rl = sl;
6294 s = cur;
6295 sl = l;
6296 count++;
6297 if (count > 50) {
6298 GROW;
6299 count = 0;
6300 }
6301 NEXTL(l);
6302 cur = CUR_CHAR(l);
6303 }
6304 buf[len] = 0;
6305 ctxt->instate = XML_PARSER_CONTENT;
6306 if (cur != '>') {
6307 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6309 ctxt->sax->error(ctxt->userData,
6310 "CData section not finished\n%.50s\n", buf);
6311 ctxt->wellFormed = 0;
6312 ctxt->disableSAX = 1;
6313 xmlFree(buf);
6314 return;
6315 }
6316 NEXTL(l);
6317
6318 /*
6319 * Ok the buffer is to be consumed as cdata.
6320 */
6321 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6322 if (ctxt->sax->cdataBlock != NULL)
6323 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6324 }
6325 xmlFree(buf);
6326}
6327
6328/**
6329 * xmlParseContent:
6330 * @ctxt: an XML parser context
6331 *
6332 * Parse a content:
6333 *
6334 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6335 */
6336
6337void
6338xmlParseContent(xmlParserCtxtPtr ctxt) {
6339 GROW;
6340 while (((RAW != 0) || (ctxt->token != 0)) &&
6341 ((RAW != '<') || (NXT(1) != '/'))) {
6342 const xmlChar *test = CUR_PTR;
6343 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006344 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006345 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006346
6347 /*
6348 * Handle possible processed charrefs.
6349 */
6350 if (ctxt->token != 0) {
6351 xmlParseCharData(ctxt, 0);
6352 }
6353 /*
6354 * First case : a Processing Instruction.
6355 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006356 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006357 xmlParsePI(ctxt);
6358 }
6359
6360 /*
6361 * Second case : a CDSection
6362 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006363 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006364 (NXT(2) == '[') && (NXT(3) == 'C') &&
6365 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6366 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6367 (NXT(8) == '[')) {
6368 xmlParseCDSect(ctxt);
6369 }
6370
6371 /*
6372 * Third case : a comment
6373 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006374 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006375 (NXT(2) == '-') && (NXT(3) == '-')) {
6376 xmlParseComment(ctxt);
6377 ctxt->instate = XML_PARSER_CONTENT;
6378 }
6379
6380 /*
6381 * Fourth case : a sub-element.
6382 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006383 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006384 xmlParseElement(ctxt);
6385 }
6386
6387 /*
6388 * Fifth case : a reference. If if has not been resolved,
6389 * parsing returns it's Name, create the node
6390 */
6391
Daniel Veillard21a0f912001-02-25 19:54:14 +00006392 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006393 xmlParseReference(ctxt);
6394 }
6395
6396 /*
6397 * Last case, text. Note that References are handled directly.
6398 */
6399 else {
6400 xmlParseCharData(ctxt, 0);
6401 }
6402
6403 GROW;
6404 /*
6405 * Pop-up of finished entities.
6406 */
6407 while ((RAW == 0) && (ctxt->inputNr > 1))
6408 xmlPopInput(ctxt);
6409 SHRINK;
6410
6411 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6412 (tok == ctxt->token)) {
6413 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6415 ctxt->sax->error(ctxt->userData,
6416 "detected an error in element content\n");
6417 ctxt->wellFormed = 0;
6418 ctxt->disableSAX = 1;
6419 ctxt->instate = XML_PARSER_EOF;
6420 break;
6421 }
6422 }
6423}
6424
6425/**
6426 * xmlParseElement:
6427 * @ctxt: an XML parser context
6428 *
6429 * parse an XML element, this is highly recursive
6430 *
6431 * [39] element ::= EmptyElemTag | STag content ETag
6432 *
6433 * [ WFC: Element Type Match ]
6434 * The Name in an element's end-tag must match the element type in the
6435 * start-tag.
6436 *
6437 * [ VC: Element Valid ]
6438 * An element is valid if there is a declaration matching elementdecl
6439 * where the Name matches the element type and one of the following holds:
6440 * - The declaration matches EMPTY and the element has no content.
6441 * - The declaration matches children and the sequence of child elements
6442 * belongs to the language generated by the regular expression in the
6443 * content model, with optional white space (characters matching the
6444 * nonterminal S) between each pair of child elements.
6445 * - The declaration matches Mixed and the content consists of character
6446 * data and child elements whose types match names in the content model.
6447 * - The declaration matches ANY, and the types of any child elements have
6448 * been declared.
6449 */
6450
6451void
6452xmlParseElement(xmlParserCtxtPtr ctxt) {
6453 const xmlChar *openTag = CUR_PTR;
6454 xmlChar *name;
6455 xmlChar *oldname;
6456 xmlParserNodeInfo node_info;
6457 xmlNodePtr ret;
6458
6459 /* Capture start position */
6460 if (ctxt->record_info) {
6461 node_info.begin_pos = ctxt->input->consumed +
6462 (CUR_PTR - ctxt->input->base);
6463 node_info.begin_line = ctxt->input->line;
6464 }
6465
6466 if (ctxt->spaceNr == 0)
6467 spacePush(ctxt, -1);
6468 else
6469 spacePush(ctxt, *ctxt->space);
6470
6471 name = xmlParseStartTag(ctxt);
6472 if (name == NULL) {
6473 spacePop(ctxt);
6474 return;
6475 }
6476 namePush(ctxt, name);
6477 ret = ctxt->node;
6478
6479 /*
6480 * [ VC: Root Element Type ]
6481 * The Name in the document type declaration must match the element
6482 * type of the root element.
6483 */
6484 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6485 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6486 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6487
6488 /*
6489 * Check for an Empty Element.
6490 */
6491 if ((RAW == '/') && (NXT(1) == '>')) {
6492 SKIP(2);
6493 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6494 (!ctxt->disableSAX))
6495 ctxt->sax->endElement(ctxt->userData, name);
6496 oldname = namePop(ctxt);
6497 spacePop(ctxt);
6498 if (oldname != NULL) {
6499#ifdef DEBUG_STACK
6500 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6501#endif
6502 xmlFree(oldname);
6503 }
6504 if ( ret != NULL && ctxt->record_info ) {
6505 node_info.end_pos = ctxt->input->consumed +
6506 (CUR_PTR - ctxt->input->base);
6507 node_info.end_line = ctxt->input->line;
6508 node_info.node = ret;
6509 xmlParserAddNodeInfo(ctxt, &node_info);
6510 }
6511 return;
6512 }
6513 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006514 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006515 } else {
6516 ctxt->errNo = XML_ERR_GT_REQUIRED;
6517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6518 ctxt->sax->error(ctxt->userData,
6519 "Couldn't find end of Start Tag\n%.30s\n",
6520 openTag);
6521 ctxt->wellFormed = 0;
6522 ctxt->disableSAX = 1;
6523
6524 /*
6525 * end of parsing of this node.
6526 */
6527 nodePop(ctxt);
6528 oldname = namePop(ctxt);
6529 spacePop(ctxt);
6530 if (oldname != NULL) {
6531#ifdef DEBUG_STACK
6532 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6533#endif
6534 xmlFree(oldname);
6535 }
6536
6537 /*
6538 * Capture end position and add node
6539 */
6540 if ( ret != NULL && ctxt->record_info ) {
6541 node_info.end_pos = ctxt->input->consumed +
6542 (CUR_PTR - ctxt->input->base);
6543 node_info.end_line = ctxt->input->line;
6544 node_info.node = ret;
6545 xmlParserAddNodeInfo(ctxt, &node_info);
6546 }
6547 return;
6548 }
6549
6550 /*
6551 * Parse the content of the element:
6552 */
6553 xmlParseContent(ctxt);
6554 if (!IS_CHAR(RAW)) {
6555 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6557 ctxt->sax->error(ctxt->userData,
6558 "Premature end of data in tag %.30s\n", openTag);
6559 ctxt->wellFormed = 0;
6560 ctxt->disableSAX = 1;
6561
6562 /*
6563 * end of parsing of this node.
6564 */
6565 nodePop(ctxt);
6566 oldname = namePop(ctxt);
6567 spacePop(ctxt);
6568 if (oldname != NULL) {
6569#ifdef DEBUG_STACK
6570 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6571#endif
6572 xmlFree(oldname);
6573 }
6574 return;
6575 }
6576
6577 /*
6578 * parse the end of tag: '</' should be here.
6579 */
6580 xmlParseEndTag(ctxt);
6581
6582 /*
6583 * Capture end position and add node
6584 */
6585 if ( ret != NULL && ctxt->record_info ) {
6586 node_info.end_pos = ctxt->input->consumed +
6587 (CUR_PTR - ctxt->input->base);
6588 node_info.end_line = ctxt->input->line;
6589 node_info.node = ret;
6590 xmlParserAddNodeInfo(ctxt, &node_info);
6591 }
6592}
6593
6594/**
6595 * xmlParseVersionNum:
6596 * @ctxt: an XML parser context
6597 *
6598 * parse the XML version value.
6599 *
6600 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6601 *
6602 * Returns the string giving the XML version number, or NULL
6603 */
6604xmlChar *
6605xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6606 xmlChar *buf = NULL;
6607 int len = 0;
6608 int size = 10;
6609 xmlChar cur;
6610
6611 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6612 if (buf == NULL) {
6613 xmlGenericError(xmlGenericErrorContext,
6614 "malloc of %d byte failed\n", size);
6615 return(NULL);
6616 }
6617 cur = CUR;
6618 while (((cur >= 'a') && (cur <= 'z')) ||
6619 ((cur >= 'A') && (cur <= 'Z')) ||
6620 ((cur >= '0') && (cur <= '9')) ||
6621 (cur == '_') || (cur == '.') ||
6622 (cur == ':') || (cur == '-')) {
6623 if (len + 1 >= size) {
6624 size *= 2;
6625 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6626 if (buf == NULL) {
6627 xmlGenericError(xmlGenericErrorContext,
6628 "realloc of %d byte failed\n", size);
6629 return(NULL);
6630 }
6631 }
6632 buf[len++] = cur;
6633 NEXT;
6634 cur=CUR;
6635 }
6636 buf[len] = 0;
6637 return(buf);
6638}
6639
6640/**
6641 * xmlParseVersionInfo:
6642 * @ctxt: an XML parser context
6643 *
6644 * parse the XML version.
6645 *
6646 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6647 *
6648 * [25] Eq ::= S? '=' S?
6649 *
6650 * Returns the version string, e.g. "1.0"
6651 */
6652
6653xmlChar *
6654xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6655 xmlChar *version = NULL;
6656 const xmlChar *q;
6657
6658 if ((RAW == 'v') && (NXT(1) == 'e') &&
6659 (NXT(2) == 'r') && (NXT(3) == 's') &&
6660 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6661 (NXT(6) == 'n')) {
6662 SKIP(7);
6663 SKIP_BLANKS;
6664 if (RAW != '=') {
6665 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6667 ctxt->sax->error(ctxt->userData,
6668 "xmlParseVersionInfo : expected '='\n");
6669 ctxt->wellFormed = 0;
6670 ctxt->disableSAX = 1;
6671 return(NULL);
6672 }
6673 NEXT;
6674 SKIP_BLANKS;
6675 if (RAW == '"') {
6676 NEXT;
6677 q = CUR_PTR;
6678 version = xmlParseVersionNum(ctxt);
6679 if (RAW != '"') {
6680 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6682 ctxt->sax->error(ctxt->userData,
6683 "String not closed\n%.50s\n", q);
6684 ctxt->wellFormed = 0;
6685 ctxt->disableSAX = 1;
6686 } else
6687 NEXT;
6688 } else if (RAW == '\''){
6689 NEXT;
6690 q = CUR_PTR;
6691 version = xmlParseVersionNum(ctxt);
6692 if (RAW != '\'') {
6693 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6695 ctxt->sax->error(ctxt->userData,
6696 "String not closed\n%.50s\n", q);
6697 ctxt->wellFormed = 0;
6698 ctxt->disableSAX = 1;
6699 } else
6700 NEXT;
6701 } else {
6702 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6704 ctxt->sax->error(ctxt->userData,
6705 "xmlParseVersionInfo : expected ' or \"\n");
6706 ctxt->wellFormed = 0;
6707 ctxt->disableSAX = 1;
6708 }
6709 }
6710 return(version);
6711}
6712
6713/**
6714 * xmlParseEncName:
6715 * @ctxt: an XML parser context
6716 *
6717 * parse the XML encoding name
6718 *
6719 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6720 *
6721 * Returns the encoding name value or NULL
6722 */
6723xmlChar *
6724xmlParseEncName(xmlParserCtxtPtr ctxt) {
6725 xmlChar *buf = NULL;
6726 int len = 0;
6727 int size = 10;
6728 xmlChar cur;
6729
6730 cur = CUR;
6731 if (((cur >= 'a') && (cur <= 'z')) ||
6732 ((cur >= 'A') && (cur <= 'Z'))) {
6733 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6734 if (buf == NULL) {
6735 xmlGenericError(xmlGenericErrorContext,
6736 "malloc of %d byte failed\n", size);
6737 return(NULL);
6738 }
6739
6740 buf[len++] = cur;
6741 NEXT;
6742 cur = CUR;
6743 while (((cur >= 'a') && (cur <= 'z')) ||
6744 ((cur >= 'A') && (cur <= 'Z')) ||
6745 ((cur >= '0') && (cur <= '9')) ||
6746 (cur == '.') || (cur == '_') ||
6747 (cur == '-')) {
6748 if (len + 1 >= size) {
6749 size *= 2;
6750 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6751 if (buf == NULL) {
6752 xmlGenericError(xmlGenericErrorContext,
6753 "realloc of %d byte failed\n", size);
6754 return(NULL);
6755 }
6756 }
6757 buf[len++] = cur;
6758 NEXT;
6759 cur = CUR;
6760 if (cur == 0) {
6761 SHRINK;
6762 GROW;
6763 cur = CUR;
6764 }
6765 }
6766 buf[len] = 0;
6767 } else {
6768 ctxt->errNo = XML_ERR_ENCODING_NAME;
6769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6770 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6771 ctxt->wellFormed = 0;
6772 ctxt->disableSAX = 1;
6773 }
6774 return(buf);
6775}
6776
6777/**
6778 * xmlParseEncodingDecl:
6779 * @ctxt: an XML parser context
6780 *
6781 * parse the XML encoding declaration
6782 *
6783 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6784 *
6785 * this setups the conversion filters.
6786 *
6787 * Returns the encoding value or NULL
6788 */
6789
6790xmlChar *
6791xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6792 xmlChar *encoding = NULL;
6793 const xmlChar *q;
6794
6795 SKIP_BLANKS;
6796 if ((RAW == 'e') && (NXT(1) == 'n') &&
6797 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6798 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6799 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6800 SKIP(8);
6801 SKIP_BLANKS;
6802 if (RAW != '=') {
6803 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6805 ctxt->sax->error(ctxt->userData,
6806 "xmlParseEncodingDecl : expected '='\n");
6807 ctxt->wellFormed = 0;
6808 ctxt->disableSAX = 1;
6809 return(NULL);
6810 }
6811 NEXT;
6812 SKIP_BLANKS;
6813 if (RAW == '"') {
6814 NEXT;
6815 q = CUR_PTR;
6816 encoding = xmlParseEncName(ctxt);
6817 if (RAW != '"') {
6818 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6820 ctxt->sax->error(ctxt->userData,
6821 "String not closed\n%.50s\n", q);
6822 ctxt->wellFormed = 0;
6823 ctxt->disableSAX = 1;
6824 } else
6825 NEXT;
6826 } else if (RAW == '\''){
6827 NEXT;
6828 q = CUR_PTR;
6829 encoding = xmlParseEncName(ctxt);
6830 if (RAW != '\'') {
6831 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6833 ctxt->sax->error(ctxt->userData,
6834 "String not closed\n%.50s\n", q);
6835 ctxt->wellFormed = 0;
6836 ctxt->disableSAX = 1;
6837 } else
6838 NEXT;
6839 } else if (RAW == '"'){
6840 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6842 ctxt->sax->error(ctxt->userData,
6843 "xmlParseEncodingDecl : expected ' or \"\n");
6844 ctxt->wellFormed = 0;
6845 ctxt->disableSAX = 1;
6846 }
6847 if (encoding != NULL) {
6848 xmlCharEncoding enc;
6849 xmlCharEncodingHandlerPtr handler;
6850
6851 if (ctxt->input->encoding != NULL)
6852 xmlFree((xmlChar *) ctxt->input->encoding);
6853 ctxt->input->encoding = encoding;
6854
6855 enc = xmlParseCharEncoding((const char *) encoding);
6856 /*
6857 * registered set of known encodings
6858 */
6859 if (enc != XML_CHAR_ENCODING_ERROR) {
6860 xmlSwitchEncoding(ctxt, enc);
6861 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6862 xmlFree(encoding);
6863 return(NULL);
6864 }
6865 } else {
6866 /*
6867 * fallback for unknown encodings
6868 */
6869 handler = xmlFindCharEncodingHandler((const char *) encoding);
6870 if (handler != NULL) {
6871 xmlSwitchToEncoding(ctxt, handler);
6872 } else {
6873 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6875 ctxt->sax->error(ctxt->userData,
6876 "Unsupported encoding %s\n", encoding);
6877 return(NULL);
6878 }
6879 }
6880 }
6881 }
6882 return(encoding);
6883}
6884
6885/**
6886 * xmlParseSDDecl:
6887 * @ctxt: an XML parser context
6888 *
6889 * parse the XML standalone declaration
6890 *
6891 * [32] SDDecl ::= S 'standalone' Eq
6892 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6893 *
6894 * [ VC: Standalone Document Declaration ]
6895 * TODO The standalone document declaration must have the value "no"
6896 * if any external markup declarations contain declarations of:
6897 * - attributes with default values, if elements to which these
6898 * attributes apply appear in the document without specifications
6899 * of values for these attributes, or
6900 * - entities (other than amp, lt, gt, apos, quot), if references
6901 * to those entities appear in the document, or
6902 * - attributes with values subject to normalization, where the
6903 * attribute appears in the document with a value which will change
6904 * as a result of normalization, or
6905 * - element types with element content, if white space occurs directly
6906 * within any instance of those types.
6907 *
6908 * Returns 1 if standalone, 0 otherwise
6909 */
6910
6911int
6912xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6913 int standalone = -1;
6914
6915 SKIP_BLANKS;
6916 if ((RAW == 's') && (NXT(1) == 't') &&
6917 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6918 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6919 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6920 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6921 SKIP(10);
6922 SKIP_BLANKS;
6923 if (RAW != '=') {
6924 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6926 ctxt->sax->error(ctxt->userData,
6927 "XML standalone declaration : expected '='\n");
6928 ctxt->wellFormed = 0;
6929 ctxt->disableSAX = 1;
6930 return(standalone);
6931 }
6932 NEXT;
6933 SKIP_BLANKS;
6934 if (RAW == '\''){
6935 NEXT;
6936 if ((RAW == 'n') && (NXT(1) == 'o')) {
6937 standalone = 0;
6938 SKIP(2);
6939 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6940 (NXT(2) == 's')) {
6941 standalone = 1;
6942 SKIP(3);
6943 } else {
6944 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6946 ctxt->sax->error(ctxt->userData,
6947 "standalone accepts only 'yes' or 'no'\n");
6948 ctxt->wellFormed = 0;
6949 ctxt->disableSAX = 1;
6950 }
6951 if (RAW != '\'') {
6952 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6954 ctxt->sax->error(ctxt->userData, "String not closed\n");
6955 ctxt->wellFormed = 0;
6956 ctxt->disableSAX = 1;
6957 } else
6958 NEXT;
6959 } else if (RAW == '"'){
6960 NEXT;
6961 if ((RAW == 'n') && (NXT(1) == 'o')) {
6962 standalone = 0;
6963 SKIP(2);
6964 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6965 (NXT(2) == 's')) {
6966 standalone = 1;
6967 SKIP(3);
6968 } else {
6969 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6971 ctxt->sax->error(ctxt->userData,
6972 "standalone accepts only 'yes' or 'no'\n");
6973 ctxt->wellFormed = 0;
6974 ctxt->disableSAX = 1;
6975 }
6976 if (RAW != '"') {
6977 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6979 ctxt->sax->error(ctxt->userData, "String not closed\n");
6980 ctxt->wellFormed = 0;
6981 ctxt->disableSAX = 1;
6982 } else
6983 NEXT;
6984 } else {
6985 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6987 ctxt->sax->error(ctxt->userData,
6988 "Standalone value not found\n");
6989 ctxt->wellFormed = 0;
6990 ctxt->disableSAX = 1;
6991 }
6992 }
6993 return(standalone);
6994}
6995
6996/**
6997 * xmlParseXMLDecl:
6998 * @ctxt: an XML parser context
6999 *
7000 * parse an XML declaration header
7001 *
7002 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7003 */
7004
7005void
7006xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7007 xmlChar *version;
7008
7009 /*
7010 * We know that '<?xml' is here.
7011 */
7012 SKIP(5);
7013
7014 if (!IS_BLANK(RAW)) {
7015 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7017 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7018 ctxt->wellFormed = 0;
7019 ctxt->disableSAX = 1;
7020 }
7021 SKIP_BLANKS;
7022
7023 /*
7024 * We should have the VersionInfo here.
7025 */
7026 version = xmlParseVersionInfo(ctxt);
7027 if (version == NULL)
7028 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7029 ctxt->version = xmlStrdup(version);
7030 xmlFree(version);
7031
7032 /*
7033 * We may have the encoding declaration
7034 */
7035 if (!IS_BLANK(RAW)) {
7036 if ((RAW == '?') && (NXT(1) == '>')) {
7037 SKIP(2);
7038 return;
7039 }
7040 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7042 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7043 ctxt->wellFormed = 0;
7044 ctxt->disableSAX = 1;
7045 }
7046 xmlParseEncodingDecl(ctxt);
7047 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7048 /*
7049 * The XML REC instructs us to stop parsing right here
7050 */
7051 return;
7052 }
7053
7054 /*
7055 * We may have the standalone status.
7056 */
7057 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7058 if ((RAW == '?') && (NXT(1) == '>')) {
7059 SKIP(2);
7060 return;
7061 }
7062 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7064 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7065 ctxt->wellFormed = 0;
7066 ctxt->disableSAX = 1;
7067 }
7068 SKIP_BLANKS;
7069 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7070
7071 SKIP_BLANKS;
7072 if ((RAW == '?') && (NXT(1) == '>')) {
7073 SKIP(2);
7074 } else if (RAW == '>') {
7075 /* Deprecated old WD ... */
7076 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7078 ctxt->sax->error(ctxt->userData,
7079 "XML declaration must end-up with '?>'\n");
7080 ctxt->wellFormed = 0;
7081 ctxt->disableSAX = 1;
7082 NEXT;
7083 } else {
7084 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7086 ctxt->sax->error(ctxt->userData,
7087 "parsing XML declaration: '?>' expected\n");
7088 ctxt->wellFormed = 0;
7089 ctxt->disableSAX = 1;
7090 MOVETO_ENDTAG(CUR_PTR);
7091 NEXT;
7092 }
7093}
7094
7095/**
7096 * xmlParseMisc:
7097 * @ctxt: an XML parser context
7098 *
7099 * parse an XML Misc* optionnal field.
7100 *
7101 * [27] Misc ::= Comment | PI | S
7102 */
7103
7104void
7105xmlParseMisc(xmlParserCtxtPtr ctxt) {
7106 while (((RAW == '<') && (NXT(1) == '?')) ||
7107 ((RAW == '<') && (NXT(1) == '!') &&
7108 (NXT(2) == '-') && (NXT(3) == '-')) ||
7109 IS_BLANK(CUR)) {
7110 if ((RAW == '<') && (NXT(1) == '?')) {
7111 xmlParsePI(ctxt);
7112 } else if (IS_BLANK(CUR)) {
7113 NEXT;
7114 } else
7115 xmlParseComment(ctxt);
7116 }
7117}
7118
7119/**
7120 * xmlParseDocument:
7121 * @ctxt: an XML parser context
7122 *
7123 * parse an XML document (and build a tree if using the standard SAX
7124 * interface).
7125 *
7126 * [1] document ::= prolog element Misc*
7127 *
7128 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7129 *
7130 * Returns 0, -1 in case of error. the parser context is augmented
7131 * as a result of the parsing.
7132 */
7133
7134int
7135xmlParseDocument(xmlParserCtxtPtr ctxt) {
7136 xmlChar start[4];
7137 xmlCharEncoding enc;
7138
7139 xmlInitParser();
7140
7141 GROW;
7142
7143 /*
7144 * SAX: beginning of the document processing.
7145 */
7146 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7147 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7148
7149 /*
7150 * Get the 4 first bytes and decode the charset
7151 * if enc != XML_CHAR_ENCODING_NONE
7152 * plug some encoding conversion routines.
7153 */
7154 start[0] = RAW;
7155 start[1] = NXT(1);
7156 start[2] = NXT(2);
7157 start[3] = NXT(3);
7158 enc = xmlDetectCharEncoding(start, 4);
7159 if (enc != XML_CHAR_ENCODING_NONE) {
7160 xmlSwitchEncoding(ctxt, enc);
7161 }
7162
7163
7164 if (CUR == 0) {
7165 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7167 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7168 ctxt->wellFormed = 0;
7169 ctxt->disableSAX = 1;
7170 }
7171
7172 /*
7173 * Check for the XMLDecl in the Prolog.
7174 */
7175 GROW;
7176 if ((RAW == '<') && (NXT(1) == '?') &&
7177 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7178 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7179
7180 /*
7181 * Note that we will switch encoding on the fly.
7182 */
7183 xmlParseXMLDecl(ctxt);
7184 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7185 /*
7186 * The XML REC instructs us to stop parsing right here
7187 */
7188 return(-1);
7189 }
7190 ctxt->standalone = ctxt->input->standalone;
7191 SKIP_BLANKS;
7192 } else {
7193 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7194 }
7195 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7196 ctxt->sax->startDocument(ctxt->userData);
7197
7198 /*
7199 * The Misc part of the Prolog
7200 */
7201 GROW;
7202 xmlParseMisc(ctxt);
7203
7204 /*
7205 * Then possibly doc type declaration(s) and more Misc
7206 * (doctypedecl Misc*)?
7207 */
7208 GROW;
7209 if ((RAW == '<') && (NXT(1) == '!') &&
7210 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7211 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7212 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7213 (NXT(8) == 'E')) {
7214
7215 ctxt->inSubset = 1;
7216 xmlParseDocTypeDecl(ctxt);
7217 if (RAW == '[') {
7218 ctxt->instate = XML_PARSER_DTD;
7219 xmlParseInternalSubset(ctxt);
7220 }
7221
7222 /*
7223 * Create and update the external subset.
7224 */
7225 ctxt->inSubset = 2;
7226 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7227 (!ctxt->disableSAX))
7228 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7229 ctxt->extSubSystem, ctxt->extSubURI);
7230 ctxt->inSubset = 0;
7231
7232
7233 ctxt->instate = XML_PARSER_PROLOG;
7234 xmlParseMisc(ctxt);
7235 }
7236
7237 /*
7238 * Time to start parsing the tree itself
7239 */
7240 GROW;
7241 if (RAW != '<') {
7242 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7244 ctxt->sax->error(ctxt->userData,
7245 "Start tag expected, '<' not found\n");
7246 ctxt->wellFormed = 0;
7247 ctxt->disableSAX = 1;
7248 ctxt->instate = XML_PARSER_EOF;
7249 } else {
7250 ctxt->instate = XML_PARSER_CONTENT;
7251 xmlParseElement(ctxt);
7252 ctxt->instate = XML_PARSER_EPILOG;
7253
7254
7255 /*
7256 * The Misc part at the end
7257 */
7258 xmlParseMisc(ctxt);
7259
7260 if (RAW != 0) {
7261 ctxt->errNo = XML_ERR_DOCUMENT_END;
7262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7263 ctxt->sax->error(ctxt->userData,
7264 "Extra content at the end of the document\n");
7265 ctxt->wellFormed = 0;
7266 ctxt->disableSAX = 1;
7267 }
7268 ctxt->instate = XML_PARSER_EOF;
7269 }
7270
7271 /*
7272 * SAX: end of the document processing.
7273 */
7274 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7275 (!ctxt->disableSAX))
7276 ctxt->sax->endDocument(ctxt->userData);
7277
7278 if (! ctxt->wellFormed) return(-1);
7279 return(0);
7280}
7281
7282/**
7283 * xmlParseExtParsedEnt:
7284 * @ctxt: an XML parser context
7285 *
7286 * parse a genreral parsed entity
7287 * An external general parsed entity is well-formed if it matches the
7288 * production labeled extParsedEnt.
7289 *
7290 * [78] extParsedEnt ::= TextDecl? content
7291 *
7292 * Returns 0, -1 in case of error. the parser context is augmented
7293 * as a result of the parsing.
7294 */
7295
7296int
7297xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7298 xmlChar start[4];
7299 xmlCharEncoding enc;
7300
7301 xmlDefaultSAXHandlerInit();
7302
7303 GROW;
7304
7305 /*
7306 * SAX: beginning of the document processing.
7307 */
7308 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7309 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7310
7311 /*
7312 * Get the 4 first bytes and decode the charset
7313 * if enc != XML_CHAR_ENCODING_NONE
7314 * plug some encoding conversion routines.
7315 */
7316 start[0] = RAW;
7317 start[1] = NXT(1);
7318 start[2] = NXT(2);
7319 start[3] = NXT(3);
7320 enc = xmlDetectCharEncoding(start, 4);
7321 if (enc != XML_CHAR_ENCODING_NONE) {
7322 xmlSwitchEncoding(ctxt, enc);
7323 }
7324
7325
7326 if (CUR == 0) {
7327 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7329 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7330 ctxt->wellFormed = 0;
7331 ctxt->disableSAX = 1;
7332 }
7333
7334 /*
7335 * Check for the XMLDecl in the Prolog.
7336 */
7337 GROW;
7338 if ((RAW == '<') && (NXT(1) == '?') &&
7339 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7340 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7341
7342 /*
7343 * Note that we will switch encoding on the fly.
7344 */
7345 xmlParseXMLDecl(ctxt);
7346 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7347 /*
7348 * The XML REC instructs us to stop parsing right here
7349 */
7350 return(-1);
7351 }
7352 SKIP_BLANKS;
7353 } else {
7354 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7355 }
7356 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7357 ctxt->sax->startDocument(ctxt->userData);
7358
7359 /*
7360 * Doing validity checking on chunk doesn't make sense
7361 */
7362 ctxt->instate = XML_PARSER_CONTENT;
7363 ctxt->validate = 0;
7364 ctxt->loadsubset = 0;
7365 ctxt->depth = 0;
7366
7367 xmlParseContent(ctxt);
7368
7369 if ((RAW == '<') && (NXT(1) == '/')) {
7370 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7372 ctxt->sax->error(ctxt->userData,
7373 "chunk is not well balanced\n");
7374 ctxt->wellFormed = 0;
7375 ctxt->disableSAX = 1;
7376 } else if (RAW != 0) {
7377 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7379 ctxt->sax->error(ctxt->userData,
7380 "extra content at the end of well balanced chunk\n");
7381 ctxt->wellFormed = 0;
7382 ctxt->disableSAX = 1;
7383 }
7384
7385 /*
7386 * SAX: end of the document processing.
7387 */
7388 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7389 (!ctxt->disableSAX))
7390 ctxt->sax->endDocument(ctxt->userData);
7391
7392 if (! ctxt->wellFormed) return(-1);
7393 return(0);
7394}
7395
7396/************************************************************************
7397 * *
7398 * Progressive parsing interfaces *
7399 * *
7400 ************************************************************************/
7401
7402/**
7403 * xmlParseLookupSequence:
7404 * @ctxt: an XML parser context
7405 * @first: the first char to lookup
7406 * @next: the next char to lookup or zero
7407 * @third: the next char to lookup or zero
7408 *
7409 * Try to find if a sequence (first, next, third) or just (first next) or
7410 * (first) is available in the input stream.
7411 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7412 * to avoid rescanning sequences of bytes, it DOES change the state of the
7413 * parser, do not use liberally.
7414 *
7415 * Returns the index to the current parsing point if the full sequence
7416 * is available, -1 otherwise.
7417 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007418static int
Owen Taylor3473f882001-02-23 17:55:21 +00007419xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7420 xmlChar next, xmlChar third) {
7421 int base, len;
7422 xmlParserInputPtr in;
7423 const xmlChar *buf;
7424
7425 in = ctxt->input;
7426 if (in == NULL) return(-1);
7427 base = in->cur - in->base;
7428 if (base < 0) return(-1);
7429 if (ctxt->checkIndex > base)
7430 base = ctxt->checkIndex;
7431 if (in->buf == NULL) {
7432 buf = in->base;
7433 len = in->length;
7434 } else {
7435 buf = in->buf->buffer->content;
7436 len = in->buf->buffer->use;
7437 }
7438 /* take into account the sequence length */
7439 if (third) len -= 2;
7440 else if (next) len --;
7441 for (;base < len;base++) {
7442 if (buf[base] == first) {
7443 if (third != 0) {
7444 if ((buf[base + 1] != next) ||
7445 (buf[base + 2] != third)) continue;
7446 } else if (next != 0) {
7447 if (buf[base + 1] != next) continue;
7448 }
7449 ctxt->checkIndex = 0;
7450#ifdef DEBUG_PUSH
7451 if (next == 0)
7452 xmlGenericError(xmlGenericErrorContext,
7453 "PP: lookup '%c' found at %d\n",
7454 first, base);
7455 else if (third == 0)
7456 xmlGenericError(xmlGenericErrorContext,
7457 "PP: lookup '%c%c' found at %d\n",
7458 first, next, base);
7459 else
7460 xmlGenericError(xmlGenericErrorContext,
7461 "PP: lookup '%c%c%c' found at %d\n",
7462 first, next, third, base);
7463#endif
7464 return(base - (in->cur - in->base));
7465 }
7466 }
7467 ctxt->checkIndex = base;
7468#ifdef DEBUG_PUSH
7469 if (next == 0)
7470 xmlGenericError(xmlGenericErrorContext,
7471 "PP: lookup '%c' failed\n", first);
7472 else if (third == 0)
7473 xmlGenericError(xmlGenericErrorContext,
7474 "PP: lookup '%c%c' failed\n", first, next);
7475 else
7476 xmlGenericError(xmlGenericErrorContext,
7477 "PP: lookup '%c%c%c' failed\n", first, next, third);
7478#endif
7479 return(-1);
7480}
7481
7482/**
7483 * xmlParseTryOrFinish:
7484 * @ctxt: an XML parser context
7485 * @terminate: last chunk indicator
7486 *
7487 * Try to progress on parsing
7488 *
7489 * Returns zero if no parsing was possible
7490 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007491static int
Owen Taylor3473f882001-02-23 17:55:21 +00007492xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7493 int ret = 0;
7494 int avail;
7495 xmlChar cur, next;
7496
7497#ifdef DEBUG_PUSH
7498 switch (ctxt->instate) {
7499 case XML_PARSER_EOF:
7500 xmlGenericError(xmlGenericErrorContext,
7501 "PP: try EOF\n"); break;
7502 case XML_PARSER_START:
7503 xmlGenericError(xmlGenericErrorContext,
7504 "PP: try START\n"); break;
7505 case XML_PARSER_MISC:
7506 xmlGenericError(xmlGenericErrorContext,
7507 "PP: try MISC\n");break;
7508 case XML_PARSER_COMMENT:
7509 xmlGenericError(xmlGenericErrorContext,
7510 "PP: try COMMENT\n");break;
7511 case XML_PARSER_PROLOG:
7512 xmlGenericError(xmlGenericErrorContext,
7513 "PP: try PROLOG\n");break;
7514 case XML_PARSER_START_TAG:
7515 xmlGenericError(xmlGenericErrorContext,
7516 "PP: try START_TAG\n");break;
7517 case XML_PARSER_CONTENT:
7518 xmlGenericError(xmlGenericErrorContext,
7519 "PP: try CONTENT\n");break;
7520 case XML_PARSER_CDATA_SECTION:
7521 xmlGenericError(xmlGenericErrorContext,
7522 "PP: try CDATA_SECTION\n");break;
7523 case XML_PARSER_END_TAG:
7524 xmlGenericError(xmlGenericErrorContext,
7525 "PP: try END_TAG\n");break;
7526 case XML_PARSER_ENTITY_DECL:
7527 xmlGenericError(xmlGenericErrorContext,
7528 "PP: try ENTITY_DECL\n");break;
7529 case XML_PARSER_ENTITY_VALUE:
7530 xmlGenericError(xmlGenericErrorContext,
7531 "PP: try ENTITY_VALUE\n");break;
7532 case XML_PARSER_ATTRIBUTE_VALUE:
7533 xmlGenericError(xmlGenericErrorContext,
7534 "PP: try ATTRIBUTE_VALUE\n");break;
7535 case XML_PARSER_DTD:
7536 xmlGenericError(xmlGenericErrorContext,
7537 "PP: try DTD\n");break;
7538 case XML_PARSER_EPILOG:
7539 xmlGenericError(xmlGenericErrorContext,
7540 "PP: try EPILOG\n");break;
7541 case XML_PARSER_PI:
7542 xmlGenericError(xmlGenericErrorContext,
7543 "PP: try PI\n");break;
7544 case XML_PARSER_IGNORE:
7545 xmlGenericError(xmlGenericErrorContext,
7546 "PP: try IGNORE\n");break;
7547 }
7548#endif
7549
7550 while (1) {
7551 /*
7552 * Pop-up of finished entities.
7553 */
7554 while ((RAW == 0) && (ctxt->inputNr > 1))
7555 xmlPopInput(ctxt);
7556
7557 if (ctxt->input ==NULL) break;
7558 if (ctxt->input->buf == NULL)
7559 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7560 else
7561 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7562 if (avail < 1)
7563 goto done;
7564 switch (ctxt->instate) {
7565 case XML_PARSER_EOF:
7566 /*
7567 * Document parsing is done !
7568 */
7569 goto done;
7570 case XML_PARSER_START:
7571 /*
7572 * Very first chars read from the document flow.
7573 */
Owen Taylor3473f882001-02-23 17:55:21 +00007574 if (avail < 2)
7575 goto done;
7576
7577 cur = ctxt->input->cur[0];
7578 next = ctxt->input->cur[1];
7579 if (cur == 0) {
7580 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7581 ctxt->sax->setDocumentLocator(ctxt->userData,
7582 &xmlDefaultSAXLocator);
7583 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7585 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7586 ctxt->wellFormed = 0;
7587 ctxt->disableSAX = 1;
7588 ctxt->instate = XML_PARSER_EOF;
7589#ifdef DEBUG_PUSH
7590 xmlGenericError(xmlGenericErrorContext,
7591 "PP: entering EOF\n");
7592#endif
7593 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7594 ctxt->sax->endDocument(ctxt->userData);
7595 goto done;
7596 }
7597 if ((cur == '<') && (next == '?')) {
7598 /* PI or XML decl */
7599 if (avail < 5) return(ret);
7600 if ((!terminate) &&
7601 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7602 return(ret);
7603 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7604 ctxt->sax->setDocumentLocator(ctxt->userData,
7605 &xmlDefaultSAXLocator);
7606 if ((ctxt->input->cur[2] == 'x') &&
7607 (ctxt->input->cur[3] == 'm') &&
7608 (ctxt->input->cur[4] == 'l') &&
7609 (IS_BLANK(ctxt->input->cur[5]))) {
7610 ret += 5;
7611#ifdef DEBUG_PUSH
7612 xmlGenericError(xmlGenericErrorContext,
7613 "PP: Parsing XML Decl\n");
7614#endif
7615 xmlParseXMLDecl(ctxt);
7616 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7617 /*
7618 * The XML REC instructs us to stop parsing right
7619 * here
7620 */
7621 ctxt->instate = XML_PARSER_EOF;
7622 return(0);
7623 }
7624 ctxt->standalone = ctxt->input->standalone;
7625 if ((ctxt->encoding == NULL) &&
7626 (ctxt->input->encoding != NULL))
7627 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7628 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7629 (!ctxt->disableSAX))
7630 ctxt->sax->startDocument(ctxt->userData);
7631 ctxt->instate = XML_PARSER_MISC;
7632#ifdef DEBUG_PUSH
7633 xmlGenericError(xmlGenericErrorContext,
7634 "PP: entering MISC\n");
7635#endif
7636 } else {
7637 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7638 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7639 (!ctxt->disableSAX))
7640 ctxt->sax->startDocument(ctxt->userData);
7641 ctxt->instate = XML_PARSER_MISC;
7642#ifdef DEBUG_PUSH
7643 xmlGenericError(xmlGenericErrorContext,
7644 "PP: entering MISC\n");
7645#endif
7646 }
7647 } else {
7648 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7649 ctxt->sax->setDocumentLocator(ctxt->userData,
7650 &xmlDefaultSAXLocator);
7651 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7652 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7653 (!ctxt->disableSAX))
7654 ctxt->sax->startDocument(ctxt->userData);
7655 ctxt->instate = XML_PARSER_MISC;
7656#ifdef DEBUG_PUSH
7657 xmlGenericError(xmlGenericErrorContext,
7658 "PP: entering MISC\n");
7659#endif
7660 }
7661 break;
7662 case XML_PARSER_MISC:
7663 SKIP_BLANKS;
7664 if (ctxt->input->buf == NULL)
7665 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7666 else
7667 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7668 if (avail < 2)
7669 goto done;
7670 cur = ctxt->input->cur[0];
7671 next = ctxt->input->cur[1];
7672 if ((cur == '<') && (next == '?')) {
7673 if ((!terminate) &&
7674 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7675 goto done;
7676#ifdef DEBUG_PUSH
7677 xmlGenericError(xmlGenericErrorContext,
7678 "PP: Parsing PI\n");
7679#endif
7680 xmlParsePI(ctxt);
7681 } else if ((cur == '<') && (next == '!') &&
7682 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7683 if ((!terminate) &&
7684 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7685 goto done;
7686#ifdef DEBUG_PUSH
7687 xmlGenericError(xmlGenericErrorContext,
7688 "PP: Parsing Comment\n");
7689#endif
7690 xmlParseComment(ctxt);
7691 ctxt->instate = XML_PARSER_MISC;
7692 } else if ((cur == '<') && (next == '!') &&
7693 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7694 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7695 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7696 (ctxt->input->cur[8] == 'E')) {
7697 if ((!terminate) &&
7698 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7699 goto done;
7700#ifdef DEBUG_PUSH
7701 xmlGenericError(xmlGenericErrorContext,
7702 "PP: Parsing internal subset\n");
7703#endif
7704 ctxt->inSubset = 1;
7705 xmlParseDocTypeDecl(ctxt);
7706 if (RAW == '[') {
7707 ctxt->instate = XML_PARSER_DTD;
7708#ifdef DEBUG_PUSH
7709 xmlGenericError(xmlGenericErrorContext,
7710 "PP: entering DTD\n");
7711#endif
7712 } else {
7713 /*
7714 * Create and update the external subset.
7715 */
7716 ctxt->inSubset = 2;
7717 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7718 (ctxt->sax->externalSubset != NULL))
7719 ctxt->sax->externalSubset(ctxt->userData,
7720 ctxt->intSubName, ctxt->extSubSystem,
7721 ctxt->extSubURI);
7722 ctxt->inSubset = 0;
7723 ctxt->instate = XML_PARSER_PROLOG;
7724#ifdef DEBUG_PUSH
7725 xmlGenericError(xmlGenericErrorContext,
7726 "PP: entering PROLOG\n");
7727#endif
7728 }
7729 } else if ((cur == '<') && (next == '!') &&
7730 (avail < 9)) {
7731 goto done;
7732 } else {
7733 ctxt->instate = XML_PARSER_START_TAG;
7734#ifdef DEBUG_PUSH
7735 xmlGenericError(xmlGenericErrorContext,
7736 "PP: entering START_TAG\n");
7737#endif
7738 }
7739 break;
7740 case XML_PARSER_IGNORE:
7741 xmlGenericError(xmlGenericErrorContext,
7742 "PP: internal error, state == IGNORE");
7743 ctxt->instate = XML_PARSER_DTD;
7744#ifdef DEBUG_PUSH
7745 xmlGenericError(xmlGenericErrorContext,
7746 "PP: entering DTD\n");
7747#endif
7748 break;
7749 case XML_PARSER_PROLOG:
7750 SKIP_BLANKS;
7751 if (ctxt->input->buf == NULL)
7752 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7753 else
7754 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7755 if (avail < 2)
7756 goto done;
7757 cur = ctxt->input->cur[0];
7758 next = ctxt->input->cur[1];
7759 if ((cur == '<') && (next == '?')) {
7760 if ((!terminate) &&
7761 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7762 goto done;
7763#ifdef DEBUG_PUSH
7764 xmlGenericError(xmlGenericErrorContext,
7765 "PP: Parsing PI\n");
7766#endif
7767 xmlParsePI(ctxt);
7768 } else if ((cur == '<') && (next == '!') &&
7769 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7770 if ((!terminate) &&
7771 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7772 goto done;
7773#ifdef DEBUG_PUSH
7774 xmlGenericError(xmlGenericErrorContext,
7775 "PP: Parsing Comment\n");
7776#endif
7777 xmlParseComment(ctxt);
7778 ctxt->instate = XML_PARSER_PROLOG;
7779 } else if ((cur == '<') && (next == '!') &&
7780 (avail < 4)) {
7781 goto done;
7782 } else {
7783 ctxt->instate = XML_PARSER_START_TAG;
7784#ifdef DEBUG_PUSH
7785 xmlGenericError(xmlGenericErrorContext,
7786 "PP: entering START_TAG\n");
7787#endif
7788 }
7789 break;
7790 case XML_PARSER_EPILOG:
7791 SKIP_BLANKS;
7792 if (ctxt->input->buf == NULL)
7793 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7794 else
7795 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7796 if (avail < 2)
7797 goto done;
7798 cur = ctxt->input->cur[0];
7799 next = ctxt->input->cur[1];
7800 if ((cur == '<') && (next == '?')) {
7801 if ((!terminate) &&
7802 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7803 goto done;
7804#ifdef DEBUG_PUSH
7805 xmlGenericError(xmlGenericErrorContext,
7806 "PP: Parsing PI\n");
7807#endif
7808 xmlParsePI(ctxt);
7809 ctxt->instate = XML_PARSER_EPILOG;
7810 } else if ((cur == '<') && (next == '!') &&
7811 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7812 if ((!terminate) &&
7813 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7814 goto done;
7815#ifdef DEBUG_PUSH
7816 xmlGenericError(xmlGenericErrorContext,
7817 "PP: Parsing Comment\n");
7818#endif
7819 xmlParseComment(ctxt);
7820 ctxt->instate = XML_PARSER_EPILOG;
7821 } else if ((cur == '<') && (next == '!') &&
7822 (avail < 4)) {
7823 goto done;
7824 } else {
7825 ctxt->errNo = XML_ERR_DOCUMENT_END;
7826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7827 ctxt->sax->error(ctxt->userData,
7828 "Extra content at the end of the document\n");
7829 ctxt->wellFormed = 0;
7830 ctxt->disableSAX = 1;
7831 ctxt->instate = XML_PARSER_EOF;
7832#ifdef DEBUG_PUSH
7833 xmlGenericError(xmlGenericErrorContext,
7834 "PP: entering EOF\n");
7835#endif
7836 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7837 (!ctxt->disableSAX))
7838 ctxt->sax->endDocument(ctxt->userData);
7839 goto done;
7840 }
7841 break;
7842 case XML_PARSER_START_TAG: {
7843 xmlChar *name, *oldname;
7844
7845 if ((avail < 2) && (ctxt->inputNr == 1))
7846 goto done;
7847 cur = ctxt->input->cur[0];
7848 if (cur != '<') {
7849 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7851 ctxt->sax->error(ctxt->userData,
7852 "Start tag expect, '<' not found\n");
7853 ctxt->wellFormed = 0;
7854 ctxt->disableSAX = 1;
7855 ctxt->instate = XML_PARSER_EOF;
7856#ifdef DEBUG_PUSH
7857 xmlGenericError(xmlGenericErrorContext,
7858 "PP: entering EOF\n");
7859#endif
7860 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7861 (!ctxt->disableSAX))
7862 ctxt->sax->endDocument(ctxt->userData);
7863 goto done;
7864 }
7865 if ((!terminate) &&
7866 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7867 goto done;
7868 if (ctxt->spaceNr == 0)
7869 spacePush(ctxt, -1);
7870 else
7871 spacePush(ctxt, *ctxt->space);
7872 name = xmlParseStartTag(ctxt);
7873 if (name == NULL) {
7874 spacePop(ctxt);
7875 ctxt->instate = XML_PARSER_EOF;
7876#ifdef DEBUG_PUSH
7877 xmlGenericError(xmlGenericErrorContext,
7878 "PP: entering EOF\n");
7879#endif
7880 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7881 (!ctxt->disableSAX))
7882 ctxt->sax->endDocument(ctxt->userData);
7883 goto done;
7884 }
7885 namePush(ctxt, xmlStrdup(name));
7886
7887 /*
7888 * [ VC: Root Element Type ]
7889 * The Name in the document type declaration must match
7890 * the element type of the root element.
7891 */
7892 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7893 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7894 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7895
7896 /*
7897 * Check for an Empty Element.
7898 */
7899 if ((RAW == '/') && (NXT(1) == '>')) {
7900 SKIP(2);
7901 if ((ctxt->sax != NULL) &&
7902 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7903 ctxt->sax->endElement(ctxt->userData, name);
7904 xmlFree(name);
7905 oldname = namePop(ctxt);
7906 spacePop(ctxt);
7907 if (oldname != NULL) {
7908#ifdef DEBUG_STACK
7909 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7910#endif
7911 xmlFree(oldname);
7912 }
7913 if (ctxt->name == NULL) {
7914 ctxt->instate = XML_PARSER_EPILOG;
7915#ifdef DEBUG_PUSH
7916 xmlGenericError(xmlGenericErrorContext,
7917 "PP: entering EPILOG\n");
7918#endif
7919 } else {
7920 ctxt->instate = XML_PARSER_CONTENT;
7921#ifdef DEBUG_PUSH
7922 xmlGenericError(xmlGenericErrorContext,
7923 "PP: entering CONTENT\n");
7924#endif
7925 }
7926 break;
7927 }
7928 if (RAW == '>') {
7929 NEXT;
7930 } else {
7931 ctxt->errNo = XML_ERR_GT_REQUIRED;
7932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7933 ctxt->sax->error(ctxt->userData,
7934 "Couldn't find end of Start Tag %s\n",
7935 name);
7936 ctxt->wellFormed = 0;
7937 ctxt->disableSAX = 1;
7938
7939 /*
7940 * end of parsing of this node.
7941 */
7942 nodePop(ctxt);
7943 oldname = namePop(ctxt);
7944 spacePop(ctxt);
7945 if (oldname != NULL) {
7946#ifdef DEBUG_STACK
7947 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7948#endif
7949 xmlFree(oldname);
7950 }
7951 }
7952 xmlFree(name);
7953 ctxt->instate = XML_PARSER_CONTENT;
7954#ifdef DEBUG_PUSH
7955 xmlGenericError(xmlGenericErrorContext,
7956 "PP: entering CONTENT\n");
7957#endif
7958 break;
7959 }
7960 case XML_PARSER_CONTENT: {
7961 const xmlChar *test;
7962 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00007963 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00007964
7965 /*
7966 * Handle preparsed entities and charRef
7967 */
7968 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007969 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00007970
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007971 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00007972 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7973 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007974 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00007975 ctxt->token = 0;
7976 }
7977 if ((avail < 2) && (ctxt->inputNr == 1))
7978 goto done;
7979 cur = ctxt->input->cur[0];
7980 next = ctxt->input->cur[1];
7981
7982 test = CUR_PTR;
7983 cons = ctxt->input->consumed;
7984 tok = ctxt->token;
7985 if ((cur == '<') && (next == '?')) {
7986 if ((!terminate) &&
7987 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7988 goto done;
7989#ifdef DEBUG_PUSH
7990 xmlGenericError(xmlGenericErrorContext,
7991 "PP: Parsing PI\n");
7992#endif
7993 xmlParsePI(ctxt);
7994 } else if ((cur == '<') && (next == '!') &&
7995 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7996 if ((!terminate) &&
7997 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7998 goto done;
7999#ifdef DEBUG_PUSH
8000 xmlGenericError(xmlGenericErrorContext,
8001 "PP: Parsing Comment\n");
8002#endif
8003 xmlParseComment(ctxt);
8004 ctxt->instate = XML_PARSER_CONTENT;
8005 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8006 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8007 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8008 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8009 (ctxt->input->cur[8] == '[')) {
8010 SKIP(9);
8011 ctxt->instate = XML_PARSER_CDATA_SECTION;
8012#ifdef DEBUG_PUSH
8013 xmlGenericError(xmlGenericErrorContext,
8014 "PP: entering CDATA_SECTION\n");
8015#endif
8016 break;
8017 } else if ((cur == '<') && (next == '!') &&
8018 (avail < 9)) {
8019 goto done;
8020 } else if ((cur == '<') && (next == '/')) {
8021 ctxt->instate = XML_PARSER_END_TAG;
8022#ifdef DEBUG_PUSH
8023 xmlGenericError(xmlGenericErrorContext,
8024 "PP: entering END_TAG\n");
8025#endif
8026 break;
8027 } else if (cur == '<') {
8028 ctxt->instate = XML_PARSER_START_TAG;
8029#ifdef DEBUG_PUSH
8030 xmlGenericError(xmlGenericErrorContext,
8031 "PP: entering START_TAG\n");
8032#endif
8033 break;
8034 } else if (cur == '&') {
8035 if ((!terminate) &&
8036 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8037 goto done;
8038#ifdef DEBUG_PUSH
8039 xmlGenericError(xmlGenericErrorContext,
8040 "PP: Parsing Reference\n");
8041#endif
8042 xmlParseReference(ctxt);
8043 } else {
8044 /* TODO Avoid the extra copy, handle directly !!! */
8045 /*
8046 * Goal of the following test is:
8047 * - minimize calls to the SAX 'character' callback
8048 * when they are mergeable
8049 * - handle an problem for isBlank when we only parse
8050 * a sequence of blank chars and the next one is
8051 * not available to check against '<' presence.
8052 * - tries to homogenize the differences in SAX
8053 * callbacks beween the push and pull versions
8054 * of the parser.
8055 */
8056 if ((ctxt->inputNr == 1) &&
8057 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8058 if ((!terminate) &&
8059 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8060 goto done;
8061 }
8062 ctxt->checkIndex = 0;
8063#ifdef DEBUG_PUSH
8064 xmlGenericError(xmlGenericErrorContext,
8065 "PP: Parsing char data\n");
8066#endif
8067 xmlParseCharData(ctxt, 0);
8068 }
8069 /*
8070 * Pop-up of finished entities.
8071 */
8072 while ((RAW == 0) && (ctxt->inputNr > 1))
8073 xmlPopInput(ctxt);
8074 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8075 (tok == ctxt->token)) {
8076 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8078 ctxt->sax->error(ctxt->userData,
8079 "detected an error in element content\n");
8080 ctxt->wellFormed = 0;
8081 ctxt->disableSAX = 1;
8082 ctxt->instate = XML_PARSER_EOF;
8083 break;
8084 }
8085 break;
8086 }
8087 case XML_PARSER_CDATA_SECTION: {
8088 /*
8089 * The Push mode need to have the SAX callback for
8090 * cdataBlock merge back contiguous callbacks.
8091 */
8092 int base;
8093
8094 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8095 if (base < 0) {
8096 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8097 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8098 if (ctxt->sax->cdataBlock != NULL)
8099 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8100 XML_PARSER_BIG_BUFFER_SIZE);
8101 }
8102 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8103 ctxt->checkIndex = 0;
8104 }
8105 goto done;
8106 } else {
8107 if ((ctxt->sax != NULL) && (base > 0) &&
8108 (!ctxt->disableSAX)) {
8109 if (ctxt->sax->cdataBlock != NULL)
8110 ctxt->sax->cdataBlock(ctxt->userData,
8111 ctxt->input->cur, base);
8112 }
8113 SKIP(base + 3);
8114 ctxt->checkIndex = 0;
8115 ctxt->instate = XML_PARSER_CONTENT;
8116#ifdef DEBUG_PUSH
8117 xmlGenericError(xmlGenericErrorContext,
8118 "PP: entering CONTENT\n");
8119#endif
8120 }
8121 break;
8122 }
8123 case XML_PARSER_END_TAG:
8124 if (avail < 2)
8125 goto done;
8126 if ((!terminate) &&
8127 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8128 goto done;
8129 xmlParseEndTag(ctxt);
8130 if (ctxt->name == NULL) {
8131 ctxt->instate = XML_PARSER_EPILOG;
8132#ifdef DEBUG_PUSH
8133 xmlGenericError(xmlGenericErrorContext,
8134 "PP: entering EPILOG\n");
8135#endif
8136 } else {
8137 ctxt->instate = XML_PARSER_CONTENT;
8138#ifdef DEBUG_PUSH
8139 xmlGenericError(xmlGenericErrorContext,
8140 "PP: entering CONTENT\n");
8141#endif
8142 }
8143 break;
8144 case XML_PARSER_DTD: {
8145 /*
8146 * Sorry but progressive parsing of the internal subset
8147 * is not expected to be supported. We first check that
8148 * the full content of the internal subset is available and
8149 * the parsing is launched only at that point.
8150 * Internal subset ends up with "']' S? '>'" in an unescaped
8151 * section and not in a ']]>' sequence which are conditional
8152 * sections (whoever argued to keep that crap in XML deserve
8153 * a place in hell !).
8154 */
8155 int base, i;
8156 xmlChar *buf;
8157 xmlChar quote = 0;
8158
8159 base = ctxt->input->cur - ctxt->input->base;
8160 if (base < 0) return(0);
8161 if (ctxt->checkIndex > base)
8162 base = ctxt->checkIndex;
8163 buf = ctxt->input->buf->buffer->content;
8164 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8165 base++) {
8166 if (quote != 0) {
8167 if (buf[base] == quote)
8168 quote = 0;
8169 continue;
8170 }
8171 if (buf[base] == '"') {
8172 quote = '"';
8173 continue;
8174 }
8175 if (buf[base] == '\'') {
8176 quote = '\'';
8177 continue;
8178 }
8179 if (buf[base] == ']') {
8180 if ((unsigned int) base +1 >=
8181 ctxt->input->buf->buffer->use)
8182 break;
8183 if (buf[base + 1] == ']') {
8184 /* conditional crap, skip both ']' ! */
8185 base++;
8186 continue;
8187 }
8188 for (i = 0;
8189 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8190 i++) {
8191 if (buf[base + i] == '>')
8192 goto found_end_int_subset;
8193 }
8194 break;
8195 }
8196 }
8197 /*
8198 * We didn't found the end of the Internal subset
8199 */
8200 if (quote == 0)
8201 ctxt->checkIndex = base;
8202#ifdef DEBUG_PUSH
8203 if (next == 0)
8204 xmlGenericError(xmlGenericErrorContext,
8205 "PP: lookup of int subset end filed\n");
8206#endif
8207 goto done;
8208
8209found_end_int_subset:
8210 xmlParseInternalSubset(ctxt);
8211 ctxt->inSubset = 2;
8212 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8213 (ctxt->sax->externalSubset != NULL))
8214 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8215 ctxt->extSubSystem, ctxt->extSubURI);
8216 ctxt->inSubset = 0;
8217 ctxt->instate = XML_PARSER_PROLOG;
8218 ctxt->checkIndex = 0;
8219#ifdef DEBUG_PUSH
8220 xmlGenericError(xmlGenericErrorContext,
8221 "PP: entering PROLOG\n");
8222#endif
8223 break;
8224 }
8225 case XML_PARSER_COMMENT:
8226 xmlGenericError(xmlGenericErrorContext,
8227 "PP: internal error, state == COMMENT\n");
8228 ctxt->instate = XML_PARSER_CONTENT;
8229#ifdef DEBUG_PUSH
8230 xmlGenericError(xmlGenericErrorContext,
8231 "PP: entering CONTENT\n");
8232#endif
8233 break;
8234 case XML_PARSER_PI:
8235 xmlGenericError(xmlGenericErrorContext,
8236 "PP: internal error, state == PI\n");
8237 ctxt->instate = XML_PARSER_CONTENT;
8238#ifdef DEBUG_PUSH
8239 xmlGenericError(xmlGenericErrorContext,
8240 "PP: entering CONTENT\n");
8241#endif
8242 break;
8243 case XML_PARSER_ENTITY_DECL:
8244 xmlGenericError(xmlGenericErrorContext,
8245 "PP: internal error, state == ENTITY_DECL\n");
8246 ctxt->instate = XML_PARSER_DTD;
8247#ifdef DEBUG_PUSH
8248 xmlGenericError(xmlGenericErrorContext,
8249 "PP: entering DTD\n");
8250#endif
8251 break;
8252 case XML_PARSER_ENTITY_VALUE:
8253 xmlGenericError(xmlGenericErrorContext,
8254 "PP: internal error, state == ENTITY_VALUE\n");
8255 ctxt->instate = XML_PARSER_CONTENT;
8256#ifdef DEBUG_PUSH
8257 xmlGenericError(xmlGenericErrorContext,
8258 "PP: entering DTD\n");
8259#endif
8260 break;
8261 case XML_PARSER_ATTRIBUTE_VALUE:
8262 xmlGenericError(xmlGenericErrorContext,
8263 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8264 ctxt->instate = XML_PARSER_START_TAG;
8265#ifdef DEBUG_PUSH
8266 xmlGenericError(xmlGenericErrorContext,
8267 "PP: entering START_TAG\n");
8268#endif
8269 break;
8270 case XML_PARSER_SYSTEM_LITERAL:
8271 xmlGenericError(xmlGenericErrorContext,
8272 "PP: internal error, state == SYSTEM_LITERAL\n");
8273 ctxt->instate = XML_PARSER_START_TAG;
8274#ifdef DEBUG_PUSH
8275 xmlGenericError(xmlGenericErrorContext,
8276 "PP: entering START_TAG\n");
8277#endif
8278 break;
8279 }
8280 }
8281done:
8282#ifdef DEBUG_PUSH
8283 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8284#endif
8285 return(ret);
8286}
8287
8288/**
Owen Taylor3473f882001-02-23 17:55:21 +00008289 * xmlParseChunk:
8290 * @ctxt: an XML parser context
8291 * @chunk: an char array
8292 * @size: the size in byte of the chunk
8293 * @terminate: last chunk indicator
8294 *
8295 * Parse a Chunk of memory
8296 *
8297 * Returns zero if no error, the xmlParserErrors otherwise.
8298 */
8299int
8300xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8301 int terminate) {
8302 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8303 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8304 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8305 int cur = ctxt->input->cur - ctxt->input->base;
8306
8307 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8308 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8309 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008310 ctxt->input->end =
8311 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008312#ifdef DEBUG_PUSH
8313 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8314#endif
8315
8316 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8317 xmlParseTryOrFinish(ctxt, terminate);
8318 } else if (ctxt->instate != XML_PARSER_EOF) {
8319 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8320 xmlParserInputBufferPtr in = ctxt->input->buf;
8321 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8322 (in->raw != NULL)) {
8323 int nbchars;
8324
8325 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8326 if (nbchars < 0) {
8327 xmlGenericError(xmlGenericErrorContext,
8328 "xmlParseChunk: encoder error\n");
8329 return(XML_ERR_INVALID_ENCODING);
8330 }
8331 }
8332 }
8333 }
8334 xmlParseTryOrFinish(ctxt, terminate);
8335 if (terminate) {
8336 /*
8337 * Check for termination
8338 */
8339 if ((ctxt->instate != XML_PARSER_EOF) &&
8340 (ctxt->instate != XML_PARSER_EPILOG)) {
8341 ctxt->errNo = XML_ERR_DOCUMENT_END;
8342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8343 ctxt->sax->error(ctxt->userData,
8344 "Extra content at the end of the document\n");
8345 ctxt->wellFormed = 0;
8346 ctxt->disableSAX = 1;
8347 }
8348 if (ctxt->instate != XML_PARSER_EOF) {
8349 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8350 (!ctxt->disableSAX))
8351 ctxt->sax->endDocument(ctxt->userData);
8352 }
8353 ctxt->instate = XML_PARSER_EOF;
8354 }
8355 return((xmlParserErrors) ctxt->errNo);
8356}
8357
8358/************************************************************************
8359 * *
8360 * I/O front end functions to the parser *
8361 * *
8362 ************************************************************************/
8363
8364/**
8365 * xmlStopParser:
8366 * @ctxt: an XML parser context
8367 *
8368 * Blocks further parser processing
8369 */
8370void
8371xmlStopParser(xmlParserCtxtPtr ctxt) {
8372 ctxt->instate = XML_PARSER_EOF;
8373 if (ctxt->input != NULL)
8374 ctxt->input->cur = BAD_CAST"";
8375}
8376
8377/**
8378 * xmlCreatePushParserCtxt:
8379 * @sax: a SAX handler
8380 * @user_data: The user data returned on SAX callbacks
8381 * @chunk: a pointer to an array of chars
8382 * @size: number of chars in the array
8383 * @filename: an optional file name or URI
8384 *
8385 * Create a parser context for using the XML parser in push mode
8386 * To allow content encoding detection, @size should be >= 4
8387 * The value of @filename is used for fetching external entities
8388 * and error/warning reports.
8389 *
8390 * Returns the new parser context or NULL
8391 */
8392xmlParserCtxtPtr
8393xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8394 const char *chunk, int size, const char *filename) {
8395 xmlParserCtxtPtr ctxt;
8396 xmlParserInputPtr inputStream;
8397 xmlParserInputBufferPtr buf;
8398 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8399
8400 /*
8401 * plug some encoding conversion routines
8402 */
8403 if ((chunk != NULL) && (size >= 4))
8404 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8405
8406 buf = xmlAllocParserInputBuffer(enc);
8407 if (buf == NULL) return(NULL);
8408
8409 ctxt = xmlNewParserCtxt();
8410 if (ctxt == NULL) {
8411 xmlFree(buf);
8412 return(NULL);
8413 }
8414 if (sax != NULL) {
8415 if (ctxt->sax != &xmlDefaultSAXHandler)
8416 xmlFree(ctxt->sax);
8417 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8418 if (ctxt->sax == NULL) {
8419 xmlFree(buf);
8420 xmlFree(ctxt);
8421 return(NULL);
8422 }
8423 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8424 if (user_data != NULL)
8425 ctxt->userData = user_data;
8426 }
8427 if (filename == NULL) {
8428 ctxt->directory = NULL;
8429 } else {
8430 ctxt->directory = xmlParserGetDirectory(filename);
8431 }
8432
8433 inputStream = xmlNewInputStream(ctxt);
8434 if (inputStream == NULL) {
8435 xmlFreeParserCtxt(ctxt);
8436 return(NULL);
8437 }
8438
8439 if (filename == NULL)
8440 inputStream->filename = NULL;
8441 else
8442 inputStream->filename = xmlMemStrdup(filename);
8443 inputStream->buf = buf;
8444 inputStream->base = inputStream->buf->buffer->content;
8445 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008446 inputStream->end =
8447 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008448 if (enc != XML_CHAR_ENCODING_NONE) {
8449 xmlSwitchEncoding(ctxt, enc);
8450 }
8451
8452 inputPush(ctxt, inputStream);
8453
8454 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8455 (ctxt->input->buf != NULL)) {
8456 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8457#ifdef DEBUG_PUSH
8458 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8459#endif
8460 }
8461
8462 return(ctxt);
8463}
8464
8465/**
8466 * xmlCreateIOParserCtxt:
8467 * @sax: a SAX handler
8468 * @user_data: The user data returned on SAX callbacks
8469 * @ioread: an I/O read function
8470 * @ioclose: an I/O close function
8471 * @ioctx: an I/O handler
8472 * @enc: the charset encoding if known
8473 *
8474 * Create a parser context for using the XML parser with an existing
8475 * I/O stream
8476 *
8477 * Returns the new parser context or NULL
8478 */
8479xmlParserCtxtPtr
8480xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8481 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8482 void *ioctx, xmlCharEncoding enc) {
8483 xmlParserCtxtPtr ctxt;
8484 xmlParserInputPtr inputStream;
8485 xmlParserInputBufferPtr buf;
8486
8487 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8488 if (buf == NULL) return(NULL);
8489
8490 ctxt = xmlNewParserCtxt();
8491 if (ctxt == NULL) {
8492 xmlFree(buf);
8493 return(NULL);
8494 }
8495 if (sax != NULL) {
8496 if (ctxt->sax != &xmlDefaultSAXHandler)
8497 xmlFree(ctxt->sax);
8498 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8499 if (ctxt->sax == NULL) {
8500 xmlFree(buf);
8501 xmlFree(ctxt);
8502 return(NULL);
8503 }
8504 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8505 if (user_data != NULL)
8506 ctxt->userData = user_data;
8507 }
8508
8509 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8510 if (inputStream == NULL) {
8511 xmlFreeParserCtxt(ctxt);
8512 return(NULL);
8513 }
8514 inputPush(ctxt, inputStream);
8515
8516 return(ctxt);
8517}
8518
8519/************************************************************************
8520 * *
8521 * Front ends when parsing a Dtd *
8522 * *
8523 ************************************************************************/
8524
8525/**
8526 * xmlIOParseDTD:
8527 * @sax: the SAX handler block or NULL
8528 * @input: an Input Buffer
8529 * @enc: the charset encoding if known
8530 *
8531 * Load and parse a DTD
8532 *
8533 * Returns the resulting xmlDtdPtr or NULL in case of error.
8534 * @input will be freed at parsing end.
8535 */
8536
8537xmlDtdPtr
8538xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8539 xmlCharEncoding enc) {
8540 xmlDtdPtr ret = NULL;
8541 xmlParserCtxtPtr ctxt;
8542 xmlParserInputPtr pinput = NULL;
8543
8544 if (input == NULL)
8545 return(NULL);
8546
8547 ctxt = xmlNewParserCtxt();
8548 if (ctxt == NULL) {
8549 return(NULL);
8550 }
8551
8552 /*
8553 * Set-up the SAX context
8554 */
8555 if (sax != NULL) {
8556 if (ctxt->sax != NULL)
8557 xmlFree(ctxt->sax);
8558 ctxt->sax = sax;
8559 ctxt->userData = NULL;
8560 }
8561
8562 /*
8563 * generate a parser input from the I/O handler
8564 */
8565
8566 pinput = xmlNewIOInputStream(ctxt, input, enc);
8567 if (pinput == NULL) {
8568 if (sax != NULL) ctxt->sax = NULL;
8569 xmlFreeParserCtxt(ctxt);
8570 return(NULL);
8571 }
8572
8573 /*
8574 * plug some encoding conversion routines here.
8575 */
8576 xmlPushInput(ctxt, pinput);
8577
8578 pinput->filename = NULL;
8579 pinput->line = 1;
8580 pinput->col = 1;
8581 pinput->base = ctxt->input->cur;
8582 pinput->cur = ctxt->input->cur;
8583 pinput->free = NULL;
8584
8585 /*
8586 * let's parse that entity knowing it's an external subset.
8587 */
8588 ctxt->inSubset = 2;
8589 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8590 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8591 BAD_CAST "none", BAD_CAST "none");
8592 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8593
8594 if (ctxt->myDoc != NULL) {
8595 if (ctxt->wellFormed) {
8596 ret = ctxt->myDoc->extSubset;
8597 ctxt->myDoc->extSubset = NULL;
8598 } else {
8599 ret = NULL;
8600 }
8601 xmlFreeDoc(ctxt->myDoc);
8602 ctxt->myDoc = NULL;
8603 }
8604 if (sax != NULL) ctxt->sax = NULL;
8605 xmlFreeParserCtxt(ctxt);
8606
8607 return(ret);
8608}
8609
8610/**
8611 * xmlSAXParseDTD:
8612 * @sax: the SAX handler block
8613 * @ExternalID: a NAME* containing the External ID of the DTD
8614 * @SystemID: a NAME* containing the URL to the DTD
8615 *
8616 * Load and parse an external subset.
8617 *
8618 * Returns the resulting xmlDtdPtr or NULL in case of error.
8619 */
8620
8621xmlDtdPtr
8622xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8623 const xmlChar *SystemID) {
8624 xmlDtdPtr ret = NULL;
8625 xmlParserCtxtPtr ctxt;
8626 xmlParserInputPtr input = NULL;
8627 xmlCharEncoding enc;
8628
8629 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8630
8631 ctxt = xmlNewParserCtxt();
8632 if (ctxt == NULL) {
8633 return(NULL);
8634 }
8635
8636 /*
8637 * Set-up the SAX context
8638 */
8639 if (sax != NULL) {
8640 if (ctxt->sax != NULL)
8641 xmlFree(ctxt->sax);
8642 ctxt->sax = sax;
8643 ctxt->userData = NULL;
8644 }
8645
8646 /*
8647 * Ask the Entity resolver to load the damn thing
8648 */
8649
8650 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8651 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8652 if (input == NULL) {
8653 if (sax != NULL) ctxt->sax = NULL;
8654 xmlFreeParserCtxt(ctxt);
8655 return(NULL);
8656 }
8657
8658 /*
8659 * plug some encoding conversion routines here.
8660 */
8661 xmlPushInput(ctxt, input);
8662 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8663 xmlSwitchEncoding(ctxt, enc);
8664
8665 if (input->filename == NULL)
8666 input->filename = (char *) xmlStrdup(SystemID);
8667 input->line = 1;
8668 input->col = 1;
8669 input->base = ctxt->input->cur;
8670 input->cur = ctxt->input->cur;
8671 input->free = NULL;
8672
8673 /*
8674 * let's parse that entity knowing it's an external subset.
8675 */
8676 ctxt->inSubset = 2;
8677 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8678 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8679 ExternalID, SystemID);
8680 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8681
8682 if (ctxt->myDoc != NULL) {
8683 if (ctxt->wellFormed) {
8684 ret = ctxt->myDoc->extSubset;
8685 ctxt->myDoc->extSubset = NULL;
8686 } else {
8687 ret = NULL;
8688 }
8689 xmlFreeDoc(ctxt->myDoc);
8690 ctxt->myDoc = NULL;
8691 }
8692 if (sax != NULL) ctxt->sax = NULL;
8693 xmlFreeParserCtxt(ctxt);
8694
8695 return(ret);
8696}
8697
8698/**
8699 * xmlParseDTD:
8700 * @ExternalID: a NAME* containing the External ID of the DTD
8701 * @SystemID: a NAME* containing the URL to the DTD
8702 *
8703 * Load and parse an external subset.
8704 *
8705 * Returns the resulting xmlDtdPtr or NULL in case of error.
8706 */
8707
8708xmlDtdPtr
8709xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8710 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8711}
8712
8713/************************************************************************
8714 * *
8715 * Front ends when parsing an Entity *
8716 * *
8717 ************************************************************************/
8718
8719/**
Owen Taylor3473f882001-02-23 17:55:21 +00008720 * xmlParseCtxtExternalEntity:
8721 * @ctx: the existing parsing context
8722 * @URL: the URL for the entity to load
8723 * @ID: the System ID for the entity to load
8724 * @list: the return value for the set of parsed nodes
8725 *
8726 * Parse an external general entity within an existing parsing context
8727 * An external general parsed entity is well-formed if it matches the
8728 * production labeled extParsedEnt.
8729 *
8730 * [78] extParsedEnt ::= TextDecl? content
8731 *
8732 * Returns 0 if the entity is well formed, -1 in case of args problem and
8733 * the parser error code otherwise
8734 */
8735
8736int
8737xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8738 const xmlChar *ID, xmlNodePtr *list) {
8739 xmlParserCtxtPtr ctxt;
8740 xmlDocPtr newDoc;
8741 xmlSAXHandlerPtr oldsax = NULL;
8742 int ret = 0;
8743
8744 if (ctx->depth > 40) {
8745 return(XML_ERR_ENTITY_LOOP);
8746 }
8747
8748 if (list != NULL)
8749 *list = NULL;
8750 if ((URL == NULL) && (ID == NULL))
8751 return(-1);
8752 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8753 return(-1);
8754
8755
8756 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8757 if (ctxt == NULL) return(-1);
8758 ctxt->userData = ctxt;
8759 oldsax = ctxt->sax;
8760 ctxt->sax = ctx->sax;
8761 newDoc = xmlNewDoc(BAD_CAST "1.0");
8762 if (newDoc == NULL) {
8763 xmlFreeParserCtxt(ctxt);
8764 return(-1);
8765 }
8766 if (ctx->myDoc != NULL) {
8767 newDoc->intSubset = ctx->myDoc->intSubset;
8768 newDoc->extSubset = ctx->myDoc->extSubset;
8769 }
8770 if (ctx->myDoc->URL != NULL) {
8771 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8772 }
8773 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8774 if (newDoc->children == NULL) {
8775 ctxt->sax = oldsax;
8776 xmlFreeParserCtxt(ctxt);
8777 newDoc->intSubset = NULL;
8778 newDoc->extSubset = NULL;
8779 xmlFreeDoc(newDoc);
8780 return(-1);
8781 }
8782 nodePush(ctxt, newDoc->children);
8783 if (ctx->myDoc == NULL) {
8784 ctxt->myDoc = newDoc;
8785 } else {
8786 ctxt->myDoc = ctx->myDoc;
8787 newDoc->children->doc = ctx->myDoc;
8788 }
8789
8790 /*
8791 * Parse a possible text declaration first
8792 */
8793 GROW;
8794 if ((RAW == '<') && (NXT(1) == '?') &&
8795 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8796 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8797 xmlParseTextDecl(ctxt);
8798 }
8799
8800 /*
8801 * Doing validity checking on chunk doesn't make sense
8802 */
8803 ctxt->instate = XML_PARSER_CONTENT;
8804 ctxt->validate = ctx->validate;
8805 ctxt->loadsubset = ctx->loadsubset;
8806 ctxt->depth = ctx->depth + 1;
8807 ctxt->replaceEntities = ctx->replaceEntities;
8808 if (ctxt->validate) {
8809 ctxt->vctxt.error = ctx->vctxt.error;
8810 ctxt->vctxt.warning = ctx->vctxt.warning;
8811 /* Allocate the Node stack */
8812 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8813 if (ctxt->vctxt.nodeTab == NULL) {
8814 xmlGenericError(xmlGenericErrorContext,
8815 "xmlParseCtxtExternalEntity: out of memory\n");
8816 ctxt->validate = 0;
8817 ctxt->vctxt.error = NULL;
8818 ctxt->vctxt.warning = NULL;
8819 } else {
8820 ctxt->vctxt.nodeNr = 0;
8821 ctxt->vctxt.nodeMax = 4;
8822 ctxt->vctxt.node = NULL;
8823 }
8824 } else {
8825 ctxt->vctxt.error = NULL;
8826 ctxt->vctxt.warning = NULL;
8827 }
8828
8829 xmlParseContent(ctxt);
8830
8831 if ((RAW == '<') && (NXT(1) == '/')) {
8832 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8834 ctxt->sax->error(ctxt->userData,
8835 "chunk is not well balanced\n");
8836 ctxt->wellFormed = 0;
8837 ctxt->disableSAX = 1;
8838 } else if (RAW != 0) {
8839 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8841 ctxt->sax->error(ctxt->userData,
8842 "extra content at the end of well balanced chunk\n");
8843 ctxt->wellFormed = 0;
8844 ctxt->disableSAX = 1;
8845 }
8846 if (ctxt->node != newDoc->children) {
8847 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8849 ctxt->sax->error(ctxt->userData,
8850 "chunk is not well balanced\n");
8851 ctxt->wellFormed = 0;
8852 ctxt->disableSAX = 1;
8853 }
8854
8855 if (!ctxt->wellFormed) {
8856 if (ctxt->errNo == 0)
8857 ret = 1;
8858 else
8859 ret = ctxt->errNo;
8860 } else {
8861 if (list != NULL) {
8862 xmlNodePtr cur;
8863
8864 /*
8865 * Return the newly created nodeset after unlinking it from
8866 * they pseudo parent.
8867 */
8868 cur = newDoc->children->children;
8869 *list = cur;
8870 while (cur != NULL) {
8871 cur->parent = NULL;
8872 cur = cur->next;
8873 }
8874 newDoc->children->children = NULL;
8875 }
8876 ret = 0;
8877 }
8878 ctxt->sax = oldsax;
8879 xmlFreeParserCtxt(ctxt);
8880 newDoc->intSubset = NULL;
8881 newDoc->extSubset = NULL;
8882 xmlFreeDoc(newDoc);
8883
8884 return(ret);
8885}
8886
8887/**
8888 * xmlParseExternalEntity:
8889 * @doc: the document the chunk pertains to
8890 * @sax: the SAX handler bloc (possibly NULL)
8891 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8892 * @depth: Used for loop detection, use 0
8893 * @URL: the URL for the entity to load
8894 * @ID: the System ID for the entity to load
8895 * @list: the return value for the set of parsed nodes
8896 *
8897 * Parse an external general entity
8898 * An external general parsed entity is well-formed if it matches the
8899 * production labeled extParsedEnt.
8900 *
8901 * [78] extParsedEnt ::= TextDecl? content
8902 *
8903 * Returns 0 if the entity is well formed, -1 in case of args problem and
8904 * the parser error code otherwise
8905 */
8906
8907int
8908xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8909 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8910 xmlParserCtxtPtr ctxt;
8911 xmlDocPtr newDoc;
8912 xmlSAXHandlerPtr oldsax = NULL;
8913 int ret = 0;
8914
8915 if (depth > 40) {
8916 return(XML_ERR_ENTITY_LOOP);
8917 }
8918
8919
8920
8921 if (list != NULL)
8922 *list = NULL;
8923 if ((URL == NULL) && (ID == NULL))
8924 return(-1);
8925 if (doc == NULL) /* @@ relax but check for dereferences */
8926 return(-1);
8927
8928
8929 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8930 if (ctxt == NULL) return(-1);
8931 ctxt->userData = ctxt;
8932 if (sax != NULL) {
8933 oldsax = ctxt->sax;
8934 ctxt->sax = sax;
8935 if (user_data != NULL)
8936 ctxt->userData = user_data;
8937 }
8938 newDoc = xmlNewDoc(BAD_CAST "1.0");
8939 if (newDoc == NULL) {
8940 xmlFreeParserCtxt(ctxt);
8941 return(-1);
8942 }
8943 if (doc != NULL) {
8944 newDoc->intSubset = doc->intSubset;
8945 newDoc->extSubset = doc->extSubset;
8946 }
8947 if (doc->URL != NULL) {
8948 newDoc->URL = xmlStrdup(doc->URL);
8949 }
8950 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8951 if (newDoc->children == NULL) {
8952 if (sax != NULL)
8953 ctxt->sax = oldsax;
8954 xmlFreeParserCtxt(ctxt);
8955 newDoc->intSubset = NULL;
8956 newDoc->extSubset = NULL;
8957 xmlFreeDoc(newDoc);
8958 return(-1);
8959 }
8960 nodePush(ctxt, newDoc->children);
8961 if (doc == NULL) {
8962 ctxt->myDoc = newDoc;
8963 } else {
8964 ctxt->myDoc = doc;
8965 newDoc->children->doc = doc;
8966 }
8967
8968 /*
8969 * Parse a possible text declaration first
8970 */
8971 GROW;
8972 if ((RAW == '<') && (NXT(1) == '?') &&
8973 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8974 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8975 xmlParseTextDecl(ctxt);
8976 }
8977
8978 /*
8979 * Doing validity checking on chunk doesn't make sense
8980 */
8981 ctxt->instate = XML_PARSER_CONTENT;
8982 ctxt->validate = 0;
Daniel Veillarde470df72001-04-18 21:41:07 +00008983 ctxt->external = 2;
Owen Taylor3473f882001-02-23 17:55:21 +00008984 ctxt->loadsubset = 0;
8985 ctxt->depth = depth;
8986
8987 xmlParseContent(ctxt);
8988
8989 if ((RAW == '<') && (NXT(1) == '/')) {
8990 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8992 ctxt->sax->error(ctxt->userData,
8993 "chunk is not well balanced\n");
8994 ctxt->wellFormed = 0;
8995 ctxt->disableSAX = 1;
8996 } else if (RAW != 0) {
8997 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8999 ctxt->sax->error(ctxt->userData,
9000 "extra content at the end of well balanced chunk\n");
9001 ctxt->wellFormed = 0;
9002 ctxt->disableSAX = 1;
9003 }
9004 if (ctxt->node != newDoc->children) {
9005 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9007 ctxt->sax->error(ctxt->userData,
9008 "chunk is not well balanced\n");
9009 ctxt->wellFormed = 0;
9010 ctxt->disableSAX = 1;
9011 }
9012
9013 if (!ctxt->wellFormed) {
9014 if (ctxt->errNo == 0)
9015 ret = 1;
9016 else
9017 ret = ctxt->errNo;
9018 } else {
9019 if (list != NULL) {
9020 xmlNodePtr cur;
9021
9022 /*
9023 * Return the newly created nodeset after unlinking it from
9024 * they pseudo parent.
9025 */
9026 cur = newDoc->children->children;
9027 *list = cur;
9028 while (cur != NULL) {
9029 cur->parent = NULL;
9030 cur = cur->next;
9031 }
9032 newDoc->children->children = NULL;
9033 }
9034 ret = 0;
9035 }
9036 if (sax != NULL)
9037 ctxt->sax = oldsax;
9038 xmlFreeParserCtxt(ctxt);
9039 newDoc->intSubset = NULL;
9040 newDoc->extSubset = NULL;
9041 xmlFreeDoc(newDoc);
9042
9043 return(ret);
9044}
9045
9046/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009047 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009048 * @doc: the document the chunk pertains to
9049 * @sax: the SAX handler bloc (possibly NULL)
9050 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9051 * @depth: Used for loop detection, use 0
9052 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9053 * @list: the return value for the set of parsed nodes
9054 *
9055 * Parse a well-balanced chunk of an XML document
9056 * called by the parser
9057 * The allowed sequence for the Well Balanced Chunk is the one defined by
9058 * the content production in the XML grammar:
9059 *
9060 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9061 *
9062 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9063 * the parser error code otherwise
9064 */
9065
9066int
9067xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9068 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9069 xmlParserCtxtPtr ctxt;
9070 xmlDocPtr newDoc;
9071 xmlSAXHandlerPtr oldsax = NULL;
9072 int size;
9073 int ret = 0;
9074
9075 if (depth > 40) {
9076 return(XML_ERR_ENTITY_LOOP);
9077 }
9078
9079
9080 if (list != NULL)
9081 *list = NULL;
9082 if (string == NULL)
9083 return(-1);
9084
9085 size = xmlStrlen(string);
9086
9087 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9088 if (ctxt == NULL) return(-1);
9089 ctxt->userData = ctxt;
9090 if (sax != NULL) {
9091 oldsax = ctxt->sax;
9092 ctxt->sax = sax;
9093 if (user_data != NULL)
9094 ctxt->userData = user_data;
9095 }
9096 newDoc = xmlNewDoc(BAD_CAST "1.0");
9097 if (newDoc == NULL) {
9098 xmlFreeParserCtxt(ctxt);
9099 return(-1);
9100 }
9101 if (doc != NULL) {
9102 newDoc->intSubset = doc->intSubset;
9103 newDoc->extSubset = doc->extSubset;
9104 }
9105 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9106 if (newDoc->children == NULL) {
9107 if (sax != NULL)
9108 ctxt->sax = oldsax;
9109 xmlFreeParserCtxt(ctxt);
9110 newDoc->intSubset = NULL;
9111 newDoc->extSubset = NULL;
9112 xmlFreeDoc(newDoc);
9113 return(-1);
9114 }
9115 nodePush(ctxt, newDoc->children);
9116 if (doc == NULL) {
9117 ctxt->myDoc = newDoc;
9118 } else {
9119 ctxt->myDoc = doc;
9120 newDoc->children->doc = doc;
9121 }
9122 ctxt->instate = XML_PARSER_CONTENT;
9123 ctxt->depth = depth;
9124
9125 /*
9126 * Doing validity checking on chunk doesn't make sense
9127 */
9128 ctxt->validate = 0;
9129 ctxt->loadsubset = 0;
9130
9131 xmlParseContent(ctxt);
9132
9133 if ((RAW == '<') && (NXT(1) == '/')) {
9134 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9136 ctxt->sax->error(ctxt->userData,
9137 "chunk is not well balanced\n");
9138 ctxt->wellFormed = 0;
9139 ctxt->disableSAX = 1;
9140 } else if (RAW != 0) {
9141 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9143 ctxt->sax->error(ctxt->userData,
9144 "extra content at the end of well balanced chunk\n");
9145 ctxt->wellFormed = 0;
9146 ctxt->disableSAX = 1;
9147 }
9148 if (ctxt->node != newDoc->children) {
9149 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9150 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9151 ctxt->sax->error(ctxt->userData,
9152 "chunk is not well balanced\n");
9153 ctxt->wellFormed = 0;
9154 ctxt->disableSAX = 1;
9155 }
9156
9157 if (!ctxt->wellFormed) {
9158 if (ctxt->errNo == 0)
9159 ret = 1;
9160 else
9161 ret = ctxt->errNo;
9162 } else {
9163 if (list != NULL) {
9164 xmlNodePtr cur;
9165
9166 /*
9167 * Return the newly created nodeset after unlinking it from
9168 * they pseudo parent.
9169 */
9170 cur = newDoc->children->children;
9171 *list = cur;
9172 while (cur != NULL) {
9173 cur->parent = NULL;
9174 cur = cur->next;
9175 }
9176 newDoc->children->children = NULL;
9177 }
9178 ret = 0;
9179 }
9180 if (sax != NULL)
9181 ctxt->sax = oldsax;
9182 xmlFreeParserCtxt(ctxt);
9183 newDoc->intSubset = NULL;
9184 newDoc->extSubset = NULL;
9185 xmlFreeDoc(newDoc);
9186
9187 return(ret);
9188}
9189
9190/**
9191 * xmlSAXParseEntity:
9192 * @sax: the SAX handler block
9193 * @filename: the filename
9194 *
9195 * parse an XML external entity out of context and build a tree.
9196 * It use the given SAX function block to handle the parsing callback.
9197 * If sax is NULL, fallback to the default DOM tree building routines.
9198 *
9199 * [78] extParsedEnt ::= TextDecl? content
9200 *
9201 * This correspond to a "Well Balanced" chunk
9202 *
9203 * Returns the resulting document tree
9204 */
9205
9206xmlDocPtr
9207xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9208 xmlDocPtr ret;
9209 xmlParserCtxtPtr ctxt;
9210 char *directory = NULL;
9211
9212 ctxt = xmlCreateFileParserCtxt(filename);
9213 if (ctxt == NULL) {
9214 return(NULL);
9215 }
9216 if (sax != NULL) {
9217 if (ctxt->sax != NULL)
9218 xmlFree(ctxt->sax);
9219 ctxt->sax = sax;
9220 ctxt->userData = NULL;
9221 }
9222
9223 if ((ctxt->directory == NULL) && (directory == NULL))
9224 directory = xmlParserGetDirectory(filename);
9225
9226 xmlParseExtParsedEnt(ctxt);
9227
9228 if (ctxt->wellFormed)
9229 ret = ctxt->myDoc;
9230 else {
9231 ret = NULL;
9232 xmlFreeDoc(ctxt->myDoc);
9233 ctxt->myDoc = NULL;
9234 }
9235 if (sax != NULL)
9236 ctxt->sax = NULL;
9237 xmlFreeParserCtxt(ctxt);
9238
9239 return(ret);
9240}
9241
9242/**
9243 * xmlParseEntity:
9244 * @filename: the filename
9245 *
9246 * parse an XML external entity out of context and build a tree.
9247 *
9248 * [78] extParsedEnt ::= TextDecl? content
9249 *
9250 * This correspond to a "Well Balanced" chunk
9251 *
9252 * Returns the resulting document tree
9253 */
9254
9255xmlDocPtr
9256xmlParseEntity(const char *filename) {
9257 return(xmlSAXParseEntity(NULL, filename));
9258}
9259
9260/**
9261 * xmlCreateEntityParserCtxt:
9262 * @URL: the entity URL
9263 * @ID: the entity PUBLIC ID
9264 * @base: a posible base for the target URI
9265 *
9266 * Create a parser context for an external entity
9267 * Automatic support for ZLIB/Compress compressed document is provided
9268 * by default if found at compile-time.
9269 *
9270 * Returns the new parser context or NULL
9271 */
9272xmlParserCtxtPtr
9273xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9274 const xmlChar *base) {
9275 xmlParserCtxtPtr ctxt;
9276 xmlParserInputPtr inputStream;
9277 char *directory = NULL;
9278 xmlChar *uri;
9279
9280 ctxt = xmlNewParserCtxt();
9281 if (ctxt == NULL) {
9282 return(NULL);
9283 }
9284
9285 uri = xmlBuildURI(URL, base);
9286
9287 if (uri == NULL) {
9288 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9289 if (inputStream == NULL) {
9290 xmlFreeParserCtxt(ctxt);
9291 return(NULL);
9292 }
9293
9294 inputPush(ctxt, inputStream);
9295
9296 if ((ctxt->directory == NULL) && (directory == NULL))
9297 directory = xmlParserGetDirectory((char *)URL);
9298 if ((ctxt->directory == NULL) && (directory != NULL))
9299 ctxt->directory = directory;
9300 } else {
9301 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9302 if (inputStream == NULL) {
9303 xmlFree(uri);
9304 xmlFreeParserCtxt(ctxt);
9305 return(NULL);
9306 }
9307
9308 inputPush(ctxt, inputStream);
9309
9310 if ((ctxt->directory == NULL) && (directory == NULL))
9311 directory = xmlParserGetDirectory((char *)uri);
9312 if ((ctxt->directory == NULL) && (directory != NULL))
9313 ctxt->directory = directory;
9314 xmlFree(uri);
9315 }
9316
9317 return(ctxt);
9318}
9319
9320/************************************************************************
9321 * *
9322 * Front ends when parsing from a file *
9323 * *
9324 ************************************************************************/
9325
9326/**
9327 * xmlCreateFileParserCtxt:
9328 * @filename: the filename
9329 *
9330 * Create a parser context for a file content.
9331 * Automatic support for ZLIB/Compress compressed document is provided
9332 * by default if found at compile-time.
9333 *
9334 * Returns the new parser context or NULL
9335 */
9336xmlParserCtxtPtr
9337xmlCreateFileParserCtxt(const char *filename)
9338{
9339 xmlParserCtxtPtr ctxt;
9340 xmlParserInputPtr inputStream;
9341 xmlParserInputBufferPtr buf;
9342 char *directory = NULL;
9343
9344 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9345 if (buf == NULL) {
9346 return(NULL);
9347 }
9348
9349 ctxt = xmlNewParserCtxt();
9350 if (ctxt == NULL) {
9351 if (xmlDefaultSAXHandler.error != NULL) {
9352 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9353 }
9354 return(NULL);
9355 }
9356
9357 inputStream = xmlNewInputStream(ctxt);
9358 if (inputStream == NULL) {
9359 xmlFreeParserCtxt(ctxt);
9360 return(NULL);
9361 }
9362
9363 inputStream->filename = xmlMemStrdup(filename);
9364 inputStream->buf = buf;
9365 inputStream->base = inputStream->buf->buffer->content;
9366 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009367 inputStream->end =
9368 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009369
9370 inputPush(ctxt, inputStream);
9371 if ((ctxt->directory == NULL) && (directory == NULL))
9372 directory = xmlParserGetDirectory(filename);
9373 if ((ctxt->directory == NULL) && (directory != NULL))
9374 ctxt->directory = directory;
9375
9376 return(ctxt);
9377}
9378
9379/**
9380 * xmlSAXParseFile:
9381 * @sax: the SAX handler block
9382 * @filename: the filename
9383 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9384 * documents
9385 *
9386 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9387 * compressed document is provided by default if found at compile-time.
9388 * It use the given SAX function block to handle the parsing callback.
9389 * If sax is NULL, fallback to the default DOM tree building routines.
9390 *
9391 * Returns the resulting document tree
9392 */
9393
9394xmlDocPtr
9395xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9396 int recovery) {
9397 xmlDocPtr ret;
9398 xmlParserCtxtPtr ctxt;
9399 char *directory = NULL;
9400
9401 ctxt = xmlCreateFileParserCtxt(filename);
9402 if (ctxt == NULL) {
9403 return(NULL);
9404 }
9405 if (sax != NULL) {
9406 if (ctxt->sax != NULL)
9407 xmlFree(ctxt->sax);
9408 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009409 }
9410
9411 if ((ctxt->directory == NULL) && (directory == NULL))
9412 directory = xmlParserGetDirectory(filename);
9413 if ((ctxt->directory == NULL) && (directory != NULL))
9414 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9415
9416 xmlParseDocument(ctxt);
9417
9418 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9419 else {
9420 ret = NULL;
9421 xmlFreeDoc(ctxt->myDoc);
9422 ctxt->myDoc = NULL;
9423 }
9424 if (sax != NULL)
9425 ctxt->sax = NULL;
9426 xmlFreeParserCtxt(ctxt);
9427
9428 return(ret);
9429}
9430
9431/**
9432 * xmlRecoverDoc:
9433 * @cur: a pointer to an array of xmlChar
9434 *
9435 * parse an XML in-memory document and build a tree.
9436 * In the case the document is not Well Formed, a tree is built anyway
9437 *
9438 * Returns the resulting document tree
9439 */
9440
9441xmlDocPtr
9442xmlRecoverDoc(xmlChar *cur) {
9443 return(xmlSAXParseDoc(NULL, cur, 1));
9444}
9445
9446/**
9447 * xmlParseFile:
9448 * @filename: the filename
9449 *
9450 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9451 * compressed document is provided by default if found at compile-time.
9452 *
9453 * Returns the resulting document tree
9454 */
9455
9456xmlDocPtr
9457xmlParseFile(const char *filename) {
9458 return(xmlSAXParseFile(NULL, filename, 0));
9459}
9460
9461/**
9462 * xmlRecoverFile:
9463 * @filename: the filename
9464 *
9465 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9466 * compressed document is provided by default if found at compile-time.
9467 * In the case the document is not Well Formed, a tree is built anyway
9468 *
9469 * Returns the resulting document tree
9470 */
9471
9472xmlDocPtr
9473xmlRecoverFile(const char *filename) {
9474 return(xmlSAXParseFile(NULL, filename, 1));
9475}
9476
9477
9478/**
9479 * xmlSetupParserForBuffer:
9480 * @ctxt: an XML parser context
9481 * @buffer: a xmlChar * buffer
9482 * @filename: a file name
9483 *
9484 * Setup the parser context to parse a new buffer; Clears any prior
9485 * contents from the parser context. The buffer parameter must not be
9486 * NULL, but the filename parameter can be
9487 */
9488void
9489xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9490 const char* filename)
9491{
9492 xmlParserInputPtr input;
9493
9494 input = xmlNewInputStream(ctxt);
9495 if (input == NULL) {
9496 perror("malloc");
9497 xmlFree(ctxt);
9498 return;
9499 }
9500
9501 xmlClearParserCtxt(ctxt);
9502 if (filename != NULL)
9503 input->filename = xmlMemStrdup(filename);
9504 input->base = buffer;
9505 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009506 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009507 inputPush(ctxt, input);
9508}
9509
9510/**
9511 * xmlSAXUserParseFile:
9512 * @sax: a SAX handler
9513 * @user_data: The user data returned on SAX callbacks
9514 * @filename: a file name
9515 *
9516 * parse an XML file and call the given SAX handler routines.
9517 * Automatic support for ZLIB/Compress compressed document is provided
9518 *
9519 * Returns 0 in case of success or a error number otherwise
9520 */
9521int
9522xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9523 const char *filename) {
9524 int ret = 0;
9525 xmlParserCtxtPtr ctxt;
9526
9527 ctxt = xmlCreateFileParserCtxt(filename);
9528 if (ctxt == NULL) return -1;
9529 if (ctxt->sax != &xmlDefaultSAXHandler)
9530 xmlFree(ctxt->sax);
9531 ctxt->sax = sax;
9532 if (user_data != NULL)
9533 ctxt->userData = user_data;
9534
9535 xmlParseDocument(ctxt);
9536
9537 if (ctxt->wellFormed)
9538 ret = 0;
9539 else {
9540 if (ctxt->errNo != 0)
9541 ret = ctxt->errNo;
9542 else
9543 ret = -1;
9544 }
9545 if (sax != NULL)
9546 ctxt->sax = NULL;
9547 xmlFreeParserCtxt(ctxt);
9548
9549 return ret;
9550}
9551
9552/************************************************************************
9553 * *
9554 * Front ends when parsing from memory *
9555 * *
9556 ************************************************************************/
9557
9558/**
9559 * xmlCreateMemoryParserCtxt:
9560 * @buffer: a pointer to a char array
9561 * @size: the size of the array
9562 *
9563 * Create a parser context for an XML in-memory document.
9564 *
9565 * Returns the new parser context or NULL
9566 */
9567xmlParserCtxtPtr
9568xmlCreateMemoryParserCtxt(char *buffer, int size) {
9569 xmlParserCtxtPtr ctxt;
9570 xmlParserInputPtr input;
9571 xmlParserInputBufferPtr buf;
9572
9573 if (buffer == NULL)
9574 return(NULL);
9575 if (size <= 0)
9576 return(NULL);
9577
9578 ctxt = xmlNewParserCtxt();
9579 if (ctxt == NULL)
9580 return(NULL);
9581
9582 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9583 if (buf == NULL) return(NULL);
9584
9585 input = xmlNewInputStream(ctxt);
9586 if (input == NULL) {
9587 xmlFreeParserCtxt(ctxt);
9588 return(NULL);
9589 }
9590
9591 input->filename = NULL;
9592 input->buf = buf;
9593 input->base = input->buf->buffer->content;
9594 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009595 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009596
9597 inputPush(ctxt, input);
9598 return(ctxt);
9599}
9600
9601/**
9602 * xmlSAXParseMemory:
9603 * @sax: the SAX handler block
9604 * @buffer: an pointer to a char array
9605 * @size: the size of the array
9606 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9607 * documents
9608 *
9609 * parse an XML in-memory block and use the given SAX function block
9610 * to handle the parsing callback. If sax is NULL, fallback to the default
9611 * DOM tree building routines.
9612 *
9613 * Returns the resulting document tree
9614 */
9615xmlDocPtr
9616xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9617 xmlDocPtr ret;
9618 xmlParserCtxtPtr ctxt;
9619
9620 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9621 if (ctxt == NULL) return(NULL);
9622 if (sax != NULL) {
9623 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009624 }
9625
9626 xmlParseDocument(ctxt);
9627
9628 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9629 else {
9630 ret = NULL;
9631 xmlFreeDoc(ctxt->myDoc);
9632 ctxt->myDoc = NULL;
9633 }
9634 if (sax != NULL)
9635 ctxt->sax = NULL;
9636 xmlFreeParserCtxt(ctxt);
9637
9638 return(ret);
9639}
9640
9641/**
9642 * xmlParseMemory:
9643 * @buffer: an pointer to a char array
9644 * @size: the size of the array
9645 *
9646 * parse an XML in-memory block and build a tree.
9647 *
9648 * Returns the resulting document tree
9649 */
9650
9651xmlDocPtr xmlParseMemory(char *buffer, int size) {
9652 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9653}
9654
9655/**
9656 * xmlRecoverMemory:
9657 * @buffer: an pointer to a char array
9658 * @size: the size of the array
9659 *
9660 * parse an XML in-memory block and build a tree.
9661 * In the case the document is not Well Formed, a tree is built anyway
9662 *
9663 * Returns the resulting document tree
9664 */
9665
9666xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9667 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9668}
9669
9670/**
9671 * xmlSAXUserParseMemory:
9672 * @sax: a SAX handler
9673 * @user_data: The user data returned on SAX callbacks
9674 * @buffer: an in-memory XML document input
9675 * @size: the length of the XML document in bytes
9676 *
9677 * A better SAX parsing routine.
9678 * parse an XML in-memory buffer and call the given SAX handler routines.
9679 *
9680 * Returns 0 in case of success or a error number otherwise
9681 */
9682int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9683 char *buffer, int size) {
9684 int ret = 0;
9685 xmlParserCtxtPtr ctxt;
9686 xmlSAXHandlerPtr oldsax = NULL;
9687
9688 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9689 if (ctxt == NULL) return -1;
9690 if (sax != NULL) {
9691 oldsax = ctxt->sax;
9692 ctxt->sax = sax;
9693 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009694 if (user_data != NULL)
9695 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009696
9697 xmlParseDocument(ctxt);
9698
9699 if (ctxt->wellFormed)
9700 ret = 0;
9701 else {
9702 if (ctxt->errNo != 0)
9703 ret = ctxt->errNo;
9704 else
9705 ret = -1;
9706 }
9707 if (sax != NULL) {
9708 ctxt->sax = oldsax;
9709 }
9710 xmlFreeParserCtxt(ctxt);
9711
9712 return ret;
9713}
9714
9715/**
9716 * xmlCreateDocParserCtxt:
9717 * @cur: a pointer to an array of xmlChar
9718 *
9719 * Creates a parser context for an XML in-memory document.
9720 *
9721 * Returns the new parser context or NULL
9722 */
9723xmlParserCtxtPtr
9724xmlCreateDocParserCtxt(xmlChar *cur) {
9725 int len;
9726
9727 if (cur == NULL)
9728 return(NULL);
9729 len = xmlStrlen(cur);
9730 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9731}
9732
9733/**
9734 * xmlSAXParseDoc:
9735 * @sax: the SAX handler block
9736 * @cur: a pointer to an array of xmlChar
9737 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9738 * documents
9739 *
9740 * parse an XML in-memory document and build a tree.
9741 * It use the given SAX function block to handle the parsing callback.
9742 * If sax is NULL, fallback to the default DOM tree building routines.
9743 *
9744 * Returns the resulting document tree
9745 */
9746
9747xmlDocPtr
9748xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9749 xmlDocPtr ret;
9750 xmlParserCtxtPtr ctxt;
9751
9752 if (cur == NULL) return(NULL);
9753
9754
9755 ctxt = xmlCreateDocParserCtxt(cur);
9756 if (ctxt == NULL) return(NULL);
9757 if (sax != NULL) {
9758 ctxt->sax = sax;
9759 ctxt->userData = NULL;
9760 }
9761
9762 xmlParseDocument(ctxt);
9763 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9764 else {
9765 ret = NULL;
9766 xmlFreeDoc(ctxt->myDoc);
9767 ctxt->myDoc = NULL;
9768 }
9769 if (sax != NULL)
9770 ctxt->sax = NULL;
9771 xmlFreeParserCtxt(ctxt);
9772
9773 return(ret);
9774}
9775
9776/**
9777 * xmlParseDoc:
9778 * @cur: a pointer to an array of xmlChar
9779 *
9780 * parse an XML in-memory document and build a tree.
9781 *
9782 * Returns the resulting document tree
9783 */
9784
9785xmlDocPtr
9786xmlParseDoc(xmlChar *cur) {
9787 return(xmlSAXParseDoc(NULL, cur, 0));
9788}
9789
9790
9791/************************************************************************
9792 * *
9793 * Miscellaneous *
9794 * *
9795 ************************************************************************/
9796
9797#ifdef LIBXML_XPATH_ENABLED
9798#include <libxml/xpath.h>
9799#endif
9800
/* Set to 1 by xmlInitParser() once it has run, reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
9802
9803/**
9804 * xmlInitParser:
9805 *
9806 * Initialization function for the XML parser.
9807 * This is not reentrant. Call once before processing in case of
9808 * use in multithreaded programs.
9809 */
9810
9811void
9812xmlInitParser(void) {
9813 if (xmlParserInitialized) return;
9814
9815 xmlInitCharEncodingHandlers();
9816 xmlInitializePredefinedEntities();
9817 xmlDefaultSAXHandlerInit();
9818 xmlRegisterDefaultInputCallbacks();
9819 xmlRegisterDefaultOutputCallbacks();
9820#ifdef LIBXML_HTML_ENABLED
9821 htmlInitAutoClose();
9822 htmlDefaultSAXHandlerInit();
9823#endif
9824#ifdef LIBXML_XPATH_ENABLED
9825 xmlXPathInit();
9826#endif
9827 xmlParserInitialized = 1;
9828}
9829
9830/**
9831 * xmlCleanupParser:
9832 *
9833 * Cleanup function for the XML parser. It tries to reclaim all
9834 * parsing related global memory allocated for the parser processing.
9835 * It doesn't deallocate any document related memory. Calling this
9836 * function should not prevent reusing the parser.
9837 */
9838
9839void
9840xmlCleanupParser(void) {
9841 xmlParserInitialized = 0;
9842 xmlCleanupCharEncodingHandlers();
9843 xmlCleanupPredefinedEntities();
9844}
9845
9846/**
9847 * xmlPedanticParserDefault:
9848 * @val: int 0 or 1
9849 *
9850 * Set and return the previous value for enabling pedantic warnings.
9851 *
9852 * Returns the last value for 0 for no substitution, 1 for substitution.
9853 */
9854
9855int
9856xmlPedanticParserDefault(int val) {
9857 int old = xmlPedanticParserDefaultValue;
9858
9859 xmlPedanticParserDefaultValue = val;
9860 return(old);
9861}
9862
9863/**
9864 * xmlSubstituteEntitiesDefault:
9865 * @val: int 0 or 1
9866 *
9867 * Set and return the previous value for default entity support.
9868 * Initially the parser always keep entity references instead of substituting
9869 * entity values in the output. This function has to be used to change the
9870 * default parser behaviour
9871 * SAX::subtituteEntities() has to be used for changing that on a file by
9872 * file basis.
9873 *
9874 * Returns the last value for 0 for no substitution, 1 for substitution.
9875 */
9876
9877int
9878xmlSubstituteEntitiesDefault(int val) {
9879 int old = xmlSubstituteEntitiesDefaultValue;
9880
9881 xmlSubstituteEntitiesDefaultValue = val;
9882 return(old);
9883}
9884
9885/**
9886 * xmlKeepBlanksDefault:
9887 * @val: int 0 or 1
9888 *
9889 * Set and return the previous value for default blanks text nodes support.
9890 * The 1.x version of the parser used an heuristic to try to detect
9891 * ignorable white spaces. As a result the SAX callback was generating
9892 * ignorableWhitespace() callbacks instead of characters() one, and when
9893 * using the DOM output text nodes containing those blanks were not generated.
9894 * The 2.x and later version will switch to the XML standard way and
9895 * ignorableWhitespace() are only generated when running the parser in
9896 * validating mode and when the current element doesn't allow CDATA or
9897 * mixed content.
9898 * This function is provided as a way to force the standard behaviour
9899 * on 1.X libs and to switch back to the old mode for compatibility when
9900 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9901 * by using xmlIsBlankNode() commodity function to detect the "empty"
9902 * nodes generated.
9903 * This value also affect autogeneration of indentation when saving code
9904 * if blanks sections are kept, indentation is not generated.
9905 *
9906 * Returns the last value for 0 for no substitution, 1 for substitution.
9907 */
9908
9909int
9910xmlKeepBlanksDefault(int val) {
9911 int old = xmlKeepBlanksDefaultValue;
9912
9913 xmlKeepBlanksDefaultValue = val;
9914 xmlIndentTreeOutput = !val;
9915 return(old);
9916}
9917