blob: 4b8adba0d0d1642f8da5e6493023171d0043d6ff [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 * and xmlDoValidityCheckingDefaultValue for VMS
 */
35
36#ifdef WIN32
37#include "win32config.h"
38#define XML_DIR_SEP '\\'
39#else
40#include "config.h"
41#define XML_DIR_SEP '/'
42#endif
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
58#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
62#include <stdlib.h>
63#endif
64#ifdef HAVE_SYS_STAT_H
65#include <sys/stat.h>
66#endif
67#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
77
/*
 * Sizes, counted in xmlChars, used for the parser scratch buffers:
 *  - XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation
 *    buffer allocated by xmlStringDecodeEntities();
 *  - XML_PARSER_BUFFER_SIZE is the amount of slack kept free at the end
 *    of such a buffer before it gets grown.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
80
/*
 * Various global defaults for parsing.
 *
 * On VMS the two longest identifiers are defined under a truncated name
 * and the full name is provided as a macro alias, so the rest of the code
 * can keep using the regular spelling.
 */
int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;
#ifdef VMS
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
98
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
108/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
109void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113
114/************************************************************************
115 * *
116 * Parser stacks related functions and macros *
117 * *
118 ************************************************************************/
119
120xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
121 const xmlChar ** str);
122
123/*
124 * Generic function for accessing stacks in the Parser Context
125 */
126
127#define PUSH_AND_POP(scope, type, name) \
128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
129 if (ctxt->name##Nr >= ctxt->name##Max) { \
130 ctxt->name##Max *= 2; \
131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
133 if (ctxt->name##Tab == NULL) { \
134 xmlGenericError(xmlGenericErrorContext, \
135 "realloc failed !\n"); \
136 return(0); \
137 } \
138 } \
139 ctxt->name##Tab[ctxt->name##Nr] = value; \
140 ctxt->name = value; \
141 return(ctxt->name##Nr++); \
142} \
143scope type name##Pop(xmlParserCtxtPtr ctxt) { \
144 type ret; \
145 if (ctxt->name##Nr <= 0) return(0); \
146 ctxt->name##Nr--; \
147 if (ctxt->name##Nr > 0) \
148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
149 else \
150 ctxt->name = NULL; \
151 ret = ctxt->name##Tab[ctxt->name##Nr]; \
152 ctxt->name##Tab[ctxt->name##Nr] = 0; \
153 return(ret); \
154} \
155
156/*
157 * Those macros actually generate the functions
158 */
159PUSH_AND_POP(extern, xmlParserInputPtr, input)
160PUSH_AND_POP(extern, xmlNodePtr, node)
161PUSH_AND_POP(extern, xmlChar*, name)
162
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000163static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000164 if (ctxt->spaceNr >= ctxt->spaceMax) {
165 ctxt->spaceMax *= 2;
166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
168 if (ctxt->spaceTab == NULL) {
169 xmlGenericError(xmlGenericErrorContext,
170 "realloc failed !\n");
171 return(0);
172 }
173 }
174 ctxt->spaceTab[ctxt->spaceNr] = val;
175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
176 return(ctxt->spaceNr++);
177}
178
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000179static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000180 int ret;
181 if (ctxt->spaceNr <= 0) return(0);
182 ctxt->spaceNr--;
183 if (ctxt->spaceNr > 0)
184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
185 else
186 ctxt->space = NULL;
187 ret = ctxt->spaceTab[ctxt->spaceNr];
188 ctxt->spaceTab[ctxt->spaceNr] = -1;
189 return(ret);
190}
191
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect the parser context to be available
 * under the name "ctxt".
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      the pending ctxt->token if there is one, otherwise the
 *            current xmlChar of the input. This should be used internally
 *            by the parser only to compare to ASCII values, otherwise it
 *            would break when running with UTF-8 encoding.
 *   RAW      the current xmlChar of the input buffer, or -1 while a token
 *            is pending in ctxt->token.
 *   NXT(n)   the n'th next xmlChar. Same as CUR, it should be used only
 *            to compare on ASCII based substrings.
 *   SKIP(n)  skip n xmlChars, and must also be used only to skip ASCII
 *            defined strings within the parser. A '%' found at the new
 *            position is handed to xmlParserHandlePEReference(), and an
 *            input which is exhausted and cannot be grown is popped.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT     skip to the next character through xmlNextChar(), which does
 *            the proper decoding in UTF-8 mode.
 *   NEXT1    advance by exactly one xmlChar and try to grow the input when
 *            a 0 is reached.
 *   NEXTL(l) skip l xmlChars in the input buffer, maintaining the line
 *            and column counters and resetting ctxt->token.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *            to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR(s, l) same but operate on the string s instead of the context
 *   COPY_BUF copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK handling of input buffers
 *   SKIP_BLANKS skip blanks through xmlSkipBlankChars()
 *
 * NOTE: SHRINK, GROW and NEXT1 expand to a bare "if"/block statement and
 *       COPY_BUF to a bare "if ... else" statement; use them only as a
 *       complete statement of their own, never directly as the unbraced
 *       body of an "if" which has an "else" branch.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000278
279/**
280 * xmlSkipBlankChars:
281 * @ctxt: the XML parser context
282 *
283 * skip all blanks character found at that point in the input streams.
284 * It pops up finished entities in the process if allowable at that point.
285 *
286 * Returns the number of space chars skipped
287 */
288
289int
290xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
291 int cur, res = 0;
292
293 /*
294 * It's Okay to use CUR/NEXT here since all the blanks are on
295 * the ASCII range.
296 */
297 do {
298 cur = CUR;
299 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
300 NEXT;
301 cur = CUR;
302 res++;
303 }
304 while ((cur == 0) && (ctxt->inputNr > 1) &&
305 (ctxt->instate != XML_PARSER_COMMENT)) {
306 xmlPopInput(ctxt);
307 cur = CUR;
308 }
309 /*
310 * Need to handle support of entities branching here
311 */
312 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
313 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
314 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
315 return(res);
316}
317
318/************************************************************************
319 * *
320 * Commodity functions to handle entities *
321 * *
322 ************************************************************************/
323
324/**
325 * xmlPopInput:
326 * @ctxt: an XML parser context
327 *
328 * xmlPopInput: the current input pointed by ctxt->input came to an end
329 * pop it and return the next char.
330 *
331 * Returns the current xmlChar in the parser context
332 */
333xmlChar
334xmlPopInput(xmlParserCtxtPtr ctxt) {
335 if (ctxt->inputNr == 1) return(0); /* End of main Input */
336 if (xmlParserDebugEntities)
337 xmlGenericError(xmlGenericErrorContext,
338 "Popping input %d\n", ctxt->inputNr);
339 xmlFreeInputStream(inputPop(ctxt));
340 if ((*ctxt->input->cur == 0) &&
341 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
342 return(xmlPopInput(ctxt));
343 return(CUR);
344}
345
346/**
347 * xmlPushInput:
348 * @ctxt: an XML parser context
349 * @input: an XML parser input fragment (entity, XML fragment ...).
350 *
351 * xmlPushInput: switch to a new input stream which is stacked on top
352 * of the previous one(s).
353 */
354void
355xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
356 if (input == NULL) return;
357
358 if (xmlParserDebugEntities) {
359 if ((ctxt->input != NULL) && (ctxt->input->filename))
360 xmlGenericError(xmlGenericErrorContext,
361 "%s(%d): ", ctxt->input->filename,
362 ctxt->input->line);
363 xmlGenericError(xmlGenericErrorContext,
364 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
365 }
366 inputPush(ctxt, input);
367 GROW;
368}
369
370/**
371 * xmlParseCharRef:
372 * @ctxt: an XML parser context
373 *
374 * parse Reference declarations
375 *
376 * [66] CharRef ::= '&#' [0-9]+ ';' |
377 * '&#x' [0-9a-fA-F]+ ';'
378 *
379 * [ WFC: Legal Character ]
380 * Characters referred to using character references must match the
381 * production for Char.
382 *
383 * Returns the value parsed (as an int), 0 in case of error
384 */
385int
386xmlParseCharRef(xmlParserCtxtPtr ctxt) {
387 int val = 0;
388 int count = 0;
389
390 if (ctxt->token != 0) {
391 val = ctxt->token;
392 ctxt->token = 0;
393 return(val);
394 }
395 /*
396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
397 */
398 if ((RAW == '&') && (NXT(1) == '#') &&
399 (NXT(2) == 'x')) {
400 SKIP(3);
401 GROW;
402 while (RAW != ';') { /* loop blocked by count */
403 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
404 val = val * 16 + (CUR - '0');
405 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
406 val = val * 16 + (CUR - 'a') + 10;
407 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
408 val = val * 16 + (CUR - 'A') + 10;
409 else {
410 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
412 ctxt->sax->error(ctxt->userData,
413 "xmlParseCharRef: invalid hexadecimal value\n");
414 ctxt->wellFormed = 0;
415 ctxt->disableSAX = 1;
416 val = 0;
417 break;
418 }
419 NEXT;
420 count++;
421 }
422 if (RAW == ';') {
423 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
424 ctxt->nbChars ++;
425 ctxt->input->cur++;
426 }
427 } else if ((RAW == '&') && (NXT(1) == '#')) {
428 SKIP(2);
429 GROW;
430 while (RAW != ';') { /* loop blocked by count */
431 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
432 val = val * 10 + (CUR - '0');
433 else {
434 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
436 ctxt->sax->error(ctxt->userData,
437 "xmlParseCharRef: invalid decimal value\n");
438 ctxt->wellFormed = 0;
439 ctxt->disableSAX = 1;
440 val = 0;
441 break;
442 }
443 NEXT;
444 count++;
445 }
446 if (RAW == ';') {
447 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
448 ctxt->nbChars ++;
449 ctxt->input->cur++;
450 }
451 } else {
452 ctxt->errNo = XML_ERR_INVALID_CHARREF;
453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
454 ctxt->sax->error(ctxt->userData,
455 "xmlParseCharRef: invalid value\n");
456 ctxt->wellFormed = 0;
457 ctxt->disableSAX = 1;
458 }
459
460 /*
461 * [ WFC: Legal Character ]
462 * Characters referred to using character references must match the
463 * production for Char.
464 */
465 if (IS_CHAR(val)) {
466 return(val);
467 } else {
468 ctxt->errNo = XML_ERR_INVALID_CHAR;
469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
470 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
471 val);
472 ctxt->wellFormed = 0;
473 ctxt->disableSAX = 1;
474 }
475 return(0);
476}
477
478/**
479 * xmlParseStringCharRef:
480 * @ctxt: an XML parser context
481 * @str: a pointer to an index in the string
482 *
483 * parse Reference declarations, variant parsing from a string rather
484 * than an an input flow.
485 *
486 * [66] CharRef ::= '&#' [0-9]+ ';' |
487 * '&#x' [0-9a-fA-F]+ ';'
488 *
489 * [ WFC: Legal Character ]
490 * Characters referred to using character references must match the
491 * production for Char.
492 *
493 * Returns the value parsed (as an int), 0 in case of error, str will be
494 * updated to the current value of the index
495 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000496static int
Owen Taylor3473f882001-02-23 17:55:21 +0000497xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
498 const xmlChar *ptr;
499 xmlChar cur;
500 int val = 0;
501
502 if ((str == NULL) || (*str == NULL)) return(0);
503 ptr = *str;
504 cur = *ptr;
505 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
506 ptr += 3;
507 cur = *ptr;
508 while (cur != ';') { /* Non input consuming loop */
509 if ((cur >= '0') && (cur <= '9'))
510 val = val * 16 + (cur - '0');
511 else if ((cur >= 'a') && (cur <= 'f'))
512 val = val * 16 + (cur - 'a') + 10;
513 else if ((cur >= 'A') && (cur <= 'F'))
514 val = val * 16 + (cur - 'A') + 10;
515 else {
516 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
518 ctxt->sax->error(ctxt->userData,
519 "xmlParseStringCharRef: invalid hexadecimal value\n");
520 ctxt->wellFormed = 0;
521 ctxt->disableSAX = 1;
522 val = 0;
523 break;
524 }
525 ptr++;
526 cur = *ptr;
527 }
528 if (cur == ';')
529 ptr++;
530 } else if ((cur == '&') && (ptr[1] == '#')){
531 ptr += 2;
532 cur = *ptr;
533 while (cur != ';') { /* Non input consuming loops */
534 if ((cur >= '0') && (cur <= '9'))
535 val = val * 10 + (cur - '0');
536 else {
537 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
539 ctxt->sax->error(ctxt->userData,
540 "xmlParseStringCharRef: invalid decimal value\n");
541 ctxt->wellFormed = 0;
542 ctxt->disableSAX = 1;
543 val = 0;
544 break;
545 }
546 ptr++;
547 cur = *ptr;
548 }
549 if (cur == ';')
550 ptr++;
551 } else {
552 ctxt->errNo = XML_ERR_INVALID_CHARREF;
553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
554 ctxt->sax->error(ctxt->userData,
555 "xmlParseCharRef: invalid value\n");
556 ctxt->wellFormed = 0;
557 ctxt->disableSAX = 1;
558 return(0);
559 }
560 *str = ptr;
561
562 /*
563 * [ WFC: Legal Character ]
564 * Characters referred to using character references must match the
565 * production for Char.
566 */
567 if (IS_CHAR(val)) {
568 return(val);
569 } else {
570 ctxt->errNo = XML_ERR_INVALID_CHAR;
571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
572 ctxt->sax->error(ctxt->userData,
573 "CharRef: invalid xmlChar value %d\n", val);
574 ctxt->wellFormed = 0;
575 ctxt->disableSAX = 1;
576 }
577 return(0);
578}
579
580/**
581 * xmlParserHandlePEReference:
582 * @ctxt: the parser context
583 *
584 * [69] PEReference ::= '%' Name ';'
585 *
586 * [ WFC: No Recursion ]
587 * A parsed entity must not contain a recursive
588 * reference to itself, either directly or indirectly.
589 *
590 * [ WFC: Entity Declared ]
591 * In a document without any DTD, a document with only an internal DTD
592 * subset which contains no parameter entity references, or a document
593 * with "standalone='yes'", ... ... The declaration of a parameter
594 * entity must precede any reference to it...
595 *
596 * [ VC: Entity Declared ]
597 * In a document with an external subset or external parameter entities
598 * with "standalone='no'", ... ... The declaration of a parameter entity
599 * must precede any reference to it...
600 *
601 * [ WFC: In DTD ]
602 * Parameter-entity references may only appear in the DTD.
603 * NOTE: misleading but this is handled.
604 *
605 * A PEReference may have been detected in the current input stream
606 * the handling is done accordingly to
607 * http://www.w3.org/TR/REC-xml#entproc
608 * i.e.
609 * - Included in literal in entity values
610 * - Included as Paraemeter Entity reference within DTDs
611 */
612void
613xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
614 xmlChar *name;
615 xmlEntityPtr entity = NULL;
616 xmlParserInputPtr input;
617
618 if (ctxt->token != 0) {
619 return;
620 }
621 if (RAW != '%') return;
622 switch(ctxt->instate) {
623 case XML_PARSER_CDATA_SECTION:
624 return;
625 case XML_PARSER_COMMENT:
626 return;
627 case XML_PARSER_START_TAG:
628 return;
629 case XML_PARSER_END_TAG:
630 return;
631 case XML_PARSER_EOF:
632 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
634 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
635 ctxt->wellFormed = 0;
636 ctxt->disableSAX = 1;
637 return;
638 case XML_PARSER_PROLOG:
639 case XML_PARSER_START:
640 case XML_PARSER_MISC:
641 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
643 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
644 ctxt->wellFormed = 0;
645 ctxt->disableSAX = 1;
646 return;
647 case XML_PARSER_ENTITY_DECL:
648 case XML_PARSER_CONTENT:
649 case XML_PARSER_ATTRIBUTE_VALUE:
650 case XML_PARSER_PI:
651 case XML_PARSER_SYSTEM_LITERAL:
652 /* we just ignore it there */
653 return;
654 case XML_PARSER_EPILOG:
655 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
657 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
658 ctxt->wellFormed = 0;
659 ctxt->disableSAX = 1;
660 return;
661 case XML_PARSER_ENTITY_VALUE:
662 /*
663 * NOTE: in the case of entity values, we don't do the
664 * substitution here since we need the literal
665 * entity value to be able to save the internal
666 * subset of the document.
667 * This will be handled by xmlStringDecodeEntities
668 */
669 return;
670 case XML_PARSER_DTD:
671 /*
672 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
673 * In the internal DTD subset, parameter-entity references
674 * can occur only where markup declarations can occur, not
675 * within markup declarations.
676 * In that case this is handled in xmlParseMarkupDecl
677 */
678 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
679 return;
680 break;
681 case XML_PARSER_IGNORE:
682 return;
683 }
684
685 NEXT;
686 name = xmlParseName(ctxt);
687 if (xmlParserDebugEntities)
688 xmlGenericError(xmlGenericErrorContext,
689 "PE Reference: %s\n", name);
690 if (name == NULL) {
691 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
693 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
694 ctxt->wellFormed = 0;
695 ctxt->disableSAX = 1;
696 } else {
697 if (RAW == ';') {
698 NEXT;
699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
701 if (entity == NULL) {
702
703 /*
704 * [ WFC: Entity Declared ]
705 * In a document without any DTD, a document with only an
706 * internal DTD subset which contains no parameter entity
707 * references, or a document with "standalone='yes'", ...
708 * ... The declaration of a parameter entity must precede
709 * any reference to it...
710 */
711 if ((ctxt->standalone == 1) ||
712 ((ctxt->hasExternalSubset == 0) &&
713 (ctxt->hasPErefs == 0))) {
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData,
716 "PEReference: %%%s; not found\n", name);
717 ctxt->wellFormed = 0;
718 ctxt->disableSAX = 1;
719 } else {
720 /*
721 * [ VC: Entity Declared ]
722 * In a document with an external subset or external
723 * parameter entities with "standalone='no'", ...
724 * ... The declaration of a parameter entity must precede
725 * any reference to it...
726 */
727 if ((!ctxt->disableSAX) &&
728 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
729 ctxt->vctxt.error(ctxt->vctxt.userData,
730 "PEReference: %%%s; not found\n", name);
731 } else if ((!ctxt->disableSAX) &&
732 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
733 ctxt->sax->warning(ctxt->userData,
734 "PEReference: %%%s; not found\n", name);
735 ctxt->valid = 0;
736 }
737 } else {
738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
740 /*
741 * handle the extra spaces added before and after
742 * c.f. http://www.w3.org/TR/REC-xml#as-PE
743 * this is done independantly.
744 */
745 input = xmlNewEntityInputStream(ctxt, entity);
746 xmlPushInput(ctxt, input);
747 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
748 (RAW == '<') && (NXT(1) == '?') &&
749 (NXT(2) == 'x') && (NXT(3) == 'm') &&
750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
751 xmlParseTextDecl(ctxt);
752 }
753 if (ctxt->token == 0)
754 ctxt->token = ' ';
755 } else {
756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
757 ctxt->sax->error(ctxt->userData,
758 "xmlHandlePEReference: %s is not a parameter entity\n",
759 name);
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 }
763 }
764 } else {
765 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
767 ctxt->sax->error(ctxt->userData,
768 "xmlHandlePEReference: expecting ';'\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 }
772 xmlFree(name);
773 }
774}
775
/*
 * Macro used to grow the current buffer.
 *
 * @buffer must be the name of an xmlChar * variable which comes with a
 * companion integer variable named <buffer>_size holding its size in
 * xmlChars. The size is doubled and the buffer reallocated accordingly.
 *
 * If the reallocation fails, the error is reported with perror(), the
 * old buffer is released (it would otherwise be leaked since the
 * enclosing function is left right away) and the macro makes the
 * ENCLOSING FUNCTION return NULL; it can therefore be used only in
 * functions returning a pointer.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
                                                                        \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
788
789/**
790 * xmlStringDecodeEntities:
791 * @ctxt: the parser context
792 * @str: the input string
793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
794 * @end: an end marker xmlChar, 0 if none
795 * @end2: an end marker xmlChar, 0 if none
796 * @end3: an end marker xmlChar, 0 if none
797 *
798 * Takes a entity string content and process to do the adequate subtitutions.
799 *
800 * [67] Reference ::= EntityRef | CharRef
801 *
802 * [69] PEReference ::= '%' Name ';'
803 *
804 * Returns A newly allocated string with the substitution done. The caller
805 * must deallocate it !
806 */
807xmlChar *
808xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
809 xmlChar end, xmlChar end2, xmlChar end3) {
810 xmlChar *buffer = NULL;
811 int buffer_size = 0;
812
813 xmlChar *current = NULL;
814 xmlEntityPtr ent;
815 int c,l;
816 int nbchars = 0;
817
818 if (str == NULL)
819 return(NULL);
820
821 if (ctxt->depth > 40) {
822 ctxt->errNo = XML_ERR_ENTITY_LOOP;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
824 ctxt->sax->error(ctxt->userData,
825 "Detected entity reference loop\n");
826 ctxt->wellFormed = 0;
827 ctxt->disableSAX = 1;
828 return(NULL);
829 }
830
831 /*
832 * allocate a translation buffer.
833 */
834 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
835 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
836 if (buffer == NULL) {
837 perror("xmlDecodeEntities: malloc failed");
838 return(NULL);
839 }
840
841 /*
842 * Ok loop until we reach one of the ending char or a size limit.
843 * we are operating on already parsed values.
844 */
845 c = CUR_SCHAR(str, l);
846 while ((c != 0) && (c != end) && /* non input consuming loop */
847 (c != end2) && (c != end3)) {
848
849 if (c == 0) break;
850 if ((c == '&') && (str[1] == '#')) {
851 int val = xmlParseStringCharRef(ctxt, &str);
852 if (val != 0) {
853 COPY_BUF(0,buffer,nbchars,val);
854 }
855 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
856 if (xmlParserDebugEntities)
857 xmlGenericError(xmlGenericErrorContext,
858 "String decoding Entity Reference: %.30s\n",
859 str);
860 ent = xmlParseStringEntityRef(ctxt, &str);
861 if ((ent != NULL) &&
862 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
863 if (ent->content != NULL) {
864 COPY_BUF(0,buffer,nbchars,ent->content[0]);
865 } else {
866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
867 ctxt->sax->error(ctxt->userData,
868 "internal error entity has no content\n");
869 }
870 } else if ((ent != NULL) && (ent->content != NULL)) {
871 xmlChar *rep;
872
873 ctxt->depth++;
874 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
875 0, 0, 0);
876 ctxt->depth--;
877 if (rep != NULL) {
878 current = rep;
879 while (*current != 0) { /* non input consuming loop */
880 buffer[nbchars++] = *current++;
881 if (nbchars >
882 buffer_size - XML_PARSER_BUFFER_SIZE) {
883 growBuffer(buffer);
884 }
885 }
886 xmlFree(rep);
887 }
888 } else if (ent != NULL) {
889 int i = xmlStrlen(ent->name);
890 const xmlChar *cur = ent->name;
891
892 buffer[nbchars++] = '&';
893 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
894 growBuffer(buffer);
895 }
896 for (;i > 0;i--)
897 buffer[nbchars++] = *cur++;
898 buffer[nbchars++] = ';';
899 }
900 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
903 "String decoding PE Reference: %.30s\n", str);
904 ent = xmlParseStringPEReference(ctxt, &str);
905 if (ent != NULL) {
906 xmlChar *rep;
907
908 ctxt->depth++;
909 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
910 0, 0, 0);
911 ctxt->depth--;
912 if (rep != NULL) {
913 current = rep;
914 while (*current != 0) { /* non input consuming loop */
915 buffer[nbchars++] = *current++;
916 if (nbchars >
917 buffer_size - XML_PARSER_BUFFER_SIZE) {
918 growBuffer(buffer);
919 }
920 }
921 xmlFree(rep);
922 }
923 }
924 } else {
925 COPY_BUF(l,buffer,nbchars,c);
926 str += l;
927 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
928 growBuffer(buffer);
929 }
930 }
931 c = CUR_SCHAR(str, l);
932 }
933 buffer[nbchars++] = 0;
934 return(buffer);
935}
936
937
938/************************************************************************
939 * *
940 * Commodity functions to handle xmlChars *
941 * *
942 ************************************************************************/
943
944/**
945 * xmlStrndup:
946 * @cur: the input xmlChar *
947 * @len: the len of @cur
948 *
949 * a strndup for array of xmlChar's
950 *
951 * Returns a new xmlChar * or NULL
952 */
953xmlChar *
954xmlStrndup(const xmlChar *cur, int len) {
955 xmlChar *ret;
956
957 if ((cur == NULL) || (len < 0)) return(NULL);
958 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
959 if (ret == NULL) {
960 xmlGenericError(xmlGenericErrorContext,
961 "malloc of %ld byte failed\n",
962 (len + 1) * (long)sizeof(xmlChar));
963 return(NULL);
964 }
965 memcpy(ret, cur, len * sizeof(xmlChar));
966 ret[len] = 0;
967 return(ret);
968}
969
970/**
971 * xmlStrdup:
972 * @cur: the input xmlChar *
973 *
974 * a strdup for array of xmlChar's. Since they are supposed to be
975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
976 * a termination mark of '0'.
977 *
978 * Returns a new xmlChar * or NULL
979 */
980xmlChar *
981xmlStrdup(const xmlChar *cur) {
982 const xmlChar *p = cur;
983
984 if (cur == NULL) return(NULL);
985 while (*p != 0) p++; /* non input consuming */
986 return(xmlStrndup(cur, p - cur));
987}
988
989/**
990 * xmlCharStrndup:
991 * @cur: the input char *
992 * @len: the len of @cur
993 *
994 * a strndup for char's to xmlChar's
995 *
996 * Returns a new xmlChar * or NULL
997 */
998
999xmlChar *
1000xmlCharStrndup(const char *cur, int len) {
1001 int i;
1002 xmlChar *ret;
1003
1004 if ((cur == NULL) || (len < 0)) return(NULL);
1005 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1006 if (ret == NULL) {
1007 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1008 (len + 1) * (long)sizeof(xmlChar));
1009 return(NULL);
1010 }
1011 for (i = 0;i < len;i++)
1012 ret[i] = (xmlChar) cur[i];
1013 ret[len] = 0;
1014 return(ret);
1015}
1016
1017/**
1018 * xmlCharStrdup:
1019 * @cur: the input char *
1020 * @len: the len of @cur
1021 *
1022 * a strdup for char's to xmlChar's
1023 *
1024 * Returns a new xmlChar * or NULL
1025 */
1026
1027xmlChar *
1028xmlCharStrdup(const char *cur) {
1029 const char *p = cur;
1030
1031 if (cur == NULL) return(NULL);
1032 while (*p != '\0') p++; /* non input consuming */
1033 return(xmlCharStrndup(cur, p - cur));
1034}
1035
1036/**
1037 * xmlStrcmp:
1038 * @str1: the first xmlChar *
1039 * @str2: the second xmlChar *
1040 *
1041 * a strcmp for xmlChar's
1042 *
1043 * Returns the integer result of the comparison
1044 */
1045
1046int
1047xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1048 register int tmp;
1049
1050 if (str1 == str2) return(0);
1051 if (str1 == NULL) return(-1);
1052 if (str2 == NULL) return(1);
1053 do {
1054 tmp = *str1++ - *str2;
1055 if (tmp != 0) return(tmp);
1056 } while (*str2++ != 0);
1057 return 0;
1058}
1059
1060/**
1061 * xmlStrEqual:
1062 * @str1: the first xmlChar *
1063 * @str2: the second xmlChar *
1064 *
1065 * Check if both string are equal of have same content
1066 * Should be a bit more readable and faster than xmlStrEqual()
1067 *
1068 * Returns 1 if they are equal, 0 if they are different
1069 */
1070
1071int
1072xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1073 if (str1 == str2) return(1);
1074 if (str1 == NULL) return(0);
1075 if (str2 == NULL) return(0);
1076 do {
1077 if (*str1++ != *str2) return(0);
1078 } while (*str2++);
1079 return(1);
1080}
1081
1082/**
1083 * xmlStrncmp:
1084 * @str1: the first xmlChar *
1085 * @str2: the second xmlChar *
1086 * @len: the max comparison length
1087 *
1088 * a strncmp for xmlChar's
1089 *
1090 * Returns the integer result of the comparison
1091 */
1092
1093int
1094xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1095 register int tmp;
1096
1097 if (len <= 0) return(0);
1098 if (str1 == str2) return(0);
1099 if (str1 == NULL) return(-1);
1100 if (str2 == NULL) return(1);
1101 do {
1102 tmp = *str1++ - *str2;
1103 if (tmp != 0 || --len == 0) return(tmp);
1104 } while (*str2++ != 0);
1105 return 0;
1106}
1107
1108static xmlChar casemap[256] = {
1109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1111 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1120 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1132 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1141};
1142
1143/**
1144 * xmlStrcasecmp:
1145 * @str1: the first xmlChar *
1146 * @str2: the second xmlChar *
1147 *
1148 * a strcasecmp for xmlChar's
1149 *
1150 * Returns the integer result of the comparison
1151 */
1152
1153int
1154xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1155 register int tmp;
1156
1157 if (str1 == str2) return(0);
1158 if (str1 == NULL) return(-1);
1159 if (str2 == NULL) return(1);
1160 do {
1161 tmp = casemap[*str1++] - casemap[*str2];
1162 if (tmp != 0) return(tmp);
1163 } while (*str2++ != 0);
1164 return 0;
1165}
1166
1167/**
1168 * xmlStrncasecmp:
1169 * @str1: the first xmlChar *
1170 * @str2: the second xmlChar *
1171 * @len: the max comparison length
1172 *
1173 * a strncasecmp for xmlChar's
1174 *
1175 * Returns the integer result of the comparison
1176 */
1177
1178int
1179xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1180 register int tmp;
1181
1182 if (len <= 0) return(0);
1183 if (str1 == str2) return(0);
1184 if (str1 == NULL) return(-1);
1185 if (str2 == NULL) return(1);
1186 do {
1187 tmp = casemap[*str1++] - casemap[*str2];
1188 if (tmp != 0 || --len == 0) return(tmp);
1189 } while (*str2++ != 0);
1190 return 0;
1191}
1192
1193/**
1194 * xmlStrchr:
1195 * @str: the xmlChar * array
1196 * @val: the xmlChar to search
1197 *
1198 * a strchr for xmlChar's
1199 *
1200 * Returns the xmlChar * for the first occurence or NULL.
1201 */
1202
1203const xmlChar *
1204xmlStrchr(const xmlChar *str, xmlChar val) {
1205 if (str == NULL) return(NULL);
1206 while (*str != 0) { /* non input consuming */
1207 if (*str == val) return((xmlChar *) str);
1208 str++;
1209 }
1210 return(NULL);
1211}
1212
1213/**
1214 * xmlStrstr:
1215 * @str: the xmlChar * array (haystack)
1216 * @val: the xmlChar to search (needle)
1217 *
1218 * a strstr for xmlChar's
1219 *
1220 * Returns the xmlChar * for the first occurence or NULL.
1221 */
1222
1223const xmlChar *
1224xmlStrstr(const xmlChar *str, xmlChar *val) {
1225 int n;
1226
1227 if (str == NULL) return(NULL);
1228 if (val == NULL) return(NULL);
1229 n = xmlStrlen(val);
1230
1231 if (n == 0) return(str);
1232 while (*str != 0) { /* non input consuming */
1233 if (*str == *val) {
1234 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1235 }
1236 str++;
1237 }
1238 return(NULL);
1239}
1240
1241/**
1242 * xmlStrcasestr:
1243 * @str: the xmlChar * array (haystack)
1244 * @val: the xmlChar to search (needle)
1245 *
1246 * a case-ignoring strstr for xmlChar's
1247 *
1248 * Returns the xmlChar * for the first occurence or NULL.
1249 */
1250
1251const xmlChar *
1252xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1253 int n;
1254
1255 if (str == NULL) return(NULL);
1256 if (val == NULL) return(NULL);
1257 n = xmlStrlen(val);
1258
1259 if (n == 0) return(str);
1260 while (*str != 0) { /* non input consuming */
1261 if (casemap[*str] == casemap[*val])
1262 if (!xmlStrncasecmp(str, val, n)) return(str);
1263 str++;
1264 }
1265 return(NULL);
1266}
1267
1268/**
1269 * xmlStrsub:
1270 * @str: the xmlChar * array (haystack)
1271 * @start: the index of the first char (zero based)
1272 * @len: the length of the substring
1273 *
1274 * Extract a substring of a given string
1275 *
1276 * Returns the xmlChar * for the first occurence or NULL.
1277 */
1278
1279xmlChar *
1280xmlStrsub(const xmlChar *str, int start, int len) {
1281 int i;
1282
1283 if (str == NULL) return(NULL);
1284 if (start < 0) return(NULL);
1285 if (len < 0) return(NULL);
1286
1287 for (i = 0;i < start;i++) {
1288 if (*str == 0) return(NULL);
1289 str++;
1290 }
1291 if (*str == 0) return(NULL);
1292 return(xmlStrndup(str, len));
1293}
1294
1295/**
1296 * xmlStrlen:
1297 * @str: the xmlChar * array
1298 *
1299 * length of a xmlChar's string
1300 *
1301 * Returns the number of xmlChar contained in the ARRAY.
1302 */
1303
1304int
1305xmlStrlen(const xmlChar *str) {
1306 int len = 0;
1307
1308 if (str == NULL) return(0);
1309 while (*str != 0) { /* non input consuming */
1310 str++;
1311 len++;
1312 }
1313 return(len);
1314}
1315
1316/**
1317 * xmlStrncat:
1318 * @cur: the original xmlChar * array
1319 * @add: the xmlChar * array added
1320 * @len: the length of @add
1321 *
1322 * a strncat for array of xmlChar's, it will extend cur with the len
1323 * first bytes of @add.
1324 *
1325 * Returns a new xmlChar *, the original @cur is reallocated if needed
1326 * and should not be freed
1327 */
1328
1329xmlChar *
1330xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1331 int size;
1332 xmlChar *ret;
1333
1334 if ((add == NULL) || (len == 0))
1335 return(cur);
1336 if (cur == NULL)
1337 return(xmlStrndup(add, len));
1338
1339 size = xmlStrlen(cur);
1340 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1341 if (ret == NULL) {
1342 xmlGenericError(xmlGenericErrorContext,
1343 "xmlStrncat: realloc of %ld byte failed\n",
1344 (size + len + 1) * (long)sizeof(xmlChar));
1345 return(cur);
1346 }
1347 memcpy(&ret[size], add, len * sizeof(xmlChar));
1348 ret[size + len] = 0;
1349 return(ret);
1350}
1351
1352/**
1353 * xmlStrcat:
1354 * @cur: the original xmlChar * array
1355 * @add: the xmlChar * array added
1356 *
1357 * a strcat for array of xmlChar's. Since they are supposed to be
1358 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1359 * a termination mark of '0'.
1360 *
1361 * Returns a new xmlChar * containing the concatenated string.
1362 */
1363xmlChar *
1364xmlStrcat(xmlChar *cur, const xmlChar *add) {
1365 const xmlChar *p = add;
1366
1367 if (add == NULL) return(cur);
1368 if (cur == NULL)
1369 return(xmlStrdup(add));
1370
1371 while (*p != 0) p++; /* non input consuming */
1372 return(xmlStrncat(cur, add, p - add));
1373}
1374
1375/************************************************************************
1376 * *
1377 * Commodity functions, cleanup needed ? *
1378 * *
1379 ************************************************************************/
1380
1381/**
1382 * areBlanks:
1383 * @ctxt: an XML parser context
1384 * @str: a xmlChar *
1385 * @len: the size of @str
1386 *
1387 * Is this a sequence of blank chars that one can ignore ?
1388 *
1389 * Returns 1 if ignorable 0 otherwise.
1390 */
1391
1392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1393 int i, ret;
1394 xmlNodePtr lastChild;
1395
Daniel Veillard2f362242001-03-02 17:36:21 +00001396 if (ctxt->keepBlanks)
1397 return(0);
1398
Owen Taylor3473f882001-02-23 17:55:21 +00001399 /*
1400 * Check for xml:space value.
1401 */
1402 if (*(ctxt->space) == 1)
1403 return(0);
1404
1405 /*
1406 * Check that the string is made of blanks
1407 */
1408 for (i = 0;i < len;i++)
1409 if (!(IS_BLANK(str[i]))) return(0);
1410
1411 /*
1412 * Look if the element is mixed content in the Dtd if available
1413 */
1414 if (ctxt->myDoc != NULL) {
1415 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1416 if (ret == 0) return(1);
1417 if (ret == 1) return(0);
1418 }
1419
1420 /*
1421 * Otherwise, heuristic :-\
1422 */
Owen Taylor3473f882001-02-23 17:55:21 +00001423 if (RAW != '<') return(0);
1424 if (ctxt->node == NULL) return(0);
1425 if ((ctxt->node->children == NULL) &&
1426 (RAW == '<') && (NXT(1) == '/')) return(0);
1427
1428 lastChild = xmlGetLastChild(ctxt->node);
1429 if (lastChild == NULL) {
1430 if (ctxt->node->content != NULL) return(0);
1431 } else if (xmlNodeIsText(lastChild))
1432 return(0);
1433 else if ((ctxt->node->children != NULL) &&
1434 (xmlNodeIsText(ctxt->node->children)))
1435 return(0);
1436 return(1);
1437}
1438
1439/*
 * Forward definition for recursive behaviour.
1441 */
1442void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1443void xmlParseReference(xmlParserCtxtPtr ctxt);
1444
1445/************************************************************************
1446 * *
1447 * Extra stuff for namespace support *
1448 * Relates to http://www.w3.org/TR/WD-xml-names *
1449 * *
1450 ************************************************************************/
1451
1452/**
1453 * xmlSplitQName:
1454 * @ctxt: an XML parser context
1455 * @name: an XML parser context
1456 * @prefix: a xmlChar **
1457 *
1458 * parse an UTF8 encoded XML qualified name string
1459 *
1460 * [NS 5] QName ::= (Prefix ':')? LocalPart
1461 *
1462 * [NS 6] Prefix ::= NCName
1463 *
1464 * [NS 7] LocalPart ::= NCName
1465 *
1466 * Returns the local part, and prefix is updated
1467 * to get the Prefix if any.
1468 */
1469
1470xmlChar *
1471xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1472 xmlChar buf[XML_MAX_NAMELEN + 5];
1473 xmlChar *buffer = NULL;
1474 int len = 0;
1475 int max = XML_MAX_NAMELEN;
1476 xmlChar *ret = NULL;
1477 const xmlChar *cur = name;
1478 int c;
1479
1480 *prefix = NULL;
1481
1482#ifndef XML_XML_NAMESPACE
1483 /* xml: prefix is not really a namespace */
1484 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1485 (cur[2] == 'l') && (cur[3] == ':'))
1486 return(xmlStrdup(name));
1487#endif
1488
1489 /* nasty but valid */
1490 if (cur[0] == ':')
1491 return(xmlStrdup(name));
1492
1493 c = *cur++;
1494 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1495 buf[len++] = c;
1496 c = *cur++;
1497 }
1498 if (len >= max) {
1499 /*
1500 * Okay someone managed to make a huge name, so he's ready to pay
1501 * for the processing speed.
1502 */
1503 max = len * 2;
1504
1505 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1506 if (buffer == NULL) {
1507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1508 ctxt->sax->error(ctxt->userData,
1509 "xmlSplitQName: out of memory\n");
1510 return(NULL);
1511 }
1512 memcpy(buffer, buf, len);
1513 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1514 if (len + 10 > max) {
1515 max *= 2;
1516 buffer = (xmlChar *) xmlRealloc(buffer,
1517 max * sizeof(xmlChar));
1518 if (buffer == NULL) {
1519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1520 ctxt->sax->error(ctxt->userData,
1521 "xmlSplitQName: out of memory\n");
1522 return(NULL);
1523 }
1524 }
1525 buffer[len++] = c;
1526 c = *cur++;
1527 }
1528 buffer[len] = 0;
1529 }
1530
1531 if (buffer == NULL)
1532 ret = xmlStrndup(buf, len);
1533 else {
1534 ret = buffer;
1535 buffer = NULL;
1536 max = XML_MAX_NAMELEN;
1537 }
1538
1539
1540 if (c == ':') {
1541 c = *cur++;
1542 if (c == 0) return(ret);
1543 *prefix = ret;
1544 len = 0;
1545
1546 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1547 buf[len++] = c;
1548 c = *cur++;
1549 }
1550 if (len >= max) {
1551 /*
1552 * Okay someone managed to make a huge name, so he's ready to pay
1553 * for the processing speed.
1554 */
1555 max = len * 2;
1556
1557 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1558 if (buffer == NULL) {
1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1560 ctxt->sax->error(ctxt->userData,
1561 "xmlSplitQName: out of memory\n");
1562 return(NULL);
1563 }
1564 memcpy(buffer, buf, len);
1565 while (c != 0) { /* tested bigname2.xml */
1566 if (len + 10 > max) {
1567 max *= 2;
1568 buffer = (xmlChar *) xmlRealloc(buffer,
1569 max * sizeof(xmlChar));
1570 if (buffer == NULL) {
1571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1572 ctxt->sax->error(ctxt->userData,
1573 "xmlSplitQName: out of memory\n");
1574 return(NULL);
1575 }
1576 }
1577 buffer[len++] = c;
1578 c = *cur++;
1579 }
1580 buffer[len] = 0;
1581 }
1582
1583 if (buffer == NULL)
1584 ret = xmlStrndup(buf, len);
1585 else {
1586 ret = buffer;
1587 }
1588 }
1589
1590 return(ret);
1591}
1592
1593/************************************************************************
1594 * *
1595 * The parser itself *
1596 * Relates to http://www.w3.org/TR/REC-xml *
1597 * *
1598 ************************************************************************/
1599
Daniel Veillard21a0f912001-02-25 19:54:14 +00001600xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001601/**
1602 * xmlParseName:
1603 * @ctxt: an XML parser context
1604 *
1605 * parse an XML name.
1606 *
1607 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1608 * CombiningChar | Extender
1609 *
1610 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1611 *
1612 * [6] Names ::= Name (S Name)*
1613 *
1614 * Returns the Name parsed or NULL
1615 */
1616
1617xmlChar *
1618xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001619 const xmlChar *in;
1620 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001621 int count = 0;
1622
1623 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001624
1625 /*
1626 * Accelerator for simple ASCII names
1627 */
1628 in = ctxt->input->cur;
1629 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1630 ((*in >= 0x41) && (*in <= 0x5A)) ||
1631 (*in == '_') || (*in == ':')) {
1632 in++;
1633 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1634 ((*in >= 0x41) && (*in <= 0x5A)) ||
1635 ((*in >= 0x30) && (*in <= 0x39)) ||
1636 (*in == '_') || (*in == ':'))
1637 in++;
1638 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1639 count = in - ctxt->input->cur;
1640 ret = xmlStrndup(ctxt->input->cur, count);
1641 ctxt->input->cur = in;
1642 return(ret);
1643 }
1644 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001645 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001646}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001647
Daniel Veillard21a0f912001-02-25 19:54:14 +00001648xmlChar *
1649xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1650 xmlChar buf[XML_MAX_NAMELEN + 5];
1651 int len = 0, l;
1652 int c;
1653 int count = 0;
1654
1655 /*
1656 * Handler for more complex cases
1657 */
1658 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001659 c = CUR_CHAR(l);
1660 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1661 (!IS_LETTER(c) && (c != '_') &&
1662 (c != ':'))) {
1663 return(NULL);
1664 }
1665
1666 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1667 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1668 (c == '.') || (c == '-') ||
1669 (c == '_') || (c == ':') ||
1670 (IS_COMBINING(c)) ||
1671 (IS_EXTENDER(c)))) {
1672 if (count++ > 100) {
1673 count = 0;
1674 GROW;
1675 }
1676 COPY_BUF(l,buf,len,c);
1677 NEXTL(l);
1678 c = CUR_CHAR(l);
1679 if (len >= XML_MAX_NAMELEN) {
1680 /*
1681 * Okay someone managed to make a huge name, so he's ready to pay
1682 * for the processing speed.
1683 */
1684 xmlChar *buffer;
1685 int max = len * 2;
1686
1687 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1688 if (buffer == NULL) {
1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001691 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001692 return(NULL);
1693 }
1694 memcpy(buffer, buf, len);
1695 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1696 (c == '.') || (c == '-') ||
1697 (c == '_') || (c == ':') ||
1698 (IS_COMBINING(c)) ||
1699 (IS_EXTENDER(c))) {
1700 if (count++ > 100) {
1701 count = 0;
1702 GROW;
1703 }
1704 if (len + 10 > max) {
1705 max *= 2;
1706 buffer = (xmlChar *) xmlRealloc(buffer,
1707 max * sizeof(xmlChar));
1708 if (buffer == NULL) {
1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1710 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001711 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001712 return(NULL);
1713 }
1714 }
1715 COPY_BUF(l,buffer,len,c);
1716 NEXTL(l);
1717 c = CUR_CHAR(l);
1718 }
1719 buffer[len] = 0;
1720 return(buffer);
1721 }
1722 }
1723 return(xmlStrndup(buf, len));
1724}
1725
1726/**
1727 * xmlParseStringName:
1728 * @ctxt: an XML parser context
1729 * @str: a pointer to the string pointer (IN/OUT)
1730 *
1731 * parse an XML name.
1732 *
1733 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1734 * CombiningChar | Extender
1735 *
1736 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1737 *
1738 * [6] Names ::= Name (S Name)*
1739 *
1740 * Returns the Name parsed or NULL. The str pointer
1741 * is updated to the current location in the string.
1742 */
1743
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001744static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001745xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1746 xmlChar buf[XML_MAX_NAMELEN + 5];
1747 const xmlChar *cur = *str;
1748 int len = 0, l;
1749 int c;
1750
1751 c = CUR_SCHAR(cur, l);
1752 if (!IS_LETTER(c) && (c != '_') &&
1753 (c != ':')) {
1754 return(NULL);
1755 }
1756
1757 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1758 (c == '.') || (c == '-') ||
1759 (c == '_') || (c == ':') ||
1760 (IS_COMBINING(c)) ||
1761 (IS_EXTENDER(c))) {
1762 COPY_BUF(l,buf,len,c);
1763 cur += l;
1764 c = CUR_SCHAR(cur, l);
1765 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1766 /*
1767 * Okay someone managed to make a huge name, so he's ready to pay
1768 * for the processing speed.
1769 */
1770 xmlChar *buffer;
1771 int max = len * 2;
1772
1773 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1774 if (buffer == NULL) {
1775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1776 ctxt->sax->error(ctxt->userData,
1777 "xmlParseStringName: out of memory\n");
1778 return(NULL);
1779 }
1780 memcpy(buffer, buf, len);
1781 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1782 (c == '.') || (c == '-') ||
1783 (c == '_') || (c == ':') ||
1784 (IS_COMBINING(c)) ||
1785 (IS_EXTENDER(c))) {
1786 if (len + 10 > max) {
1787 max *= 2;
1788 buffer = (xmlChar *) xmlRealloc(buffer,
1789 max * sizeof(xmlChar));
1790 if (buffer == NULL) {
1791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1792 ctxt->sax->error(ctxt->userData,
1793 "xmlParseStringName: out of memory\n");
1794 return(NULL);
1795 }
1796 }
1797 COPY_BUF(l,buffer,len,c);
1798 cur += l;
1799 c = CUR_SCHAR(cur, l);
1800 }
1801 buffer[len] = 0;
1802 *str = cur;
1803 return(buffer);
1804 }
1805 }
1806 *str = cur;
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseNmtoken:
1812 * @ctxt: an XML parser context
1813 *
1814 * parse an XML Nmtoken.
1815 *
1816 * [7] Nmtoken ::= (NameChar)+
1817 *
1818 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1819 *
1820 * Returns the Nmtoken parsed or NULL
1821 */
1822
1823xmlChar *
1824xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1825 xmlChar buf[XML_MAX_NAMELEN + 5];
1826 int len = 0, l;
1827 int c;
1828 int count = 0;
1829
1830 GROW;
1831 c = CUR_CHAR(l);
1832
1833 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1834 (c == '.') || (c == '-') ||
1835 (c == '_') || (c == ':') ||
1836 (IS_COMBINING(c)) ||
1837 (IS_EXTENDER(c))) {
1838 if (count++ > 100) {
1839 count = 0;
1840 GROW;
1841 }
1842 COPY_BUF(l,buf,len,c);
1843 NEXTL(l);
1844 c = CUR_CHAR(l);
1845 if (len >= XML_MAX_NAMELEN) {
1846 /*
1847 * Okay someone managed to make a huge token, so he's ready to pay
1848 * for the processing speed.
1849 */
1850 xmlChar *buffer;
1851 int max = len * 2;
1852
1853 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1854 if (buffer == NULL) {
1855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1856 ctxt->sax->error(ctxt->userData,
1857 "xmlParseNmtoken: out of memory\n");
1858 return(NULL);
1859 }
1860 memcpy(buffer, buf, len);
1861 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1862 (c == '.') || (c == '-') ||
1863 (c == '_') || (c == ':') ||
1864 (IS_COMBINING(c)) ||
1865 (IS_EXTENDER(c))) {
1866 if (count++ > 100) {
1867 count = 0;
1868 GROW;
1869 }
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001877 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 NEXTL(l);
1883 c = CUR_CHAR(l);
1884 }
1885 buffer[len] = 0;
1886 return(buffer);
1887 }
1888 }
1889 if (len == 0)
1890 return(NULL);
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseEntityValue:
1896 * @ctxt: an XML parser context
1897 * @orig: if non-NULL store a copy of the original entity value
1898 *
1899 * parse a value for ENTITY declarations
1900 *
1901 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1902 * "'" ([^%&'] | PEReference | Reference)* "'"
1903 *
1904 * Returns the EntityValue parsed with reference substitued or NULL
1905 */
1906
1907xmlChar *
1908xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1909 xmlChar *buf = NULL;
1910 int len = 0;
1911 int size = XML_PARSER_BUFFER_SIZE;
1912 int c, l;
1913 xmlChar stop;
1914 xmlChar *ret = NULL;
1915 const xmlChar *cur = NULL;
1916 xmlParserInputPtr input;
1917
1918 if (RAW == '"') stop = '"';
1919 else if (RAW == '\'') stop = '\'';
1920 else {
1921 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1923 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1924 ctxt->wellFormed = 0;
1925 ctxt->disableSAX = 1;
1926 return(NULL);
1927 }
1928 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1929 if (buf == NULL) {
1930 xmlGenericError(xmlGenericErrorContext,
1931 "malloc of %d byte failed\n", size);
1932 return(NULL);
1933 }
1934
1935 /*
1936 * The content of the entity definition is copied in a buffer.
1937 */
1938
1939 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1940 input = ctxt->input;
1941 GROW;
1942 NEXT;
1943 c = CUR_CHAR(l);
1944 /*
1945 * NOTE: 4.4.5 Included in Literal
1946 * When a parameter entity reference appears in a literal entity
1947 * value, ... a single or double quote character in the replacement
1948 * text is always treated as a normal data character and will not
1949 * terminate the literal.
1950 * In practice it means we stop the loop only when back at parsing
1951 * the initial entity and the quote is found
1952 */
1953 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1954 (ctxt->input != input))) {
1955 if (len + 5 >= size) {
1956 size *= 2;
1957 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1958 if (buf == NULL) {
1959 xmlGenericError(xmlGenericErrorContext,
1960 "realloc of %d byte failed\n", size);
1961 return(NULL);
1962 }
1963 }
1964 COPY_BUF(l,buf,len,c);
1965 NEXTL(l);
1966 /*
1967 * Pop-up of finished entities.
1968 */
1969 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1970 xmlPopInput(ctxt);
1971
1972 GROW;
1973 c = CUR_CHAR(l);
1974 if (c == 0) {
1975 GROW;
1976 c = CUR_CHAR(l);
1977 }
1978 }
1979 buf[len] = 0;
1980
1981 /*
1982 * Raise problem w.r.t. '&' and '%' being used in non-entities
1983 * reference constructs. Note Charref will be handled in
1984 * xmlStringDecodeEntities()
1985 */
1986 cur = buf;
1987 while (*cur != 0) { /* non input consuming */
1988 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1989 xmlChar *name;
1990 xmlChar tmp = *cur;
1991
1992 cur++;
1993 name = xmlParseStringName(ctxt, &cur);
1994 if ((name == NULL) || (*cur != ';')) {
1995 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData,
1998 "EntityValue: '%c' forbidden except for entities references\n",
1999 tmp);
2000 ctxt->wellFormed = 0;
2001 ctxt->disableSAX = 1;
2002 }
2003 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2004 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2006 ctxt->sax->error(ctxt->userData,
2007 "EntityValue: PEReferences forbidden in internal subset\n",
2008 tmp);
2009 ctxt->wellFormed = 0;
2010 ctxt->disableSAX = 1;
2011 }
2012 if (name != NULL)
2013 xmlFree(name);
2014 }
2015 cur++;
2016 }
2017
2018 /*
2019 * Then PEReference entities are substituted.
2020 */
2021 if (c != stop) {
2022 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2025 ctxt->wellFormed = 0;
2026 ctxt->disableSAX = 1;
2027 xmlFree(buf);
2028 } else {
2029 NEXT;
2030 /*
2031 * NOTE: 4.4.7 Bypassed
2032 * When a general entity reference appears in the EntityValue in
2033 * an entity declaration, it is bypassed and left as is.
2034 * so XML_SUBSTITUTE_REF is not set here.
2035 */
2036 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2037 0, 0, 0);
2038 if (orig != NULL)
2039 *orig = buf;
2040 else
2041 xmlFree(buf);
2042 }
2043
2044 return(ret);
2045}
2046
2047/**
2048 * xmlParseAttValue:
2049 * @ctxt: an XML parser context
2050 *
2051 * parse a value for an attribute
2052 * Note: the parser won't do substitution of entities here, this
2053 * will be handled later in xmlStringGetNodeList
2054 *
2055 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2056 * "'" ([^<&'] | Reference)* "'"
2057 *
2058 * 3.3.3 Attribute-Value Normalization:
2059 * Before the value of an attribute is passed to the application or
2060 * checked for validity, the XML processor must normalize it as follows:
2061 * - a character reference is processed by appending the referenced
2062 * character to the attribute value
2063 * - an entity reference is processed by recursively processing the
2064 * replacement text of the entity
2065 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2066 * appending #x20 to the normalized value, except that only a single
2067 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2068 * parsed entity or the literal entity value of an internal parsed entity
2069 * - other characters are processed by appending them to the normalized value
2070 * If the declared value is not CDATA, then the XML processor must further
2071 * process the normalized attribute value by discarding any leading and
2072 * trailing space (#x20) characters, and by replacing sequences of space
2073 * (#x20) characters by a single space (#x20) character.
2074 * All attributes for which no declaration has been read should be treated
2075 * by a non-validating parser as if declared CDATA.
2076 *
2077 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2078 */
2079
2080xmlChar *
2081xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2082 xmlChar limit = 0;
2083 xmlChar *buf = NULL;
2084 int len = 0;
2085 int buf_size = 0;
2086 int c, l;
2087 xmlChar *current = NULL;
2088 xmlEntityPtr ent;
2089
2090
2091 SHRINK;
2092 if (NXT(0) == '"') {
2093 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2094 limit = '"';
2095 NEXT;
2096 } else if (NXT(0) == '\'') {
2097 limit = '\'';
2098 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2099 NEXT;
2100 } else {
2101 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2104 ctxt->wellFormed = 0;
2105 ctxt->disableSAX = 1;
2106 return(NULL);
2107 }
2108
2109 /*
2110 * allocate a translation buffer.
2111 */
2112 buf_size = XML_PARSER_BUFFER_SIZE;
2113 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2114 if (buf == NULL) {
2115 perror("xmlParseAttValue: malloc failed");
2116 return(NULL);
2117 }
2118
2119 /*
2120 * Ok loop until we reach one of the ending char or a size limit.
2121 */
2122 c = CUR_CHAR(l);
2123 while (((NXT(0) != limit) && /* checked */
2124 (c != '<')) || (ctxt->token != 0)) {
2125 if (c == 0) break;
2126 if (ctxt->token == '&') {
2127 /*
2128 * The reparsing will be done in xmlStringGetNodeList()
2129 * called by the attribute() function in SAX.c
2130 */
2131 static xmlChar buffer[6] = "&#38;";
2132
2133 if (len > buf_size - 10) {
2134 growBuffer(buf);
2135 }
2136 current = &buffer[0];
2137 while (*current != 0) { /* non input consuming */
2138 buf[len++] = *current++;
2139 }
2140 ctxt->token = 0;
2141 } else if (c == '&') {
2142 if (NXT(1) == '#') {
2143 int val = xmlParseCharRef(ctxt);
2144 if (val == '&') {
2145 /*
2146 * The reparsing will be done in xmlStringGetNodeList()
2147 * called by the attribute() function in SAX.c
2148 */
2149 static xmlChar buffer[6] = "&#38;";
2150
2151 if (len > buf_size - 10) {
2152 growBuffer(buf);
2153 }
2154 current = &buffer[0];
2155 while (*current != 0) { /* non input consuming */
2156 buf[len++] = *current++;
2157 }
2158 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002159 if (len > buf_size - 10) {
2160 growBuffer(buf);
2161 }
Owen Taylor3473f882001-02-23 17:55:21 +00002162 len += xmlCopyChar(0, &buf[len], val);
2163 }
2164 } else {
2165 ent = xmlParseEntityRef(ctxt);
2166 if ((ent != NULL) &&
2167 (ctxt->replaceEntities != 0)) {
2168 xmlChar *rep;
2169
2170 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2171 rep = xmlStringDecodeEntities(ctxt, ent->content,
2172 XML_SUBSTITUTE_REF, 0, 0, 0);
2173 if (rep != NULL) {
2174 current = rep;
2175 while (*current != 0) { /* non input consuming */
2176 buf[len++] = *current++;
2177 if (len > buf_size - 10) {
2178 growBuffer(buf);
2179 }
2180 }
2181 xmlFree(rep);
2182 }
2183 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002184 if (len > buf_size - 10) {
2185 growBuffer(buf);
2186 }
Owen Taylor3473f882001-02-23 17:55:21 +00002187 if (ent->content != NULL)
2188 buf[len++] = ent->content[0];
2189 }
2190 } else if (ent != NULL) {
2191 int i = xmlStrlen(ent->name);
2192 const xmlChar *cur = ent->name;
2193
2194 /*
2195 * This may look absurd but is needed to detect
2196 * entities problems
2197 */
2198 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2199 (ent->content != NULL)) {
2200 xmlChar *rep;
2201 rep = xmlStringDecodeEntities(ctxt, ent->content,
2202 XML_SUBSTITUTE_REF, 0, 0, 0);
2203 if (rep != NULL)
2204 xmlFree(rep);
2205 }
2206
2207 /*
2208 * Just output the reference
2209 */
2210 buf[len++] = '&';
2211 if (len > buf_size - i - 10) {
2212 growBuffer(buf);
2213 }
2214 for (;i > 0;i--)
2215 buf[len++] = *cur++;
2216 buf[len++] = ';';
2217 }
2218 }
2219 } else {
2220 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2221 COPY_BUF(l,buf,len,0x20);
2222 if (len > buf_size - 10) {
2223 growBuffer(buf);
2224 }
2225 } else {
2226 COPY_BUF(l,buf,len,c);
2227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 }
2231 NEXTL(l);
2232 }
2233 GROW;
2234 c = CUR_CHAR(l);
2235 }
2236 buf[len++] = 0;
2237 if (RAW == '<') {
2238 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2240 ctxt->sax->error(ctxt->userData,
2241 "Unescaped '<' not allowed in attributes values\n");
2242 ctxt->wellFormed = 0;
2243 ctxt->disableSAX = 1;
2244 } else if (RAW != limit) {
2245 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2247 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2248 ctxt->wellFormed = 0;
2249 ctxt->disableSAX = 1;
2250 } else
2251 NEXT;
2252 return(buf);
2253}
2254
2255/**
2256 * xmlParseSystemLiteral:
2257 * @ctxt: an XML parser context
2258 *
2259 * parse an XML Literal
2260 *
2261 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2262 *
2263 * Returns the SystemLiteral parsed or NULL
2264 */
2265
2266xmlChar *
2267xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2268 xmlChar *buf = NULL;
2269 int len = 0;
2270 int size = XML_PARSER_BUFFER_SIZE;
2271 int cur, l;
2272 xmlChar stop;
2273 int state = ctxt->instate;
2274 int count = 0;
2275
2276 SHRINK;
2277 if (RAW == '"') {
2278 NEXT;
2279 stop = '"';
2280 } else if (RAW == '\'') {
2281 NEXT;
2282 stop = '\'';
2283 } else {
2284 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2286 ctxt->sax->error(ctxt->userData,
2287 "SystemLiteral \" or ' expected\n");
2288 ctxt->wellFormed = 0;
2289 ctxt->disableSAX = 1;
2290 return(NULL);
2291 }
2292
2293 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2294 if (buf == NULL) {
2295 xmlGenericError(xmlGenericErrorContext,
2296 "malloc of %d byte failed\n", size);
2297 return(NULL);
2298 }
2299 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2300 cur = CUR_CHAR(l);
2301 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2302 if (len + 5 >= size) {
2303 size *= 2;
2304 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2305 if (buf == NULL) {
2306 xmlGenericError(xmlGenericErrorContext,
2307 "realloc of %d byte failed\n", size);
2308 ctxt->instate = (xmlParserInputState) state;
2309 return(NULL);
2310 }
2311 }
2312 count++;
2313 if (count > 50) {
2314 GROW;
2315 count = 0;
2316 }
2317 COPY_BUF(l,buf,len,cur);
2318 NEXTL(l);
2319 cur = CUR_CHAR(l);
2320 if (cur == 0) {
2321 GROW;
2322 SHRINK;
2323 cur = CUR_CHAR(l);
2324 }
2325 }
2326 buf[len] = 0;
2327 ctxt->instate = (xmlParserInputState) state;
2328 if (!IS_CHAR(cur)) {
2329 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2332 ctxt->wellFormed = 0;
2333 ctxt->disableSAX = 1;
2334 } else {
2335 NEXT;
2336 }
2337 return(buf);
2338}
2339
2340/**
2341 * xmlParsePubidLiteral:
2342 * @ctxt: an XML parser context
2343 *
2344 * parse an XML public literal
2345 *
2346 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2347 *
2348 * Returns the PubidLiteral parsed or NULL.
2349 */
2350
2351xmlChar *
2352xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2353 xmlChar *buf = NULL;
2354 int len = 0;
2355 int size = XML_PARSER_BUFFER_SIZE;
2356 xmlChar cur;
2357 xmlChar stop;
2358 int count = 0;
2359
2360 SHRINK;
2361 if (RAW == '"') {
2362 NEXT;
2363 stop = '"';
2364 } else if (RAW == '\'') {
2365 NEXT;
2366 stop = '\'';
2367 } else {
2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370 ctxt->sax->error(ctxt->userData,
2371 "SystemLiteral \" or ' expected\n");
2372 ctxt->wellFormed = 0;
2373 ctxt->disableSAX = 1;
2374 return(NULL);
2375 }
2376 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2377 if (buf == NULL) {
2378 xmlGenericError(xmlGenericErrorContext,
2379 "malloc of %d byte failed\n", size);
2380 return(NULL);
2381 }
2382 cur = CUR;
2383 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2384 if (len + 1 >= size) {
2385 size *= 2;
2386 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2387 if (buf == NULL) {
2388 xmlGenericError(xmlGenericErrorContext,
2389 "realloc of %d byte failed\n", size);
2390 return(NULL);
2391 }
2392 }
2393 buf[len++] = cur;
2394 count++;
2395 if (count > 50) {
2396 GROW;
2397 count = 0;
2398 }
2399 NEXT;
2400 cur = CUR;
2401 if (cur == 0) {
2402 GROW;
2403 SHRINK;
2404 cur = CUR;
2405 }
2406 }
2407 buf[len] = 0;
2408 if (cur != stop) {
2409 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2411 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2412 ctxt->wellFormed = 0;
2413 ctxt->disableSAX = 1;
2414 } else {
2415 NEXT;
2416 }
2417 return(buf);
2418}
2419
Daniel Veillard48b2f892001-02-25 16:11:03 +00002420void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002421/**
2422 * xmlParseCharData:
2423 * @ctxt: an XML parser context
2424 * @cdata: int indicating whether we are within a CDATA section
2425 *
2426 * parse a CharData section.
2427 * if we are within a CDATA section ']]>' marks an end of section.
2428 *
2429 * The right angle bracket (>) may be represented using the string "&gt;",
2430 * and must, for compatibility, be escaped using "&gt;" or a character
2431 * reference when it appears in the string "]]>" in content, when that
2432 * string is not marking the end of a CDATA section.
2433 *
2434 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2435 */
2436
2437void
2438xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002439 const xmlChar *in;
2440 int nbchar = 0;
2441
2442 SHRINK;
2443 GROW;
2444 /*
2445 * Accelerated common case where input don't need to be
2446 * modified before passing it to the handler.
2447 */
2448 if ((ctxt->token == 0) && (!cdata)) {
2449 in = ctxt->input->cur;
2450 do {
2451 while (((*in >= 0x20) && (*in != '<') &&
2452 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2453 in++;
2454 if (*in == 0xA) {
2455 ctxt->input->line++;
2456 continue; /* while */
2457 }
2458 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002459 if (nbchar > 0) {
2460 if (IS_BLANK(*ctxt->input->cur) &&
2461 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2462 if (ctxt->sax->ignorableWhitespace != NULL)
2463 ctxt->sax->ignorableWhitespace(ctxt->userData,
2464 ctxt->input->cur, nbchar);
2465 } else {
2466 if (ctxt->sax->characters != NULL)
2467 ctxt->sax->characters(ctxt->userData,
2468 ctxt->input->cur, nbchar);
2469 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002470 }
2471 ctxt->input->cur = in;
2472 if (*in == 0xD) {
2473 in++;
2474 if (*in == 0xA) {
2475 ctxt->input->cur = in;
2476 in++;
2477 ctxt->input->line++;
2478 continue; /* while */
2479 }
2480 in--;
2481 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002482 if (*in == '<') {
2483 return;
2484 }
2485 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002486 return;
2487 }
2488 SHRINK;
2489 GROW;
2490 in = ctxt->input->cur;
2491 } while ((*in >= 0x20) && (*in <= 0x7F));
2492 nbchar = 0;
2493 }
2494 xmlParseCharDataComplex(ctxt, cdata);
2495}
2496
2497void
2498xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002499 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2500 int nbchar = 0;
2501 int cur, l;
2502 int count = 0;
2503
2504 SHRINK;
2505 GROW;
2506 cur = CUR_CHAR(l);
2507 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2508 ((cur != '&') || (ctxt->token == '&')) &&
2509 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2510 if ((cur == ']') && (NXT(1) == ']') &&
2511 (NXT(2) == '>')) {
2512 if (cdata) break;
2513 else {
2514 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2516 ctxt->sax->error(ctxt->userData,
2517 "Sequence ']]>' not allowed in content\n");
2518 /* Should this be relaxed ??? I see a "must here */
2519 ctxt->wellFormed = 0;
2520 ctxt->disableSAX = 1;
2521 }
2522 }
2523 COPY_BUF(l,buf,nbchar,cur);
2524 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2525 /*
2526 * Ok the segment is to be consumed as chars.
2527 */
2528 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2529 if (areBlanks(ctxt, buf, nbchar)) {
2530 if (ctxt->sax->ignorableWhitespace != NULL)
2531 ctxt->sax->ignorableWhitespace(ctxt->userData,
2532 buf, nbchar);
2533 } else {
2534 if (ctxt->sax->characters != NULL)
2535 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2536 }
2537 }
2538 nbchar = 0;
2539 }
2540 count++;
2541 if (count > 50) {
2542 GROW;
2543 count = 0;
2544 }
2545 NEXTL(l);
2546 cur = CUR_CHAR(l);
2547 }
2548 if (nbchar != 0) {
2549 /*
2550 * Ok the segment is to be consumed as chars.
2551 */
2552 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2553 if (areBlanks(ctxt, buf, nbchar)) {
2554 if (ctxt->sax->ignorableWhitespace != NULL)
2555 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2556 } else {
2557 if (ctxt->sax->characters != NULL)
2558 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2559 }
2560 }
2561 }
2562}
2563
2564/**
2565 * xmlParseExternalID:
2566 * @ctxt: an XML parser context
2567 * @publicID: a xmlChar** receiving PubidLiteral
2568 * @strict: indicate whether we should restrict parsing to only
2569 * production [75], see NOTE below
2570 *
2571 * Parse an External ID or a Public ID
2572 *
2573 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2574 * 'PUBLIC' S PubidLiteral S SystemLiteral
2575 *
2576 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2577 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2578 *
2579 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2580 *
2581 * Returns the function returns SystemLiteral and in the second
2582 * case publicID receives PubidLiteral, is strict is off
2583 * it is possible to return NULL and have publicID set.
2584 */
2585
2586xmlChar *
2587xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2588 xmlChar *URI = NULL;
2589
2590 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002591
2592 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002593 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2594 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2595 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2596 SKIP(6);
2597 if (!IS_BLANK(CUR)) {
2598 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2600 ctxt->sax->error(ctxt->userData,
2601 "Space required after 'SYSTEM'\n");
2602 ctxt->wellFormed = 0;
2603 ctxt->disableSAX = 1;
2604 }
2605 SKIP_BLANKS;
2606 URI = xmlParseSystemLiteral(ctxt);
2607 if (URI == NULL) {
2608 ctxt->errNo = XML_ERR_URI_REQUIRED;
2609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2610 ctxt->sax->error(ctxt->userData,
2611 "xmlParseExternalID: SYSTEM, no URI\n");
2612 ctxt->wellFormed = 0;
2613 ctxt->disableSAX = 1;
2614 }
2615 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2616 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2617 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2618 SKIP(6);
2619 if (!IS_BLANK(CUR)) {
2620 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2622 ctxt->sax->error(ctxt->userData,
2623 "Space required after 'PUBLIC'\n");
2624 ctxt->wellFormed = 0;
2625 ctxt->disableSAX = 1;
2626 }
2627 SKIP_BLANKS;
2628 *publicID = xmlParsePubidLiteral(ctxt);
2629 if (*publicID == NULL) {
2630 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2632 ctxt->sax->error(ctxt->userData,
2633 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2634 ctxt->wellFormed = 0;
2635 ctxt->disableSAX = 1;
2636 }
2637 if (strict) {
2638 /*
2639 * We don't handle [83] so "S SystemLiteral" is required.
2640 */
2641 if (!IS_BLANK(CUR)) {
2642 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2644 ctxt->sax->error(ctxt->userData,
2645 "Space required after the Public Identifier\n");
2646 ctxt->wellFormed = 0;
2647 ctxt->disableSAX = 1;
2648 }
2649 } else {
2650 /*
2651 * We handle [83] so we return immediately, if
2652 * "S SystemLiteral" is not detected. From a purely parsing
2653 * point of view that's a nice mess.
2654 */
2655 const xmlChar *ptr;
2656 GROW;
2657
2658 ptr = CUR_PTR;
2659 if (!IS_BLANK(*ptr)) return(NULL);
2660
2661 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2662 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2663 }
2664 SKIP_BLANKS;
2665 URI = xmlParseSystemLiteral(ctxt);
2666 if (URI == NULL) {
2667 ctxt->errNo = XML_ERR_URI_REQUIRED;
2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669 ctxt->sax->error(ctxt->userData,
2670 "xmlParseExternalID: PUBLIC, no URI\n");
2671 ctxt->wellFormed = 0;
2672 ctxt->disableSAX = 1;
2673 }
2674 }
2675 return(URI);
2676}
2677
2678/**
2679 * xmlParseComment:
2680 * @ctxt: an XML parser context
2681 *
2682 * Skip an XML (SGML) comment <!-- .... -->
2683 * The spec says that "For compatibility, the string "--" (double-hyphen)
2684 * must not occur within comments. "
2685 *
2686 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2687 */
2688void
2689xmlParseComment(xmlParserCtxtPtr ctxt) {
2690 xmlChar *buf = NULL;
2691 int len;
2692 int size = XML_PARSER_BUFFER_SIZE;
2693 int q, ql;
2694 int r, rl;
2695 int cur, l;
2696 xmlParserInputState state;
2697 xmlParserInputPtr input = ctxt->input;
2698 int count = 0;
2699
2700 /*
2701 * Check that there is a comment right here.
2702 */
2703 if ((RAW != '<') || (NXT(1) != '!') ||
2704 (NXT(2) != '-') || (NXT(3) != '-')) return;
2705
2706 state = ctxt->instate;
2707 ctxt->instate = XML_PARSER_COMMENT;
2708 SHRINK;
2709 SKIP(4);
2710 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2711 if (buf == NULL) {
2712 xmlGenericError(xmlGenericErrorContext,
2713 "malloc of %d byte failed\n", size);
2714 ctxt->instate = state;
2715 return;
2716 }
2717 q = CUR_CHAR(ql);
2718 NEXTL(ql);
2719 r = CUR_CHAR(rl);
2720 NEXTL(rl);
2721 cur = CUR_CHAR(l);
2722 len = 0;
2723 while (IS_CHAR(cur) && /* checked */
2724 ((cur != '>') ||
2725 (r != '-') || (q != '-'))) {
2726 if ((r == '-') && (q == '-') && (len > 1)) {
2727 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2729 ctxt->sax->error(ctxt->userData,
2730 "Comment must not contain '--' (double-hyphen)`\n");
2731 ctxt->wellFormed = 0;
2732 ctxt->disableSAX = 1;
2733 }
2734 if (len + 5 >= size) {
2735 size *= 2;
2736 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2737 if (buf == NULL) {
2738 xmlGenericError(xmlGenericErrorContext,
2739 "realloc of %d byte failed\n", size);
2740 ctxt->instate = state;
2741 return;
2742 }
2743 }
2744 COPY_BUF(ql,buf,len,q);
2745 q = r;
2746 ql = rl;
2747 r = cur;
2748 rl = l;
2749
2750 count++;
2751 if (count > 50) {
2752 GROW;
2753 count = 0;
2754 }
2755 NEXTL(l);
2756 cur = CUR_CHAR(l);
2757 if (cur == 0) {
2758 SHRINK;
2759 GROW;
2760 cur = CUR_CHAR(l);
2761 }
2762 }
2763 buf[len] = 0;
2764 if (!IS_CHAR(cur)) {
2765 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2767 ctxt->sax->error(ctxt->userData,
2768 "Comment not terminated \n<!--%.50s\n", buf);
2769 ctxt->wellFormed = 0;
2770 ctxt->disableSAX = 1;
2771 xmlFree(buf);
2772 } else {
2773 if (input != ctxt->input) {
2774 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData,
2777"Comment doesn't start and stop in the same entity\n");
2778 ctxt->wellFormed = 0;
2779 ctxt->disableSAX = 1;
2780 }
2781 NEXT;
2782 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2783 (!ctxt->disableSAX))
2784 ctxt->sax->comment(ctxt->userData, buf);
2785 xmlFree(buf);
2786 }
2787 ctxt->instate = state;
2788}
2789
2790/**
2791 * xmlParsePITarget:
2792 * @ctxt: an XML parser context
2793 *
2794 * parse the name of a PI
2795 *
2796 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2797 *
2798 * Returns the PITarget name or NULL
2799 */
2800
2801xmlChar *
2802xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2803 xmlChar *name;
2804
2805 name = xmlParseName(ctxt);
2806 if ((name != NULL) &&
2807 ((name[0] == 'x') || (name[0] == 'X')) &&
2808 ((name[1] == 'm') || (name[1] == 'M')) &&
2809 ((name[2] == 'l') || (name[2] == 'L'))) {
2810 int i;
2811 if ((name[0] == 'x') && (name[1] == 'm') &&
2812 (name[2] == 'l') && (name[3] == 0)) {
2813 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2815 ctxt->sax->error(ctxt->userData,
2816 "XML declaration allowed only at the start of the document\n");
2817 ctxt->wellFormed = 0;
2818 ctxt->disableSAX = 1;
2819 return(name);
2820 } else if (name[3] == 0) {
2821 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2823 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2824 ctxt->wellFormed = 0;
2825 ctxt->disableSAX = 1;
2826 return(name);
2827 }
2828 for (i = 0;;i++) {
2829 if (xmlW3CPIs[i] == NULL) break;
2830 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2831 return(name);
2832 }
2833 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2834 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2835 ctxt->sax->warning(ctxt->userData,
2836 "xmlParsePItarget: invalid name prefix 'xml'\n");
2837 }
2838 }
2839 return(name);
2840}
2841
2842/**
2843 * xmlParsePI:
2844 * @ctxt: an XML parser context
2845 *
2846 * parse an XML Processing Instruction.
2847 *
2848 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2849 *
2850 * The processing is transfered to SAX once parsed.
2851 */
2852
2853void
2854xmlParsePI(xmlParserCtxtPtr ctxt) {
2855 xmlChar *buf = NULL;
2856 int len = 0;
2857 int size = XML_PARSER_BUFFER_SIZE;
2858 int cur, l;
2859 xmlChar *target;
2860 xmlParserInputState state;
2861 int count = 0;
2862
2863 if ((RAW == '<') && (NXT(1) == '?')) {
2864 xmlParserInputPtr input = ctxt->input;
2865 state = ctxt->instate;
2866 ctxt->instate = XML_PARSER_PI;
2867 /*
2868 * this is a Processing Instruction.
2869 */
2870 SKIP(2);
2871 SHRINK;
2872
2873 /*
2874 * Parse the target name and check for special support like
2875 * namespace.
2876 */
2877 target = xmlParsePITarget(ctxt);
2878 if (target != NULL) {
2879 if ((RAW == '?') && (NXT(1) == '>')) {
2880 if (input != ctxt->input) {
2881 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2883 ctxt->sax->error(ctxt->userData,
2884 "PI declaration doesn't start and stop in the same entity\n");
2885 ctxt->wellFormed = 0;
2886 ctxt->disableSAX = 1;
2887 }
2888 SKIP(2);
2889
2890 /*
2891 * SAX: PI detected.
2892 */
2893 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2894 (ctxt->sax->processingInstruction != NULL))
2895 ctxt->sax->processingInstruction(ctxt->userData,
2896 target, NULL);
2897 ctxt->instate = state;
2898 xmlFree(target);
2899 return;
2900 }
2901 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2902 if (buf == NULL) {
2903 xmlGenericError(xmlGenericErrorContext,
2904 "malloc of %d byte failed\n", size);
2905 ctxt->instate = state;
2906 return;
2907 }
2908 cur = CUR;
2909 if (!IS_BLANK(cur)) {
2910 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2912 ctxt->sax->error(ctxt->userData,
2913 "xmlParsePI: PI %s space expected\n", target);
2914 ctxt->wellFormed = 0;
2915 ctxt->disableSAX = 1;
2916 }
2917 SKIP_BLANKS;
2918 cur = CUR_CHAR(l);
2919 while (IS_CHAR(cur) && /* checked */
2920 ((cur != '?') || (NXT(1) != '>'))) {
2921 if (len + 5 >= size) {
2922 size *= 2;
2923 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2924 if (buf == NULL) {
2925 xmlGenericError(xmlGenericErrorContext,
2926 "realloc of %d byte failed\n", size);
2927 ctxt->instate = state;
2928 return;
2929 }
2930 }
2931 count++;
2932 if (count > 50) {
2933 GROW;
2934 count = 0;
2935 }
2936 COPY_BUF(l,buf,len,cur);
2937 NEXTL(l);
2938 cur = CUR_CHAR(l);
2939 if (cur == 0) {
2940 SHRINK;
2941 GROW;
2942 cur = CUR_CHAR(l);
2943 }
2944 }
2945 buf[len] = 0;
2946 if (cur != '?') {
2947 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2949 ctxt->sax->error(ctxt->userData,
2950 "xmlParsePI: PI %s never end ...\n", target);
2951 ctxt->wellFormed = 0;
2952 ctxt->disableSAX = 1;
2953 } else {
2954 if (input != ctxt->input) {
2955 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2957 ctxt->sax->error(ctxt->userData,
2958 "PI declaration doesn't start and stop in the same entity\n");
2959 ctxt->wellFormed = 0;
2960 ctxt->disableSAX = 1;
2961 }
2962 SKIP(2);
2963
2964 /*
2965 * SAX: PI detected.
2966 */
2967 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2968 (ctxt->sax->processingInstruction != NULL))
2969 ctxt->sax->processingInstruction(ctxt->userData,
2970 target, buf);
2971 }
2972 xmlFree(buf);
2973 xmlFree(target);
2974 } else {
2975 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2977 ctxt->sax->error(ctxt->userData,
2978 "xmlParsePI : no target name\n");
2979 ctxt->wellFormed = 0;
2980 ctxt->disableSAX = 1;
2981 }
2982 ctxt->instate = state;
2983 }
2984}
2985
2986/**
2987 * xmlParseNotationDecl:
2988 * @ctxt: an XML parser context
2989 *
2990 * parse a notation declaration
2991 *
2992 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2993 *
2994 * Hence there is actually 3 choices:
2995 * 'PUBLIC' S PubidLiteral
2996 * 'PUBLIC' S PubidLiteral S SystemLiteral
2997 * and 'SYSTEM' S SystemLiteral
2998 *
2999 * See the NOTE on xmlParseExternalID().
3000 */
3001
3002void
3003xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3004 xmlChar *name;
3005 xmlChar *Pubid;
3006 xmlChar *Systemid;
3007
3008 if ((RAW == '<') && (NXT(1) == '!') &&
3009 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3010 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3011 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3012 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3013 xmlParserInputPtr input = ctxt->input;
3014 SHRINK;
3015 SKIP(10);
3016 if (!IS_BLANK(CUR)) {
3017 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3019 ctxt->sax->error(ctxt->userData,
3020 "Space required after '<!NOTATION'\n");
3021 ctxt->wellFormed = 0;
3022 ctxt->disableSAX = 1;
3023 return;
3024 }
3025 SKIP_BLANKS;
3026
Daniel Veillard29631a82001-03-05 09:49:20 +00003027 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003028 if (name == NULL) {
3029 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3031 ctxt->sax->error(ctxt->userData,
3032 "NOTATION: Name expected here\n");
3033 ctxt->wellFormed = 0;
3034 ctxt->disableSAX = 1;
3035 return;
3036 }
3037 if (!IS_BLANK(CUR)) {
3038 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3040 ctxt->sax->error(ctxt->userData,
3041 "Space required after the NOTATION name'\n");
3042 ctxt->wellFormed = 0;
3043 ctxt->disableSAX = 1;
3044 return;
3045 }
3046 SKIP_BLANKS;
3047
3048 /*
3049 * Parse the IDs.
3050 */
3051 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3052 SKIP_BLANKS;
3053
3054 if (RAW == '>') {
3055 if (input != ctxt->input) {
3056 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3058 ctxt->sax->error(ctxt->userData,
3059"Notation declaration doesn't start and stop in the same entity\n");
3060 ctxt->wellFormed = 0;
3061 ctxt->disableSAX = 1;
3062 }
3063 NEXT;
3064 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3065 (ctxt->sax->notationDecl != NULL))
3066 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3067 } else {
3068 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3070 ctxt->sax->error(ctxt->userData,
3071 "'>' required to close NOTATION declaration\n");
3072 ctxt->wellFormed = 0;
3073 ctxt->disableSAX = 1;
3074 }
3075 xmlFree(name);
3076 if (Systemid != NULL) xmlFree(Systemid);
3077 if (Pubid != NULL) xmlFree(Pubid);
3078 }
3079}
3080
3081/**
3082 * xmlParseEntityDecl:
3083 * @ctxt: an XML parser context
3084 *
3085 * parse <!ENTITY declarations
3086 *
3087 * [70] EntityDecl ::= GEDecl | PEDecl
3088 *
3089 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3090 *
3091 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3092 *
3093 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3094 *
3095 * [74] PEDef ::= EntityValue | ExternalID
3096 *
3097 * [76] NDataDecl ::= S 'NDATA' S Name
3098 *
3099 * [ VC: Notation Declared ]
3100 * The Name must match the declared name of a notation.
3101 */
3102
3103void
3104xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3105 xmlChar *name = NULL;
3106 xmlChar *value = NULL;
3107 xmlChar *URI = NULL, *literal = NULL;
3108 xmlChar *ndata = NULL;
3109 int isParameter = 0;
3110 xmlChar *orig = NULL;
3111
3112 GROW;
3113 if ((RAW == '<') && (NXT(1) == '!') &&
3114 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3115 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3116 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3117 xmlParserInputPtr input = ctxt->input;
3118 ctxt->instate = XML_PARSER_ENTITY_DECL;
3119 SHRINK;
3120 SKIP(8);
3121 if (!IS_BLANK(CUR)) {
3122 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3124 ctxt->sax->error(ctxt->userData,
3125 "Space required after '<!ENTITY'\n");
3126 ctxt->wellFormed = 0;
3127 ctxt->disableSAX = 1;
3128 }
3129 SKIP_BLANKS;
3130
3131 if (RAW == '%') {
3132 NEXT;
3133 if (!IS_BLANK(CUR)) {
3134 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3136 ctxt->sax->error(ctxt->userData,
3137 "Space required after '%'\n");
3138 ctxt->wellFormed = 0;
3139 ctxt->disableSAX = 1;
3140 }
3141 SKIP_BLANKS;
3142 isParameter = 1;
3143 }
3144
Daniel Veillard29631a82001-03-05 09:49:20 +00003145 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003146 if (name == NULL) {
3147 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3149 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3150 ctxt->wellFormed = 0;
3151 ctxt->disableSAX = 1;
3152 return;
3153 }
3154 if (!IS_BLANK(CUR)) {
3155 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3157 ctxt->sax->error(ctxt->userData,
3158 "Space required after the entity name\n");
3159 ctxt->wellFormed = 0;
3160 ctxt->disableSAX = 1;
3161 }
3162 SKIP_BLANKS;
3163
3164 /*
3165 * handle the various case of definitions...
3166 */
3167 if (isParameter) {
3168 if ((RAW == '"') || (RAW == '\'')) {
3169 value = xmlParseEntityValue(ctxt, &orig);
3170 if (value) {
3171 if ((ctxt->sax != NULL) &&
3172 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3173 ctxt->sax->entityDecl(ctxt->userData, name,
3174 XML_INTERNAL_PARAMETER_ENTITY,
3175 NULL, NULL, value);
3176 }
3177 } else {
3178 URI = xmlParseExternalID(ctxt, &literal, 1);
3179 if ((URI == NULL) && (literal == NULL)) {
3180 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3181 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3182 ctxt->sax->error(ctxt->userData,
3183 "Entity value required\n");
3184 ctxt->wellFormed = 0;
3185 ctxt->disableSAX = 1;
3186 }
3187 if (URI) {
3188 xmlURIPtr uri;
3189
3190 uri = xmlParseURI((const char *) URI);
3191 if (uri == NULL) {
3192 ctxt->errNo = XML_ERR_INVALID_URI;
3193 if ((ctxt->sax != NULL) &&
3194 (!ctxt->disableSAX) &&
3195 (ctxt->sax->error != NULL))
3196 ctxt->sax->error(ctxt->userData,
3197 "Invalid URI: %s\n", URI);
3198 ctxt->wellFormed = 0;
3199 } else {
3200 if (uri->fragment != NULL) {
3201 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3202 if ((ctxt->sax != NULL) &&
3203 (!ctxt->disableSAX) &&
3204 (ctxt->sax->error != NULL))
3205 ctxt->sax->error(ctxt->userData,
3206 "Fragment not allowed: %s\n", URI);
3207 ctxt->wellFormed = 0;
3208 } else {
3209 if ((ctxt->sax != NULL) &&
3210 (!ctxt->disableSAX) &&
3211 (ctxt->sax->entityDecl != NULL))
3212 ctxt->sax->entityDecl(ctxt->userData, name,
3213 XML_EXTERNAL_PARAMETER_ENTITY,
3214 literal, URI, NULL);
3215 }
3216 xmlFreeURI(uri);
3217 }
3218 }
3219 }
3220 } else {
3221 if ((RAW == '"') || (RAW == '\'')) {
3222 value = xmlParseEntityValue(ctxt, &orig);
3223 if ((ctxt->sax != NULL) &&
3224 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3225 ctxt->sax->entityDecl(ctxt->userData, name,
3226 XML_INTERNAL_GENERAL_ENTITY,
3227 NULL, NULL, value);
3228 } else {
3229 URI = xmlParseExternalID(ctxt, &literal, 1);
3230 if ((URI == NULL) && (literal == NULL)) {
3231 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3233 ctxt->sax->error(ctxt->userData,
3234 "Entity value required\n");
3235 ctxt->wellFormed = 0;
3236 ctxt->disableSAX = 1;
3237 }
3238 if (URI) {
3239 xmlURIPtr uri;
3240
3241 uri = xmlParseURI((const char *)URI);
3242 if (uri == NULL) {
3243 ctxt->errNo = XML_ERR_INVALID_URI;
3244 if ((ctxt->sax != NULL) &&
3245 (!ctxt->disableSAX) &&
3246 (ctxt->sax->error != NULL))
3247 ctxt->sax->error(ctxt->userData,
3248 "Invalid URI: %s\n", URI);
3249 ctxt->wellFormed = 0;
3250 } else {
3251 if (uri->fragment != NULL) {
3252 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3253 if ((ctxt->sax != NULL) &&
3254 (!ctxt->disableSAX) &&
3255 (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData,
3257 "Fragment not allowed: %s\n", URI);
3258 ctxt->wellFormed = 0;
3259 }
3260 xmlFreeURI(uri);
3261 }
3262 }
3263 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3264 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "Space required before 'NDATA'\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 }
3271 SKIP_BLANKS;
3272 if ((RAW == 'N') && (NXT(1) == 'D') &&
3273 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3274 (NXT(4) == 'A')) {
3275 SKIP(5);
3276 if (!IS_BLANK(CUR)) {
3277 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3279 ctxt->sax->error(ctxt->userData,
3280 "Space required after 'NDATA'\n");
3281 ctxt->wellFormed = 0;
3282 ctxt->disableSAX = 1;
3283 }
3284 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003285 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003286 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3287 (ctxt->sax->unparsedEntityDecl != NULL))
3288 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3289 literal, URI, ndata);
3290 } else {
3291 if ((ctxt->sax != NULL) &&
3292 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3293 ctxt->sax->entityDecl(ctxt->userData, name,
3294 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3295 literal, URI, NULL);
3296 }
3297 }
3298 }
3299 SKIP_BLANKS;
3300 if (RAW != '>') {
3301 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3303 ctxt->sax->error(ctxt->userData,
3304 "xmlParseEntityDecl: entity %s not terminated\n", name);
3305 ctxt->wellFormed = 0;
3306 ctxt->disableSAX = 1;
3307 } else {
3308 if (input != ctxt->input) {
3309 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312"Entity declaration doesn't start and stop in the same entity\n");
3313 ctxt->wellFormed = 0;
3314 ctxt->disableSAX = 1;
3315 }
3316 NEXT;
3317 }
3318 if (orig != NULL) {
3319 /*
3320 * Ugly mechanism to save the raw entity value.
3321 */
3322 xmlEntityPtr cur = NULL;
3323
3324 if (isParameter) {
3325 if ((ctxt->sax != NULL) &&
3326 (ctxt->sax->getParameterEntity != NULL))
3327 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3328 } else {
3329 if ((ctxt->sax != NULL) &&
3330 (ctxt->sax->getEntity != NULL))
3331 cur = ctxt->sax->getEntity(ctxt->userData, name);
3332 }
3333 if (cur != NULL) {
3334 if (cur->orig != NULL)
3335 xmlFree(orig);
3336 else
3337 cur->orig = orig;
3338 } else
3339 xmlFree(orig);
3340 }
3341 if (name != NULL) xmlFree(name);
3342 if (value != NULL) xmlFree(value);
3343 if (URI != NULL) xmlFree(URI);
3344 if (literal != NULL) xmlFree(literal);
3345 if (ndata != NULL) xmlFree(ndata);
3346 }
3347}
3348
3349/**
3350 * xmlParseDefaultDecl:
3351 * @ctxt: an XML parser context
3352 * @value: Receive a possible fixed default value for the attribute
3353 *
3354 * Parse an attribute default declaration
3355 *
3356 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3357 *
3358 * [ VC: Required Attribute ]
3359 * if the default declaration is the keyword #REQUIRED, then the
3360 * attribute must be specified for all elements of the type in the
3361 * attribute-list declaration.
3362 *
3363 * [ VC: Attribute Default Legal ]
3364 * The declared default value must meet the lexical constraints of
3365 * the declared attribute type c.f. xmlValidateAttributeDecl()
3366 *
3367 * [ VC: Fixed Attribute Default ]
3368 * if an attribute has a default value declared with the #FIXED
3369 * keyword, instances of that attribute must match the default value.
3370 *
3371 * [ WFC: No < in Attribute Values ]
3372 * handled in xmlParseAttValue()
3373 *
3374 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3375 * or XML_ATTRIBUTE_FIXED.
3376 */
3377
3378int
3379xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3380 int val;
3381 xmlChar *ret;
3382
3383 *value = NULL;
3384 if ((RAW == '#') && (NXT(1) == 'R') &&
3385 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3386 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3387 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3388 (NXT(8) == 'D')) {
3389 SKIP(9);
3390 return(XML_ATTRIBUTE_REQUIRED);
3391 }
3392 if ((RAW == '#') && (NXT(1) == 'I') &&
3393 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3394 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3395 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3396 SKIP(8);
3397 return(XML_ATTRIBUTE_IMPLIED);
3398 }
3399 val = XML_ATTRIBUTE_NONE;
3400 if ((RAW == '#') && (NXT(1) == 'F') &&
3401 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3402 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3403 SKIP(6);
3404 val = XML_ATTRIBUTE_FIXED;
3405 if (!IS_BLANK(CUR)) {
3406 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "Space required after '#FIXED'\n");
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 }
3413 SKIP_BLANKS;
3414 }
3415 ret = xmlParseAttValue(ctxt);
3416 ctxt->instate = XML_PARSER_DTD;
3417 if (ret == NULL) {
3418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3419 ctxt->sax->error(ctxt->userData,
3420 "Attribute default value declaration error\n");
3421 ctxt->wellFormed = 0;
3422 ctxt->disableSAX = 1;
3423 } else
3424 *value = ret;
3425 return(val);
3426}
3427
3428/**
3429 * xmlParseNotationType:
3430 * @ctxt: an XML parser context
3431 *
3432 * parse an Notation attribute type.
3433 *
3434 * Note: the leading 'NOTATION' S part has already being parsed...
3435 *
3436 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3437 *
3438 * [ VC: Notation Attributes ]
3439 * Values of this type must match one of the notation names included
3440 * in the declaration; all notation names in the declaration must be declared.
3441 *
3442 * Returns: the notation attribute tree built while parsing
3443 */
3444
3445xmlEnumerationPtr
3446xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3447 xmlChar *name;
3448 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3449
3450 if (RAW != '(') {
3451 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3453 ctxt->sax->error(ctxt->userData,
3454 "'(' required to start 'NOTATION'\n");
3455 ctxt->wellFormed = 0;
3456 ctxt->disableSAX = 1;
3457 return(NULL);
3458 }
3459 SHRINK;
3460 do {
3461 NEXT;
3462 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003463 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003464 if (name == NULL) {
3465 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData,
3468 "Name expected in NOTATION declaration\n");
3469 ctxt->wellFormed = 0;
3470 ctxt->disableSAX = 1;
3471 return(ret);
3472 }
3473 cur = xmlCreateEnumeration(name);
3474 xmlFree(name);
3475 if (cur == NULL) return(ret);
3476 if (last == NULL) ret = last = cur;
3477 else {
3478 last->next = cur;
3479 last = cur;
3480 }
3481 SKIP_BLANKS;
3482 } while (RAW == '|');
3483 if (RAW != ')') {
3484 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3486 ctxt->sax->error(ctxt->userData,
3487 "')' required to finish NOTATION declaration\n");
3488 ctxt->wellFormed = 0;
3489 ctxt->disableSAX = 1;
3490 if ((last != NULL) && (last != ret))
3491 xmlFreeEnumeration(last);
3492 return(ret);
3493 }
3494 NEXT;
3495 return(ret);
3496}
3497
3498/**
3499 * xmlParseEnumerationType:
3500 * @ctxt: an XML parser context
3501 *
3502 * parse an Enumeration attribute type.
3503 *
3504 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3505 *
3506 * [ VC: Enumeration ]
3507 * Values of this type must match one of the Nmtoken tokens in
3508 * the declaration
3509 *
3510 * Returns: the enumeration attribute tree built while parsing
3511 */
3512
3513xmlEnumerationPtr
3514xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3515 xmlChar *name;
3516 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3517
3518 if (RAW != '(') {
3519 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3521 ctxt->sax->error(ctxt->userData,
3522 "'(' required to start ATTLIST enumeration\n");
3523 ctxt->wellFormed = 0;
3524 ctxt->disableSAX = 1;
3525 return(NULL);
3526 }
3527 SHRINK;
3528 do {
3529 NEXT;
3530 SKIP_BLANKS;
3531 name = xmlParseNmtoken(ctxt);
3532 if (name == NULL) {
3533 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3535 ctxt->sax->error(ctxt->userData,
3536 "NmToken expected in ATTLIST enumeration\n");
3537 ctxt->wellFormed = 0;
3538 ctxt->disableSAX = 1;
3539 return(ret);
3540 }
3541 cur = xmlCreateEnumeration(name);
3542 xmlFree(name);
3543 if (cur == NULL) return(ret);
3544 if (last == NULL) ret = last = cur;
3545 else {
3546 last->next = cur;
3547 last = cur;
3548 }
3549 SKIP_BLANKS;
3550 } while (RAW == '|');
3551 if (RAW != ')') {
3552 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3554 ctxt->sax->error(ctxt->userData,
3555 "')' required to finish ATTLIST enumeration\n");
3556 ctxt->wellFormed = 0;
3557 ctxt->disableSAX = 1;
3558 return(ret);
3559 }
3560 NEXT;
3561 return(ret);
3562}
3563
3564/**
3565 * xmlParseEnumeratedType:
3566 * @ctxt: an XML parser context
3567 * @tree: the enumeration tree built while parsing
3568 *
3569 * parse an Enumerated attribute type.
3570 *
3571 * [57] EnumeratedType ::= NotationType | Enumeration
3572 *
3573 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3574 *
3575 *
3576 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3577 */
3578
3579int
3580xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3581 if ((RAW == 'N') && (NXT(1) == 'O') &&
3582 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3583 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3584 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3585 SKIP(8);
3586 if (!IS_BLANK(CUR)) {
3587 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3589 ctxt->sax->error(ctxt->userData,
3590 "Space required after 'NOTATION'\n");
3591 ctxt->wellFormed = 0;
3592 ctxt->disableSAX = 1;
3593 return(0);
3594 }
3595 SKIP_BLANKS;
3596 *tree = xmlParseNotationType(ctxt);
3597 if (*tree == NULL) return(0);
3598 return(XML_ATTRIBUTE_NOTATION);
3599 }
3600 *tree = xmlParseEnumerationType(ctxt);
3601 if (*tree == NULL) return(0);
3602 return(XML_ATTRIBUTE_ENUMERATION);
3603}
3604
3605/**
3606 * xmlParseAttributeType:
3607 * @ctxt: an XML parser context
3608 * @tree: the enumeration tree built while parsing
3609 *
3610 * parse the Attribute list def for an element
3611 *
3612 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3613 *
3614 * [55] StringType ::= 'CDATA'
3615 *
3616 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3617 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3618 *
3619 * Validity constraints for attribute values syntax are checked in
3620 * xmlValidateAttributeValue()
3621 *
3622 * [ VC: ID ]
3623 * Values of type ID must match the Name production. A name must not
3624 * appear more than once in an XML document as a value of this type;
3625 * i.e., ID values must uniquely identify the elements which bear them.
3626 *
3627 * [ VC: One ID per Element Type ]
3628 * No element type may have more than one ID attribute specified.
3629 *
3630 * [ VC: ID Attribute Default ]
3631 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3632 *
3633 * [ VC: IDREF ]
3634 * Values of type IDREF must match the Name production, and values
3635 * of type IDREFS must match Names; each IDREF Name must match the value
3636 * of an ID attribute on some element in the XML document; i.e. IDREF
3637 * values must match the value of some ID attribute.
3638 *
3639 * [ VC: Entity Name ]
3640 * Values of type ENTITY must match the Name production, values
3641 * of type ENTITIES must match Names; each Entity Name must match the
3642 * name of an unparsed entity declared in the DTD.
3643 *
3644 * [ VC: Name Token ]
3645 * Values of type NMTOKEN must match the Nmtoken production; values
3646 * of type NMTOKENS must match Nmtokens.
3647 *
3648 * Returns the attribute type
3649 */
3650int
3651xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3652 SHRINK;
3653 if ((RAW == 'C') && (NXT(1) == 'D') &&
3654 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3655 (NXT(4) == 'A')) {
3656 SKIP(5);
3657 return(XML_ATTRIBUTE_CDATA);
3658 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3659 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3660 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3661 SKIP(6);
3662 return(XML_ATTRIBUTE_IDREFS);
3663 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3664 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3665 (NXT(4) == 'F')) {
3666 SKIP(5);
3667 return(XML_ATTRIBUTE_IDREF);
3668 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3669 SKIP(2);
3670 return(XML_ATTRIBUTE_ID);
3671 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3672 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3673 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3674 SKIP(6);
3675 return(XML_ATTRIBUTE_ENTITY);
3676 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3677 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3678 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3679 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3680 SKIP(8);
3681 return(XML_ATTRIBUTE_ENTITIES);
3682 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3683 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3684 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3685 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3686 SKIP(8);
3687 return(XML_ATTRIBUTE_NMTOKENS);
3688 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3689 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3690 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3691 (NXT(6) == 'N')) {
3692 SKIP(7);
3693 return(XML_ATTRIBUTE_NMTOKEN);
3694 }
3695 return(xmlParseEnumeratedType(ctxt, tree));
3696}
3697
3698/**
3699 * xmlParseAttributeListDecl:
3700 * @ctxt: an XML parser context
3701 *
3702 * : parse the Attribute list def for an element
3703 *
3704 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3705 *
3706 * [53] AttDef ::= S Name S AttType S DefaultDecl
3707 *
3708 */
3709void
3710xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3711 xmlChar *elemName;
3712 xmlChar *attrName;
3713 xmlEnumerationPtr tree;
3714
3715 if ((RAW == '<') && (NXT(1) == '!') &&
3716 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3717 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3718 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3719 (NXT(8) == 'T')) {
3720 xmlParserInputPtr input = ctxt->input;
3721
3722 SKIP(9);
3723 if (!IS_BLANK(CUR)) {
3724 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3725 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3726 ctxt->sax->error(ctxt->userData,
3727 "Space required after '<!ATTLIST'\n");
3728 ctxt->wellFormed = 0;
3729 ctxt->disableSAX = 1;
3730 }
3731 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003732 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003733 if (elemName == NULL) {
3734 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3736 ctxt->sax->error(ctxt->userData,
3737 "ATTLIST: no name for Element\n");
3738 ctxt->wellFormed = 0;
3739 ctxt->disableSAX = 1;
3740 return;
3741 }
3742 SKIP_BLANKS;
3743 GROW;
3744 while (RAW != '>') {
3745 const xmlChar *check = CUR_PTR;
3746 int type;
3747 int def;
3748 xmlChar *defaultValue = NULL;
3749
3750 GROW;
3751 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003752 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003753 if (attrName == NULL) {
3754 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3756 ctxt->sax->error(ctxt->userData,
3757 "ATTLIST: no name for Attribute\n");
3758 ctxt->wellFormed = 0;
3759 ctxt->disableSAX = 1;
3760 break;
3761 }
3762 GROW;
3763 if (!IS_BLANK(CUR)) {
3764 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3766 ctxt->sax->error(ctxt->userData,
3767 "Space required after the attribute name\n");
3768 ctxt->wellFormed = 0;
3769 ctxt->disableSAX = 1;
3770 if (attrName != NULL)
3771 xmlFree(attrName);
3772 if (defaultValue != NULL)
3773 xmlFree(defaultValue);
3774 break;
3775 }
3776 SKIP_BLANKS;
3777
3778 type = xmlParseAttributeType(ctxt, &tree);
3779 if (type <= 0) {
3780 if (attrName != NULL)
3781 xmlFree(attrName);
3782 if (defaultValue != NULL)
3783 xmlFree(defaultValue);
3784 break;
3785 }
3786
3787 GROW;
3788 if (!IS_BLANK(CUR)) {
3789 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3791 ctxt->sax->error(ctxt->userData,
3792 "Space required after the attribute type\n");
3793 ctxt->wellFormed = 0;
3794 ctxt->disableSAX = 1;
3795 if (attrName != NULL)
3796 xmlFree(attrName);
3797 if (defaultValue != NULL)
3798 xmlFree(defaultValue);
3799 if (tree != NULL)
3800 xmlFreeEnumeration(tree);
3801 break;
3802 }
3803 SKIP_BLANKS;
3804
3805 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3806 if (def <= 0) {
3807 if (attrName != NULL)
3808 xmlFree(attrName);
3809 if (defaultValue != NULL)
3810 xmlFree(defaultValue);
3811 if (tree != NULL)
3812 xmlFreeEnumeration(tree);
3813 break;
3814 }
3815
3816 GROW;
3817 if (RAW != '>') {
3818 if (!IS_BLANK(CUR)) {
3819 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3821 ctxt->sax->error(ctxt->userData,
3822 "Space required after the attribute default value\n");
3823 ctxt->wellFormed = 0;
3824 ctxt->disableSAX = 1;
3825 if (attrName != NULL)
3826 xmlFree(attrName);
3827 if (defaultValue != NULL)
3828 xmlFree(defaultValue);
3829 if (tree != NULL)
3830 xmlFreeEnumeration(tree);
3831 break;
3832 }
3833 SKIP_BLANKS;
3834 }
3835 if (check == CUR_PTR) {
3836 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3838 ctxt->sax->error(ctxt->userData,
3839 "xmlParseAttributeListDecl: detected internal error\n");
3840 if (attrName != NULL)
3841 xmlFree(attrName);
3842 if (defaultValue != NULL)
3843 xmlFree(defaultValue);
3844 if (tree != NULL)
3845 xmlFreeEnumeration(tree);
3846 break;
3847 }
3848 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3849 (ctxt->sax->attributeDecl != NULL))
3850 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3851 type, def, defaultValue, tree);
3852 if (attrName != NULL)
3853 xmlFree(attrName);
3854 if (defaultValue != NULL)
3855 xmlFree(defaultValue);
3856 GROW;
3857 }
3858 if (RAW == '>') {
3859 if (input != ctxt->input) {
3860 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3862 ctxt->sax->error(ctxt->userData,
3863"Attribute list declaration doesn't start and stop in the same entity\n");
3864 ctxt->wellFormed = 0;
3865 ctxt->disableSAX = 1;
3866 }
3867 NEXT;
3868 }
3869
3870 xmlFree(elemName);
3871 }
3872}
3873
3874/**
3875 * xmlParseElementMixedContentDecl:
3876 * @ctxt: an XML parser context
3877 *
3878 * parse the declaration for a Mixed Element content
3879 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3880 *
3881 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3882 * '(' S? '#PCDATA' S? ')'
3883 *
3884 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3885 *
3886 * [ VC: No Duplicate Types ]
3887 * The same name must not appear more than once in a single
3888 * mixed-content declaration.
3889 *
3890 * returns: the list of the xmlElementContentPtr describing the element choices
3891 */
3892xmlElementContentPtr
3893xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3894 xmlElementContentPtr ret = NULL, cur = NULL, n;
3895 xmlChar *elem = NULL;
3896
3897 GROW;
3898 if ((RAW == '#') && (NXT(1) == 'P') &&
3899 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3900 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3901 (NXT(6) == 'A')) {
3902 SKIP(7);
3903 SKIP_BLANKS;
3904 SHRINK;
3905 if (RAW == ')') {
3906 ctxt->entity = ctxt->input;
3907 NEXT;
3908 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3909 if (RAW == '*') {
3910 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3911 NEXT;
3912 }
3913 return(ret);
3914 }
3915 if ((RAW == '(') || (RAW == '|')) {
3916 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3917 if (ret == NULL) return(NULL);
3918 }
3919 while (RAW == '|') {
3920 NEXT;
3921 if (elem == NULL) {
3922 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3923 if (ret == NULL) return(NULL);
3924 ret->c1 = cur;
3925 cur = ret;
3926 } else {
3927 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3928 if (n == NULL) return(NULL);
3929 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3930 cur->c2 = n;
3931 cur = n;
3932 xmlFree(elem);
3933 }
3934 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003935 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 if (elem == NULL) {
3937 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3939 ctxt->sax->error(ctxt->userData,
3940 "xmlParseElementMixedContentDecl : Name expected\n");
3941 ctxt->wellFormed = 0;
3942 ctxt->disableSAX = 1;
3943 xmlFreeElementContent(cur);
3944 return(NULL);
3945 }
3946 SKIP_BLANKS;
3947 GROW;
3948 }
3949 if ((RAW == ')') && (NXT(1) == '*')) {
3950 if (elem != NULL) {
3951 cur->c2 = xmlNewElementContent(elem,
3952 XML_ELEMENT_CONTENT_ELEMENT);
3953 xmlFree(elem);
3954 }
3955 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3956 ctxt->entity = ctxt->input;
3957 SKIP(2);
3958 } else {
3959 if (elem != NULL) xmlFree(elem);
3960 xmlFreeElementContent(ret);
3961 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3963 ctxt->sax->error(ctxt->userData,
3964 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3965 ctxt->wellFormed = 0;
3966 ctxt->disableSAX = 1;
3967 return(NULL);
3968 }
3969
3970 } else {
3971 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3973 ctxt->sax->error(ctxt->userData,
3974 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3975 ctxt->wellFormed = 0;
3976 ctxt->disableSAX = 1;
3977 }
3978 return(ret);
3979}
3980
3981/**
3982 * xmlParseElementChildrenContentDecl:
3983 * @ctxt: an XML parser context
3984 *
3985 * parse the declaration for a Mixed Element content
3986 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3987 *
3988 *
3989 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3990 *
3991 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3992 *
3993 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3994 *
3995 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3996 *
3997 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3998 * TODO Parameter-entity replacement text must be properly nested
3999 * with parenthetized groups. That is to say, if either of the
4000 * opening or closing parentheses in a choice, seq, or Mixed
4001 * construct is contained in the replacement text for a parameter
4002 * entity, both must be contained in the same replacement text. For
4003 * interoperability, if a parameter-entity reference appears in a
4004 * choice, seq, or Mixed construct, its replacement text should not
4005 * be empty, and neither the first nor last non-blank character of
4006 * the replacement text should be a connector (| or ,).
4007 *
4008 * returns: the tree of xmlElementContentPtr describing the element
4009 * hierarchy.
4010 */
4011xmlElementContentPtr
4012#ifdef VMS
4013xmlParseElementChildrenContentD
4014#else
4015xmlParseElementChildrenContentDecl
4016#endif
4017(xmlParserCtxtPtr ctxt) {
4018 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4019 xmlChar *elem;
4020 xmlChar type = 0;
4021
4022 SKIP_BLANKS;
4023 GROW;
4024 if (RAW == '(') {
4025 /* Recurse on first child */
4026 NEXT;
4027 SKIP_BLANKS;
4028 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4029 SKIP_BLANKS;
4030 GROW;
4031 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004032 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004033 if (elem == NULL) {
4034 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4036 ctxt->sax->error(ctxt->userData,
4037 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4038 ctxt->wellFormed = 0;
4039 ctxt->disableSAX = 1;
4040 return(NULL);
4041 }
4042 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4043 GROW;
4044 if (RAW == '?') {
4045 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4046 NEXT;
4047 } else if (RAW == '*') {
4048 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4049 NEXT;
4050 } else if (RAW == '+') {
4051 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4052 NEXT;
4053 } else {
4054 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4055 }
4056 xmlFree(elem);
4057 GROW;
4058 }
4059 SKIP_BLANKS;
4060 SHRINK;
4061 while (RAW != ')') {
4062 /*
4063 * Each loop we parse one separator and one element.
4064 */
4065 if (RAW == ',') {
4066 if (type == 0) type = CUR;
4067
4068 /*
4069 * Detect "Name | Name , Name" error
4070 */
4071 else if (type != CUR) {
4072 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4074 ctxt->sax->error(ctxt->userData,
4075 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4076 type);
4077 ctxt->wellFormed = 0;
4078 ctxt->disableSAX = 1;
4079 if ((op != NULL) && (op != ret))
4080 xmlFreeElementContent(op);
4081 if ((last != NULL) && (last != ret) &&
4082 (last != ret->c1) && (last != ret->c2))
4083 xmlFreeElementContent(last);
4084 if (ret != NULL)
4085 xmlFreeElementContent(ret);
4086 return(NULL);
4087 }
4088 NEXT;
4089
4090 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4091 if (op == NULL) {
4092 xmlFreeElementContent(ret);
4093 return(NULL);
4094 }
4095 if (last == NULL) {
4096 op->c1 = ret;
4097 ret = cur = op;
4098 } else {
4099 cur->c2 = op;
4100 op->c1 = last;
4101 cur =op;
4102 last = NULL;
4103 }
4104 } else if (RAW == '|') {
4105 if (type == 0) type = CUR;
4106
4107 /*
4108 * Detect "Name , Name | Name" error
4109 */
4110 else if (type != CUR) {
4111 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4113 ctxt->sax->error(ctxt->userData,
4114 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4115 type);
4116 ctxt->wellFormed = 0;
4117 ctxt->disableSAX = 1;
4118 if ((op != NULL) && (op != ret) && (op != last))
4119 xmlFreeElementContent(op);
4120 if ((last != NULL) && (last != ret) &&
4121 (last != ret->c1) && (last != ret->c2))
4122 xmlFreeElementContent(last);
4123 if (ret != NULL)
4124 xmlFreeElementContent(ret);
4125 return(NULL);
4126 }
4127 NEXT;
4128
4129 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4130 if (op == NULL) {
4131 if ((op != NULL) && (op != ret))
4132 xmlFreeElementContent(op);
4133 if ((last != NULL) && (last != ret) &&
4134 (last != ret->c1) && (last != ret->c2))
4135 xmlFreeElementContent(last);
4136 if (ret != NULL)
4137 xmlFreeElementContent(ret);
4138 return(NULL);
4139 }
4140 if (last == NULL) {
4141 op->c1 = ret;
4142 ret = cur = op;
4143 } else {
4144 cur->c2 = op;
4145 op->c1 = last;
4146 cur =op;
4147 last = NULL;
4148 }
4149 } else {
4150 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4152 ctxt->sax->error(ctxt->userData,
4153 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4154 ctxt->wellFormed = 0;
4155 ctxt->disableSAX = 1;
4156 if ((op != NULL) && (op != ret))
4157 xmlFreeElementContent(op);
4158 if ((last != NULL) && (last != ret) &&
4159 (last != ret->c1) && (last != ret->c2))
4160 xmlFreeElementContent(last);
4161 if (ret != NULL)
4162 xmlFreeElementContent(ret);
4163 return(NULL);
4164 }
4165 GROW;
4166 SKIP_BLANKS;
4167 GROW;
4168 if (RAW == '(') {
4169 /* Recurse on second child */
4170 NEXT;
4171 SKIP_BLANKS;
4172 last = xmlParseElementChildrenContentDecl(ctxt);
4173 SKIP_BLANKS;
4174 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004175 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004176 if (elem == NULL) {
4177 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4179 ctxt->sax->error(ctxt->userData,
4180 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4181 ctxt->wellFormed = 0;
4182 ctxt->disableSAX = 1;
4183 if ((op != NULL) && (op != ret))
4184 xmlFreeElementContent(op);
4185 if ((last != NULL) && (last != ret) &&
4186 (last != ret->c1) && (last != ret->c2))
4187 xmlFreeElementContent(last);
4188 if (ret != NULL)
4189 xmlFreeElementContent(ret);
4190 return(NULL);
4191 }
4192 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4193 xmlFree(elem);
4194 if (RAW == '?') {
4195 last->ocur = XML_ELEMENT_CONTENT_OPT;
4196 NEXT;
4197 } else if (RAW == '*') {
4198 last->ocur = XML_ELEMENT_CONTENT_MULT;
4199 NEXT;
4200 } else if (RAW == '+') {
4201 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4202 NEXT;
4203 } else {
4204 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4205 }
4206 }
4207 SKIP_BLANKS;
4208 GROW;
4209 }
4210 if ((cur != NULL) && (last != NULL)) {
4211 cur->c2 = last;
4212 }
4213 ctxt->entity = ctxt->input;
4214 NEXT;
4215 if (RAW == '?') {
4216 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4217 NEXT;
4218 } else if (RAW == '*') {
4219 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4220 NEXT;
4221 } else if (RAW == '+') {
4222 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4223 NEXT;
4224 }
4225 return(ret);
4226}
4227
4228/**
4229 * xmlParseElementContentDecl:
4230 * @ctxt: an XML parser context
4231 * @name: the name of the element being defined.
4232 * @result: the Element Content pointer will be stored here if any
4233 *
4234 * parse the declaration for an Element content either Mixed or Children,
4235 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4236 *
4237 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4238 *
4239 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4240 */
4241
4242int
4243xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4244 xmlElementContentPtr *result) {
4245
4246 xmlElementContentPtr tree = NULL;
4247 xmlParserInputPtr input = ctxt->input;
4248 int res;
4249
4250 *result = NULL;
4251
4252 if (RAW != '(') {
4253 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4255 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004256 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004257 ctxt->wellFormed = 0;
4258 ctxt->disableSAX = 1;
4259 return(-1);
4260 }
4261 NEXT;
4262 GROW;
4263 SKIP_BLANKS;
4264 if ((RAW == '#') && (NXT(1) == 'P') &&
4265 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4266 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4267 (NXT(6) == 'A')) {
4268 tree = xmlParseElementMixedContentDecl(ctxt);
4269 res = XML_ELEMENT_TYPE_MIXED;
4270 } else {
4271 tree = xmlParseElementChildrenContentDecl(ctxt);
4272 res = XML_ELEMENT_TYPE_ELEMENT;
4273 }
4274 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4275 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4277 ctxt->sax->error(ctxt->userData,
4278"Element content declaration doesn't start and stop in the same entity\n");
4279 ctxt->wellFormed = 0;
4280 ctxt->disableSAX = 1;
4281 }
4282 SKIP_BLANKS;
4283 *result = tree;
4284 return(res);
4285}
4286
4287/**
4288 * xmlParseElementDecl:
4289 * @ctxt: an XML parser context
4290 *
4291 * parse an Element declaration.
4292 *
4293 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4294 *
4295 * [ VC: Unique Element Type Declaration ]
4296 * No element type may be declared more than once
4297 *
4298 * Returns the type of the element, or -1 in case of error
4299 */
4300int
4301xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4302 xmlChar *name;
4303 int ret = -1;
4304 xmlElementContentPtr content = NULL;
4305
4306 GROW;
4307 if ((RAW == '<') && (NXT(1) == '!') &&
4308 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4309 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4310 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4311 (NXT(8) == 'T')) {
4312 xmlParserInputPtr input = ctxt->input;
4313
4314 SKIP(9);
4315 if (!IS_BLANK(CUR)) {
4316 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4318 ctxt->sax->error(ctxt->userData,
4319 "Space required after 'ELEMENT'\n");
4320 ctxt->wellFormed = 0;
4321 ctxt->disableSAX = 1;
4322 }
4323 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004324 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 if (name == NULL) {
4326 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4328 ctxt->sax->error(ctxt->userData,
4329 "xmlParseElementDecl: no name for Element\n");
4330 ctxt->wellFormed = 0;
4331 ctxt->disableSAX = 1;
4332 return(-1);
4333 }
4334 while ((RAW == 0) && (ctxt->inputNr > 1))
4335 xmlPopInput(ctxt);
4336 if (!IS_BLANK(CUR)) {
4337 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4339 ctxt->sax->error(ctxt->userData,
4340 "Space required after the element name\n");
4341 ctxt->wellFormed = 0;
4342 ctxt->disableSAX = 1;
4343 }
4344 SKIP_BLANKS;
4345 if ((RAW == 'E') && (NXT(1) == 'M') &&
4346 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4347 (NXT(4) == 'Y')) {
4348 SKIP(5);
4349 /*
4350 * Element must always be empty.
4351 */
4352 ret = XML_ELEMENT_TYPE_EMPTY;
4353 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4354 (NXT(2) == 'Y')) {
4355 SKIP(3);
4356 /*
4357 * Element is a generic container.
4358 */
4359 ret = XML_ELEMENT_TYPE_ANY;
4360 } else if (RAW == '(') {
4361 ret = xmlParseElementContentDecl(ctxt, name, &content);
4362 } else {
4363 /*
4364 * [ WFC: PEs in Internal Subset ] error handling.
4365 */
4366 if ((RAW == '%') && (ctxt->external == 0) &&
4367 (ctxt->inputNr == 1)) {
4368 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4370 ctxt->sax->error(ctxt->userData,
4371 "PEReference: forbidden within markup decl in internal subset\n");
4372 } else {
4373 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4375 ctxt->sax->error(ctxt->userData,
4376 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4377 }
4378 ctxt->wellFormed = 0;
4379 ctxt->disableSAX = 1;
4380 if (name != NULL) xmlFree(name);
4381 return(-1);
4382 }
4383
4384 SKIP_BLANKS;
4385 /*
4386 * Pop-up of finished entities.
4387 */
4388 while ((RAW == 0) && (ctxt->inputNr > 1))
4389 xmlPopInput(ctxt);
4390 SKIP_BLANKS;
4391
4392 if (RAW != '>') {
4393 ctxt->errNo = XML_ERR_GT_REQUIRED;
4394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4395 ctxt->sax->error(ctxt->userData,
4396 "xmlParseElementDecl: expected '>' at the end\n");
4397 ctxt->wellFormed = 0;
4398 ctxt->disableSAX = 1;
4399 } else {
4400 if (input != ctxt->input) {
4401 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4403 ctxt->sax->error(ctxt->userData,
4404"Element declaration doesn't start and stop in the same entity\n");
4405 ctxt->wellFormed = 0;
4406 ctxt->disableSAX = 1;
4407 }
4408
4409 NEXT;
4410 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4411 (ctxt->sax->elementDecl != NULL))
4412 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4413 content);
4414 }
4415 if (content != NULL) {
4416 xmlFreeElementContent(content);
4417 }
4418 if (name != NULL) {
4419 xmlFree(name);
4420 }
4421 }
4422 return(ret);
4423}
4424
4425/**
4426 * xmlParseMarkupDecl:
4427 * @ctxt: an XML parser context
4428 *
4429 * parse Markup declarations
4430 *
4431 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4432 * NotationDecl | PI | Comment
4433 *
4434 * [ VC: Proper Declaration/PE Nesting ]
4435 * Parameter-entity replacement text must be properly nested with
4436 * markup declarations. That is to say, if either the first character
4437 * or the last character of a markup declaration (markupdecl above) is
4438 * contained in the replacement text for a parameter-entity reference,
4439 * both must be contained in the same replacement text.
4440 *
4441 * [ WFC: PEs in Internal Subset ]
4442 * In the internal DTD subset, parameter-entity references can occur
4443 * only where markup declarations can occur, not within markup declarations.
4444 * (This does not apply to references that occur in external parameter
4445 * entities or to the external subset.)
4446 */
4447void
4448xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4449 GROW;
4450 xmlParseElementDecl(ctxt);
4451 xmlParseAttributeListDecl(ctxt);
4452 xmlParseEntityDecl(ctxt);
4453 xmlParseNotationDecl(ctxt);
4454 xmlParsePI(ctxt);
4455 xmlParseComment(ctxt);
4456 /*
4457 * This is only for internal subset. On external entities,
4458 * the replacement is done before parsing stage
4459 */
4460 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4461 xmlParsePEReference(ctxt);
4462 ctxt->instate = XML_PARSER_DTD;
4463}
4464
4465/**
4466 * xmlParseTextDecl:
4467 * @ctxt: an XML parser context
4468 *
4469 * parse an XML declaration header for external entities
4470 *
4471 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4472 *
4473 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4474 */
4475
4476void
4477xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4478 xmlChar *version;
4479
4480 /*
4481 * We know that '<?xml' is here.
4482 */
4483 if ((RAW == '<') && (NXT(1) == '?') &&
4484 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4485 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4486 SKIP(5);
4487 } else {
4488 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4490 ctxt->sax->error(ctxt->userData,
4491 "Text declaration '<?xml' required\n");
4492 ctxt->wellFormed = 0;
4493 ctxt->disableSAX = 1;
4494
4495 return;
4496 }
4497
4498 if (!IS_BLANK(CUR)) {
4499 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4501 ctxt->sax->error(ctxt->userData,
4502 "Space needed after '<?xml'\n");
4503 ctxt->wellFormed = 0;
4504 ctxt->disableSAX = 1;
4505 }
4506 SKIP_BLANKS;
4507
4508 /*
4509 * We may have the VersionInfo here.
4510 */
4511 version = xmlParseVersionInfo(ctxt);
4512 if (version == NULL)
4513 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4514 ctxt->input->version = version;
4515
4516 /*
4517 * We must have the encoding declaration
4518 */
4519 if (!IS_BLANK(CUR)) {
4520 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4522 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4523 ctxt->wellFormed = 0;
4524 ctxt->disableSAX = 1;
4525 }
4526 xmlParseEncodingDecl(ctxt);
4527 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4528 /*
4529 * The XML REC instructs us to stop parsing right here
4530 */
4531 return;
4532 }
4533
4534 SKIP_BLANKS;
4535 if ((RAW == '?') && (NXT(1) == '>')) {
4536 SKIP(2);
4537 } else if (RAW == '>') {
4538 /* Deprecated old WD ... */
4539 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4540 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4541 ctxt->sax->error(ctxt->userData,
4542 "XML declaration must end-up with '?>'\n");
4543 ctxt->wellFormed = 0;
4544 ctxt->disableSAX = 1;
4545 NEXT;
4546 } else {
4547 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550 "parsing XML declaration: '?>' expected\n");
4551 ctxt->wellFormed = 0;
4552 ctxt->disableSAX = 1;
4553 MOVETO_ENDTAG(CUR_PTR);
4554 NEXT;
4555 }
4556}
4557
4558/*
4559 * xmlParseConditionalSections
4560 * @ctxt: an XML parser context
4561 *
4562 * [61] conditionalSect ::= includeSect | ignoreSect
4563 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4564 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4565 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4566 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4567 */
4568
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004569static void
Owen Taylor3473f882001-02-23 17:55:21 +00004570xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4571 SKIP(3);
4572 SKIP_BLANKS;
4573 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4574 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4575 (NXT(6) == 'E')) {
4576 SKIP(7);
4577 SKIP_BLANKS;
4578 if (RAW != '[') {
4579 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4581 ctxt->sax->error(ctxt->userData,
4582 "XML conditional section '[' expected\n");
4583 ctxt->wellFormed = 0;
4584 ctxt->disableSAX = 1;
4585 } else {
4586 NEXT;
4587 }
4588 if (xmlParserDebugEntities) {
4589 if ((ctxt->input != NULL) && (ctxt->input->filename))
4590 xmlGenericError(xmlGenericErrorContext,
4591 "%s(%d): ", ctxt->input->filename,
4592 ctxt->input->line);
4593 xmlGenericError(xmlGenericErrorContext,
4594 "Entering INCLUDE Conditional Section\n");
4595 }
4596
4597 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4598 (NXT(2) != '>'))) {
4599 const xmlChar *check = CUR_PTR;
4600 int cons = ctxt->input->consumed;
4601 int tok = ctxt->token;
4602
4603 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4604 xmlParseConditionalSections(ctxt);
4605 } else if (IS_BLANK(CUR)) {
4606 NEXT;
4607 } else if (RAW == '%') {
4608 xmlParsePEReference(ctxt);
4609 } else
4610 xmlParseMarkupDecl(ctxt);
4611
4612 /*
4613 * Pop-up of finished entities.
4614 */
4615 while ((RAW == 0) && (ctxt->inputNr > 1))
4616 xmlPopInput(ctxt);
4617
4618 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4619 (tok == ctxt->token)) {
4620 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4622 ctxt->sax->error(ctxt->userData,
4623 "Content error in the external subset\n");
4624 ctxt->wellFormed = 0;
4625 ctxt->disableSAX = 1;
4626 break;
4627 }
4628 }
4629 if (xmlParserDebugEntities) {
4630 if ((ctxt->input != NULL) && (ctxt->input->filename))
4631 xmlGenericError(xmlGenericErrorContext,
4632 "%s(%d): ", ctxt->input->filename,
4633 ctxt->input->line);
4634 xmlGenericError(xmlGenericErrorContext,
4635 "Leaving INCLUDE Conditional Section\n");
4636 }
4637
4638 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4639 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4640 int state;
4641 int instate;
4642 int depth = 0;
4643
4644 SKIP(6);
4645 SKIP_BLANKS;
4646 if (RAW != '[') {
4647 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4649 ctxt->sax->error(ctxt->userData,
4650 "XML conditional section '[' expected\n");
4651 ctxt->wellFormed = 0;
4652 ctxt->disableSAX = 1;
4653 } else {
4654 NEXT;
4655 }
4656 if (xmlParserDebugEntities) {
4657 if ((ctxt->input != NULL) && (ctxt->input->filename))
4658 xmlGenericError(xmlGenericErrorContext,
4659 "%s(%d): ", ctxt->input->filename,
4660 ctxt->input->line);
4661 xmlGenericError(xmlGenericErrorContext,
4662 "Entering IGNORE Conditional Section\n");
4663 }
4664
4665 /*
4666 * Parse up to the end of the conditionnal section
4667 * But disable SAX event generating DTD building in the meantime
4668 */
4669 state = ctxt->disableSAX;
4670 instate = ctxt->instate;
4671 ctxt->disableSAX = 1;
4672 ctxt->instate = XML_PARSER_IGNORE;
4673
4674 while (depth >= 0) {
4675 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4676 depth++;
4677 SKIP(3);
4678 continue;
4679 }
4680 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4681 if (--depth >= 0) SKIP(3);
4682 continue;
4683 }
4684 NEXT;
4685 continue;
4686 }
4687
4688 ctxt->disableSAX = state;
4689 ctxt->instate = instate;
4690
4691 if (xmlParserDebugEntities) {
4692 if ((ctxt->input != NULL) && (ctxt->input->filename))
4693 xmlGenericError(xmlGenericErrorContext,
4694 "%s(%d): ", ctxt->input->filename,
4695 ctxt->input->line);
4696 xmlGenericError(xmlGenericErrorContext,
4697 "Leaving IGNORE Conditional Section\n");
4698 }
4699
4700 } else {
4701 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4703 ctxt->sax->error(ctxt->userData,
4704 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4705 ctxt->wellFormed = 0;
4706 ctxt->disableSAX = 1;
4707 }
4708
4709 if (RAW == 0)
4710 SHRINK;
4711
4712 if (RAW == 0) {
4713 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4715 ctxt->sax->error(ctxt->userData,
4716 "XML conditional section not closed\n");
4717 ctxt->wellFormed = 0;
4718 ctxt->disableSAX = 1;
4719 } else {
4720 SKIP(3);
4721 }
4722}
4723
4724/**
4725 * xmlParseExternalSubset:
4726 * @ctxt: an XML parser context
4727 * @ExternalID: the external identifier
4728 * @SystemID: the system identifier (or URL)
4729 *
4730 * parse Markup declarations from an external subset
4731 *
4732 * [30] extSubset ::= textDecl? extSubsetDecl
4733 *
4734 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4735 */
4736void
4737xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4738 const xmlChar *SystemID) {
4739 GROW;
4740 if ((RAW == '<') && (NXT(1) == '?') &&
4741 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4742 (NXT(4) == 'l')) {
4743 xmlParseTextDecl(ctxt);
4744 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4745 /*
4746 * The XML REC instructs us to stop parsing right here
4747 */
4748 ctxt->instate = XML_PARSER_EOF;
4749 return;
4750 }
4751 }
4752 if (ctxt->myDoc == NULL) {
4753 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4754 }
4755 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4756 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4757
4758 ctxt->instate = XML_PARSER_DTD;
4759 ctxt->external = 1;
4760 while (((RAW == '<') && (NXT(1) == '?')) ||
4761 ((RAW == '<') && (NXT(1) == '!')) ||
4762 IS_BLANK(CUR)) {
4763 const xmlChar *check = CUR_PTR;
4764 int cons = ctxt->input->consumed;
4765 int tok = ctxt->token;
4766
4767 GROW;
4768 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4769 xmlParseConditionalSections(ctxt);
4770 } else if (IS_BLANK(CUR)) {
4771 NEXT;
4772 } else if (RAW == '%') {
4773 xmlParsePEReference(ctxt);
4774 } else
4775 xmlParseMarkupDecl(ctxt);
4776
4777 /*
4778 * Pop-up of finished entities.
4779 */
4780 while ((RAW == 0) && (ctxt->inputNr > 1))
4781 xmlPopInput(ctxt);
4782
4783 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4784 (tok == ctxt->token)) {
4785 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4787 ctxt->sax->error(ctxt->userData,
4788 "Content error in the external subset\n");
4789 ctxt->wellFormed = 0;
4790 ctxt->disableSAX = 1;
4791 break;
4792 }
4793 }
4794
4795 if (RAW != 0) {
4796 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4798 ctxt->sax->error(ctxt->userData,
4799 "Extra content at the end of the document\n");
4800 ctxt->wellFormed = 0;
4801 ctxt->disableSAX = 1;
4802 }
4803
4804}
4805
4806/**
4807 * xmlParseReference:
4808 * @ctxt: an XML parser context
4809 *
4810 * parse and handle entity references in content, depending on the SAX
4811 * interface, this may end-up in a call to character() if this is a
4812 * CharRef, a predefined entity, if there is no reference() callback.
4813 * or if the parser was asked to switch to that mode.
4814 *
4815 * [67] Reference ::= EntityRef | CharRef
4816 */
4817void
4818xmlParseReference(xmlParserCtxtPtr ctxt) {
4819 xmlEntityPtr ent;
4820 xmlChar *val;
4821 if (RAW != '&') return;
4822
4823 if (NXT(1) == '#') {
4824 int i = 0;
4825 xmlChar out[10];
4826 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004827 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004828
4829 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4830 /*
4831 * So we are using non-UTF-8 buffers
4832 * Check that the char fit on 8bits, if not
4833 * generate a CharRef.
4834 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004835 if (value <= 0xFF) {
4836 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004837 out[1] = 0;
4838 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4839 (!ctxt->disableSAX))
4840 ctxt->sax->characters(ctxt->userData, out, 1);
4841 } else {
4842 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004843 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004844 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004845 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004846 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4847 (!ctxt->disableSAX))
4848 ctxt->sax->reference(ctxt->userData, out);
4849 }
4850 } else {
4851 /*
4852 * Just encode the value in UTF-8
4853 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004854 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004855 out[i] = 0;
4856 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4857 (!ctxt->disableSAX))
4858 ctxt->sax->characters(ctxt->userData, out, i);
4859 }
4860 } else {
4861 ent = xmlParseEntityRef(ctxt);
4862 if (ent == NULL) return;
4863 if ((ent->name != NULL) &&
4864 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4865 xmlNodePtr list = NULL;
4866 int ret;
4867
4868
4869 /*
4870 * The first reference to the entity trigger a parsing phase
4871 * where the ent->children is filled with the result from
4872 * the parsing.
4873 */
4874 if (ent->children == NULL) {
4875 xmlChar *value;
4876 value = ent->content;
4877
4878 /*
4879 * Check that this entity is well formed
4880 */
4881 if ((value != NULL) &&
4882 (value[1] == 0) && (value[0] == '<') &&
4883 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4884 /*
4885 * DONE: get definite answer on this !!!
4886 * Lots of entity decls are used to declare a single
4887 * char
4888 * <!ENTITY lt "<">
4889 * Which seems to be valid since
4890 * 2.4: The ampersand character (&) and the left angle
4891 * bracket (<) may appear in their literal form only
4892 * when used ... They are also legal within the literal
4893 * entity value of an internal entity declaration;i
4894 * see "4.3.2 Well-Formed Parsed Entities".
4895 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4896 * Looking at the OASIS test suite and James Clark
4897 * tests, this is broken. However the XML REC uses
4898 * it. Is the XML REC not well-formed ????
4899 * This is a hack to avoid this problem
4900 *
4901 * ANSWER: since lt gt amp .. are already defined,
4902 * this is a redefinition and hence the fact that the
4903 * contentis not well balanced is not a Wf error, this
4904 * is lousy but acceptable.
4905 */
4906 list = xmlNewDocText(ctxt->myDoc, value);
4907 if (list != NULL) {
4908 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4909 (ent->children == NULL)) {
4910 ent->children = list;
4911 ent->last = list;
4912 list->parent = (xmlNodePtr) ent;
4913 } else {
4914 xmlFreeNodeList(list);
4915 }
4916 } else if (list != NULL) {
4917 xmlFreeNodeList(list);
4918 }
4919 } else {
4920 /*
4921 * 4.3.2: An internal general parsed entity is well-formed
4922 * if its replacement text matches the production labeled
4923 * content.
4924 */
4925 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4926 ctxt->depth++;
4927 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4928 ctxt->sax, NULL, ctxt->depth,
4929 value, &list);
4930 ctxt->depth--;
4931 } else if (ent->etype ==
4932 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4933 ctxt->depth++;
4934 ret = xmlParseExternalEntity(ctxt->myDoc,
4935 ctxt->sax, NULL, ctxt->depth,
4936 ent->URI, ent->ExternalID, &list);
4937 ctxt->depth--;
4938 } else {
4939 ret = -1;
4940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4941 ctxt->sax->error(ctxt->userData,
4942 "Internal: invalid entity type\n");
4943 }
4944 if (ret == XML_ERR_ENTITY_LOOP) {
4945 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4947 ctxt->sax->error(ctxt->userData,
4948 "Detected entity reference loop\n");
4949 ctxt->wellFormed = 0;
4950 ctxt->disableSAX = 1;
4951 } else if ((ret == 0) && (list != NULL)) {
4952 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4953 (ent->children == NULL)) {
4954 ent->children = list;
4955 while (list != NULL) {
4956 list->parent = (xmlNodePtr) ent;
4957 if (list->next == NULL)
4958 ent->last = list;
4959 list = list->next;
4960 }
4961 } else {
4962 xmlFreeNodeList(list);
4963 }
4964 } else if (ret > 0) {
4965 ctxt->errNo = ret;
4966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4967 ctxt->sax->error(ctxt->userData,
4968 "Entity value required\n");
4969 ctxt->wellFormed = 0;
4970 ctxt->disableSAX = 1;
4971 } else if (list != NULL) {
4972 xmlFreeNodeList(list);
4973 }
4974 }
4975 }
4976 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4977 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4978 /*
4979 * Create a node.
4980 */
4981 ctxt->sax->reference(ctxt->userData, ent->name);
4982 return;
4983 } else if (ctxt->replaceEntities) {
4984 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4985 /*
4986 * Seems we are generating the DOM content, do
4987 * a simple tree copy
4988 */
4989 xmlNodePtr new;
4990 new = xmlCopyNodeList(ent->children);
4991
4992 xmlAddChildList(ctxt->node, new);
4993 /*
4994 * This is to avoid a nasty side effect, see
4995 * characters() in SAX.c
4996 */
4997 ctxt->nodemem = 0;
4998 ctxt->nodelen = 0;
4999 return;
5000 } else {
5001 /*
5002 * Probably running in SAX mode
5003 */
5004 xmlParserInputPtr input;
5005
5006 input = xmlNewEntityInputStream(ctxt, ent);
5007 xmlPushInput(ctxt, input);
5008 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5009 (RAW == '<') && (NXT(1) == '?') &&
5010 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5011 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5012 xmlParseTextDecl(ctxt);
5013 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5014 /*
5015 * The XML REC instructs us to stop parsing right here
5016 */
5017 ctxt->instate = XML_PARSER_EOF;
5018 return;
5019 }
5020 if (input->standalone == 1) {
5021 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5023 ctxt->sax->error(ctxt->userData,
5024 "external parsed entities cannot be standalone\n");
5025 ctxt->wellFormed = 0;
5026 ctxt->disableSAX = 1;
5027 }
5028 }
5029 return;
5030 }
5031 }
5032 } else {
5033 val = ent->content;
5034 if (val == NULL) return;
5035 /*
5036 * inline the entity.
5037 */
5038 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5039 (!ctxt->disableSAX))
5040 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5041 }
5042 }
5043}
5044
5045/**
5046 * xmlParseEntityRef:
5047 * @ctxt: an XML parser context
5048 *
5049 * parse ENTITY references declarations
5050 *
5051 * [68] EntityRef ::= '&' Name ';'
5052 *
5053 * [ WFC: Entity Declared ]
5054 * In a document without any DTD, a document with only an internal DTD
5055 * subset which contains no parameter entity references, or a document
5056 * with "standalone='yes'", the Name given in the entity reference
5057 * must match that in an entity declaration, except that well-formed
5058 * documents need not declare any of the following entities: amp, lt,
5059 * gt, apos, quot. The declaration of a parameter entity must precede
5060 * any reference to it. Similarly, the declaration of a general entity
5061 * must precede any reference to it which appears in a default value in an
5062 * attribute-list declaration. Note that if entities are declared in the
5063 * external subset or in external parameter entities, a non-validating
5064 * processor is not obligated to read and process their declarations;
5065 * for such documents, the rule that an entity must be declared is a
5066 * well-formedness constraint only if standalone='yes'.
5067 *
5068 * [ WFC: Parsed Entity ]
5069 * An entity reference must not contain the name of an unparsed entity
5070 *
5071 * Returns the xmlEntityPtr if found, or NULL otherwise.
5072 */
5073xmlEntityPtr
5074xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5075 xmlChar *name;
5076 xmlEntityPtr ent = NULL;
5077
5078 GROW;
5079
5080 if (RAW == '&') {
5081 NEXT;
5082 name = xmlParseName(ctxt);
5083 if (name == NULL) {
5084 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5086 ctxt->sax->error(ctxt->userData,
5087 "xmlParseEntityRef: no name\n");
5088 ctxt->wellFormed = 0;
5089 ctxt->disableSAX = 1;
5090 } else {
5091 if (RAW == ';') {
5092 NEXT;
5093 /*
5094 * Ask first SAX for entity resolution, otherwise try the
5095 * predefined set.
5096 */
5097 if (ctxt->sax != NULL) {
5098 if (ctxt->sax->getEntity != NULL)
5099 ent = ctxt->sax->getEntity(ctxt->userData, name);
5100 if (ent == NULL)
5101 ent = xmlGetPredefinedEntity(name);
5102 }
5103 /*
5104 * [ WFC: Entity Declared ]
5105 * In a document without any DTD, a document with only an
5106 * internal DTD subset which contains no parameter entity
5107 * references, or a document with "standalone='yes'", the
5108 * Name given in the entity reference must match that in an
5109 * entity declaration, except that well-formed documents
5110 * need not declare any of the following entities: amp, lt,
5111 * gt, apos, quot.
5112 * The declaration of a parameter entity must precede any
5113 * reference to it.
5114 * Similarly, the declaration of a general entity must
5115 * precede any reference to it which appears in a default
5116 * value in an attribute-list declaration. Note that if
5117 * entities are declared in the external subset or in
5118 * external parameter entities, a non-validating processor
5119 * is not obligated to read and process their declarations;
5120 * for such documents, the rule that an entity must be
5121 * declared is a well-formedness constraint only if
5122 * standalone='yes'.
5123 */
5124 if (ent == NULL) {
5125 if ((ctxt->standalone == 1) ||
5126 ((ctxt->hasExternalSubset == 0) &&
5127 (ctxt->hasPErefs == 0))) {
5128 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5130 ctxt->sax->error(ctxt->userData,
5131 "Entity '%s' not defined\n", name);
5132 ctxt->wellFormed = 0;
5133 ctxt->disableSAX = 1;
5134 } else {
5135 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5136 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5137 ctxt->sax->warning(ctxt->userData,
5138 "Entity '%s' not defined\n", name);
5139 }
5140 }
5141
5142 /*
5143 * [ WFC: Parsed Entity ]
5144 * An entity reference must not contain the name of an
5145 * unparsed entity
5146 */
5147 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5148 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5150 ctxt->sax->error(ctxt->userData,
5151 "Entity reference to unparsed entity %s\n", name);
5152 ctxt->wellFormed = 0;
5153 ctxt->disableSAX = 1;
5154 }
5155
5156 /*
5157 * [ WFC: No External Entity References ]
5158 * Attribute values cannot contain direct or indirect
5159 * entity references to external entities.
5160 */
5161 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5162 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5163 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5165 ctxt->sax->error(ctxt->userData,
5166 "Attribute references external entity '%s'\n", name);
5167 ctxt->wellFormed = 0;
5168 ctxt->disableSAX = 1;
5169 }
5170 /*
5171 * [ WFC: No < in Attribute Values ]
5172 * The replacement text of any entity referred to directly or
5173 * indirectly in an attribute value (other than "&lt;") must
5174 * not contain a <.
5175 */
5176 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5177 (ent != NULL) &&
5178 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5179 (ent->content != NULL) &&
5180 (xmlStrchr(ent->content, '<'))) {
5181 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5183 ctxt->sax->error(ctxt->userData,
5184 "'<' in entity '%s' is not allowed in attributes values\n", name);
5185 ctxt->wellFormed = 0;
5186 ctxt->disableSAX = 1;
5187 }
5188
5189 /*
5190 * Internal check, no parameter entities here ...
5191 */
5192 else {
5193 switch (ent->etype) {
5194 case XML_INTERNAL_PARAMETER_ENTITY:
5195 case XML_EXTERNAL_PARAMETER_ENTITY:
5196 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5198 ctxt->sax->error(ctxt->userData,
5199 "Attempt to reference the parameter entity '%s'\n", name);
5200 ctxt->wellFormed = 0;
5201 ctxt->disableSAX = 1;
5202 break;
5203 default:
5204 break;
5205 }
5206 }
5207
5208 /*
5209 * [ WFC: No Recursion ]
5210 * A parsed entity must not contain a recursive reference
5211 * to itself, either directly or indirectly.
5212 * Done somewhere else
5213 */
5214
5215 } else {
5216 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5218 ctxt->sax->error(ctxt->userData,
5219 "xmlParseEntityRef: expecting ';'\n");
5220 ctxt->wellFormed = 0;
5221 ctxt->disableSAX = 1;
5222 }
5223 xmlFree(name);
5224 }
5225 }
5226 return(ent);
5227}
5228
5229/**
5230 * xmlParseStringEntityRef:
5231 * @ctxt: an XML parser context
5232 * @str: a pointer to an index in the string
5233 *
5234 * parse ENTITY references declarations, but this version parses it from
5235 * a string value.
5236 *
5237 * [68] EntityRef ::= '&' Name ';'
5238 *
5239 * [ WFC: Entity Declared ]
5240 * In a document without any DTD, a document with only an internal DTD
5241 * subset which contains no parameter entity references, or a document
5242 * with "standalone='yes'", the Name given in the entity reference
5243 * must match that in an entity declaration, except that well-formed
5244 * documents need not declare any of the following entities: amp, lt,
5245 * gt, apos, quot. The declaration of a parameter entity must precede
5246 * any reference to it. Similarly, the declaration of a general entity
5247 * must precede any reference to it which appears in a default value in an
5248 * attribute-list declaration. Note that if entities are declared in the
5249 * external subset or in external parameter entities, a non-validating
5250 * processor is not obligated to read and process their declarations;
5251 * for such documents, the rule that an entity must be declared is a
5252 * well-formedness constraint only if standalone='yes'.
5253 *
5254 * [ WFC: Parsed Entity ]
5255 * An entity reference must not contain the name of an unparsed entity
5256 *
5257 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5258 * is updated to the current location in the string.
5259 */
5260xmlEntityPtr
5261xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5262 xmlChar *name;
5263 const xmlChar *ptr;
5264 xmlChar cur;
5265 xmlEntityPtr ent = NULL;
5266
5267 if ((str == NULL) || (*str == NULL))
5268 return(NULL);
5269 ptr = *str;
5270 cur = *ptr;
5271 if (cur == '&') {
5272 ptr++;
5273 cur = *ptr;
5274 name = xmlParseStringName(ctxt, &ptr);
5275 if (name == NULL) {
5276 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5278 ctxt->sax->error(ctxt->userData,
5279 "xmlParseEntityRef: no name\n");
5280 ctxt->wellFormed = 0;
5281 ctxt->disableSAX = 1;
5282 } else {
5283 if (*ptr == ';') {
5284 ptr++;
5285 /*
5286 * Ask first SAX for entity resolution, otherwise try the
5287 * predefined set.
5288 */
5289 if (ctxt->sax != NULL) {
5290 if (ctxt->sax->getEntity != NULL)
5291 ent = ctxt->sax->getEntity(ctxt->userData, name);
5292 if (ent == NULL)
5293 ent = xmlGetPredefinedEntity(name);
5294 }
5295 /*
5296 * [ WFC: Entity Declared ]
5297 * In a document without any DTD, a document with only an
5298 * internal DTD subset which contains no parameter entity
5299 * references, or a document with "standalone='yes'", the
5300 * Name given in the entity reference must match that in an
5301 * entity declaration, except that well-formed documents
5302 * need not declare any of the following entities: amp, lt,
5303 * gt, apos, quot.
5304 * The declaration of a parameter entity must precede any
5305 * reference to it.
5306 * Similarly, the declaration of a general entity must
5307 * precede any reference to it which appears in a default
5308 * value in an attribute-list declaration. Note that if
5309 * entities are declared in the external subset or in
5310 * external parameter entities, a non-validating processor
5311 * is not obligated to read and process their declarations;
5312 * for such documents, the rule that an entity must be
5313 * declared is a well-formedness constraint only if
5314 * standalone='yes'.
5315 */
5316 if (ent == NULL) {
5317 if ((ctxt->standalone == 1) ||
5318 ((ctxt->hasExternalSubset == 0) &&
5319 (ctxt->hasPErefs == 0))) {
5320 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5322 ctxt->sax->error(ctxt->userData,
5323 "Entity '%s' not defined\n", name);
5324 ctxt->wellFormed = 0;
5325 ctxt->disableSAX = 1;
5326 } else {
5327 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5328 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5329 ctxt->sax->warning(ctxt->userData,
5330 "Entity '%s' not defined\n", name);
5331 }
5332 }
5333
5334 /*
5335 * [ WFC: Parsed Entity ]
5336 * An entity reference must not contain the name of an
5337 * unparsed entity
5338 */
5339 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5340 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5342 ctxt->sax->error(ctxt->userData,
5343 "Entity reference to unparsed entity %s\n", name);
5344 ctxt->wellFormed = 0;
5345 ctxt->disableSAX = 1;
5346 }
5347
5348 /*
5349 * [ WFC: No External Entity References ]
5350 * Attribute values cannot contain direct or indirect
5351 * entity references to external entities.
5352 */
5353 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5354 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5355 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5357 ctxt->sax->error(ctxt->userData,
5358 "Attribute references external entity '%s'\n", name);
5359 ctxt->wellFormed = 0;
5360 ctxt->disableSAX = 1;
5361 }
5362 /*
5363 * [ WFC: No < in Attribute Values ]
5364 * The replacement text of any entity referred to directly or
5365 * indirectly in an attribute value (other than "&lt;") must
5366 * not contain a <.
5367 */
5368 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5369 (ent != NULL) &&
5370 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5371 (ent->content != NULL) &&
5372 (xmlStrchr(ent->content, '<'))) {
5373 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5375 ctxt->sax->error(ctxt->userData,
5376 "'<' in entity '%s' is not allowed in attributes values\n", name);
5377 ctxt->wellFormed = 0;
5378 ctxt->disableSAX = 1;
5379 }
5380
5381 /*
5382 * Internal check, no parameter entities here ...
5383 */
5384 else {
5385 switch (ent->etype) {
5386 case XML_INTERNAL_PARAMETER_ENTITY:
5387 case XML_EXTERNAL_PARAMETER_ENTITY:
5388 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5390 ctxt->sax->error(ctxt->userData,
5391 "Attempt to reference the parameter entity '%s'\n", name);
5392 ctxt->wellFormed = 0;
5393 ctxt->disableSAX = 1;
5394 break;
5395 default:
5396 break;
5397 }
5398 }
5399
5400 /*
5401 * [ WFC: No Recursion ]
5402 * A parsed entity must not contain a recursive reference
5403 * to itself, either directly or indirectly.
5404 * Done somewhwere else
5405 */
5406
5407 } else {
5408 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5410 ctxt->sax->error(ctxt->userData,
5411 "xmlParseEntityRef: expecting ';'\n");
5412 ctxt->wellFormed = 0;
5413 ctxt->disableSAX = 1;
5414 }
5415 xmlFree(name);
5416 }
5417 }
5418 *str = ptr;
5419 return(ent);
5420}
5421
5422/**
5423 * xmlParsePEReference:
5424 * @ctxt: an XML parser context
5425 *
5426 * parse PEReference declarations
5427 * The entity content is handled directly by pushing it's content as
5428 * a new input stream.
5429 *
5430 * [69] PEReference ::= '%' Name ';'
5431 *
5432 * [ WFC: No Recursion ]
5433 * A parsed entity must not contain a recursive
5434 * reference to itself, either directly or indirectly.
5435 *
5436 * [ WFC: Entity Declared ]
5437 * In a document without any DTD, a document with only an internal DTD
5438 * subset which contains no parameter entity references, or a document
5439 * with "standalone='yes'", ... ... The declaration of a parameter
5440 * entity must precede any reference to it...
5441 *
5442 * [ VC: Entity Declared ]
5443 * In a document with an external subset or external parameter entities
5444 * with "standalone='no'", ... ... The declaration of a parameter entity
5445 * must precede any reference to it...
5446 *
5447 * [ WFC: In DTD ]
5448 * Parameter-entity references may only appear in the DTD.
5449 * NOTE: misleading but this is handled.
5450 */
5451void
5452xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5453 xmlChar *name;
5454 xmlEntityPtr entity = NULL;
5455 xmlParserInputPtr input;
5456
5457 if (RAW == '%') {
5458 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005459 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005460 if (name == NULL) {
5461 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5463 ctxt->sax->error(ctxt->userData,
5464 "xmlParsePEReference: no name\n");
5465 ctxt->wellFormed = 0;
5466 ctxt->disableSAX = 1;
5467 } else {
5468 if (RAW == ';') {
5469 NEXT;
5470 if ((ctxt->sax != NULL) &&
5471 (ctxt->sax->getParameterEntity != NULL))
5472 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5473 name);
5474 if (entity == NULL) {
5475 /*
5476 * [ WFC: Entity Declared ]
5477 * In a document without any DTD, a document with only an
5478 * internal DTD subset which contains no parameter entity
5479 * references, or a document with "standalone='yes'", ...
5480 * ... The declaration of a parameter entity must precede
5481 * any reference to it...
5482 */
5483 if ((ctxt->standalone == 1) ||
5484 ((ctxt->hasExternalSubset == 0) &&
5485 (ctxt->hasPErefs == 0))) {
5486 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5487 if ((!ctxt->disableSAX) &&
5488 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5489 ctxt->sax->error(ctxt->userData,
5490 "PEReference: %%%s; not found\n", name);
5491 ctxt->wellFormed = 0;
5492 ctxt->disableSAX = 1;
5493 } else {
5494 /*
5495 * [ VC: Entity Declared ]
5496 * In a document with an external subset or external
5497 * parameter entities with "standalone='no'", ...
5498 * ... The declaration of a parameter entity must precede
5499 * any reference to it...
5500 */
5501 if ((!ctxt->disableSAX) &&
5502 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5503 ctxt->sax->warning(ctxt->userData,
5504 "PEReference: %%%s; not found\n", name);
5505 ctxt->valid = 0;
5506 }
5507 } else {
5508 /*
5509 * Internal checking in case the entity quest barfed
5510 */
5511 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5512 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5513 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5514 ctxt->sax->warning(ctxt->userData,
5515 "Internal: %%%s; is not a parameter entity\n", name);
5516 } else {
5517 /*
5518 * TODO !!!
5519 * handle the extra spaces added before and after
5520 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5521 */
5522 input = xmlNewEntityInputStream(ctxt, entity);
5523 xmlPushInput(ctxt, input);
5524 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5525 (RAW == '<') && (NXT(1) == '?') &&
5526 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5527 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5528 xmlParseTextDecl(ctxt);
5529 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5530 /*
5531 * The XML REC instructs us to stop parsing
5532 * right here
5533 */
5534 ctxt->instate = XML_PARSER_EOF;
5535 xmlFree(name);
5536 return;
5537 }
5538 }
5539 if (ctxt->token == 0)
5540 ctxt->token = ' ';
5541 }
5542 }
5543 ctxt->hasPErefs = 1;
5544 } else {
5545 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5547 ctxt->sax->error(ctxt->userData,
5548 "xmlParsePEReference: expecting ';'\n");
5549 ctxt->wellFormed = 0;
5550 ctxt->disableSAX = 1;
5551 }
5552 xmlFree(name);
5553 }
5554 }
5555}
5556
5557/**
5558 * xmlParseStringPEReference:
5559 * @ctxt: an XML parser context
5560 * @str: a pointer to an index in the string
5561 *
5562 * parse PEReference declarations
5563 *
5564 * [69] PEReference ::= '%' Name ';'
5565 *
5566 * [ WFC: No Recursion ]
5567 * A parsed entity must not contain a recursive
5568 * reference to itself, either directly or indirectly.
5569 *
5570 * [ WFC: Entity Declared ]
5571 * In a document without any DTD, a document with only an internal DTD
5572 * subset which contains no parameter entity references, or a document
5573 * with "standalone='yes'", ... ... The declaration of a parameter
5574 * entity must precede any reference to it...
5575 *
5576 * [ VC: Entity Declared ]
5577 * In a document with an external subset or external parameter entities
5578 * with "standalone='no'", ... ... The declaration of a parameter entity
5579 * must precede any reference to it...
5580 *
5581 * [ WFC: In DTD ]
5582 * Parameter-entity references may only appear in the DTD.
5583 * NOTE: misleading but this is handled.
5584 *
5585 * Returns the string of the entity content.
5586 * str is updated to the current value of the index
5587 */
5588xmlEntityPtr
5589xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5590 const xmlChar *ptr;
5591 xmlChar cur;
5592 xmlChar *name;
5593 xmlEntityPtr entity = NULL;
5594
5595 if ((str == NULL) || (*str == NULL)) return(NULL);
5596 ptr = *str;
5597 cur = *ptr;
5598 if (cur == '%') {
5599 ptr++;
5600 cur = *ptr;
5601 name = xmlParseStringName(ctxt, &ptr);
5602 if (name == NULL) {
5603 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5605 ctxt->sax->error(ctxt->userData,
5606 "xmlParseStringPEReference: no name\n");
5607 ctxt->wellFormed = 0;
5608 ctxt->disableSAX = 1;
5609 } else {
5610 cur = *ptr;
5611 if (cur == ';') {
5612 ptr++;
5613 cur = *ptr;
5614 if ((ctxt->sax != NULL) &&
5615 (ctxt->sax->getParameterEntity != NULL))
5616 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5617 name);
5618 if (entity == NULL) {
5619 /*
5620 * [ WFC: Entity Declared ]
5621 * In a document without any DTD, a document with only an
5622 * internal DTD subset which contains no parameter entity
5623 * references, or a document with "standalone='yes'", ...
5624 * ... The declaration of a parameter entity must precede
5625 * any reference to it...
5626 */
5627 if ((ctxt->standalone == 1) ||
5628 ((ctxt->hasExternalSubset == 0) &&
5629 (ctxt->hasPErefs == 0))) {
5630 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5632 ctxt->sax->error(ctxt->userData,
5633 "PEReference: %%%s; not found\n", name);
5634 ctxt->wellFormed = 0;
5635 ctxt->disableSAX = 1;
5636 } else {
5637 /*
5638 * [ VC: Entity Declared ]
5639 * In a document with an external subset or external
5640 * parameter entities with "standalone='no'", ...
5641 * ... The declaration of a parameter entity must
5642 * precede any reference to it...
5643 */
5644 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5645 ctxt->sax->warning(ctxt->userData,
5646 "PEReference: %%%s; not found\n", name);
5647 ctxt->valid = 0;
5648 }
5649 } else {
5650 /*
5651 * Internal checking in case the entity quest barfed
5652 */
5653 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5654 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5655 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5656 ctxt->sax->warning(ctxt->userData,
5657 "Internal: %%%s; is not a parameter entity\n", name);
5658 }
5659 }
5660 ctxt->hasPErefs = 1;
5661 } else {
5662 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5664 ctxt->sax->error(ctxt->userData,
5665 "xmlParseStringPEReference: expecting ';'\n");
5666 ctxt->wellFormed = 0;
5667 ctxt->disableSAX = 1;
5668 }
5669 xmlFree(name);
5670 }
5671 }
5672 *str = ptr;
5673 return(entity);
5674}
5675
5676/**
5677 * xmlParseDocTypeDecl:
5678 * @ctxt: an XML parser context
5679 *
5680 * parse a DOCTYPE declaration
5681 *
5682 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5683 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5684 *
5685 * [ VC: Root Element Type ]
5686 * The Name in the document type declaration must match the element
5687 * type of the root element.
5688 */
5689
5690void
5691xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5692 xmlChar *name = NULL;
5693 xmlChar *ExternalID = NULL;
5694 xmlChar *URI = NULL;
5695
5696 /*
5697 * We know that '<!DOCTYPE' has been detected.
5698 */
5699 SKIP(9);
5700
5701 SKIP_BLANKS;
5702
5703 /*
5704 * Parse the DOCTYPE name.
5705 */
5706 name = xmlParseName(ctxt);
5707 if (name == NULL) {
5708 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5710 ctxt->sax->error(ctxt->userData,
5711 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5712 ctxt->wellFormed = 0;
5713 ctxt->disableSAX = 1;
5714 }
5715 ctxt->intSubName = name;
5716
5717 SKIP_BLANKS;
5718
5719 /*
5720 * Check for SystemID and ExternalID
5721 */
5722 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5723
5724 if ((URI != NULL) || (ExternalID != NULL)) {
5725 ctxt->hasExternalSubset = 1;
5726 }
5727 ctxt->extSubURI = URI;
5728 ctxt->extSubSystem = ExternalID;
5729
5730 SKIP_BLANKS;
5731
5732 /*
5733 * Create and update the internal subset.
5734 */
5735 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5736 (!ctxt->disableSAX))
5737 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5738
5739 /*
5740 * Is there any internal subset declarations ?
5741 * they are handled separately in xmlParseInternalSubset()
5742 */
5743 if (RAW == '[')
5744 return;
5745
5746 /*
5747 * We should be at the end of the DOCTYPE declaration.
5748 */
5749 if (RAW != '>') {
5750 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5752 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5753 ctxt->wellFormed = 0;
5754 ctxt->disableSAX = 1;
5755 }
5756 NEXT;
5757}
5758
5759/**
5760 * xmlParseInternalsubset:
5761 * @ctxt: an XML parser context
5762 *
5763 * parse the internal subset declaration
5764 *
5765 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5766 */
5767
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005768static void
Owen Taylor3473f882001-02-23 17:55:21 +00005769xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5770 /*
5771 * Is there any DTD definition ?
5772 */
5773 if (RAW == '[') {
5774 ctxt->instate = XML_PARSER_DTD;
5775 NEXT;
5776 /*
5777 * Parse the succession of Markup declarations and
5778 * PEReferences.
5779 * Subsequence (markupdecl | PEReference | S)*
5780 */
5781 while (RAW != ']') {
5782 const xmlChar *check = CUR_PTR;
5783 int cons = ctxt->input->consumed;
5784
5785 SKIP_BLANKS;
5786 xmlParseMarkupDecl(ctxt);
5787 xmlParsePEReference(ctxt);
5788
5789 /*
5790 * Pop-up of finished entities.
5791 */
5792 while ((RAW == 0) && (ctxt->inputNr > 1))
5793 xmlPopInput(ctxt);
5794
5795 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5796 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5798 ctxt->sax->error(ctxt->userData,
5799 "xmlParseInternalSubset: error detected in Markup declaration\n");
5800 ctxt->wellFormed = 0;
5801 ctxt->disableSAX = 1;
5802 break;
5803 }
5804 }
5805 if (RAW == ']') {
5806 NEXT;
5807 SKIP_BLANKS;
5808 }
5809 }
5810
5811 /*
5812 * We should be at the end of the DOCTYPE declaration.
5813 */
5814 if (RAW != '>') {
5815 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5817 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5818 ctxt->wellFormed = 0;
5819 ctxt->disableSAX = 1;
5820 }
5821 NEXT;
5822}
5823
5824/**
5825 * xmlParseAttribute:
5826 * @ctxt: an XML parser context
5827 * @value: a xmlChar ** used to store the value of the attribute
5828 *
5829 * parse an attribute
5830 *
5831 * [41] Attribute ::= Name Eq AttValue
5832 *
5833 * [ WFC: No External Entity References ]
5834 * Attribute values cannot contain direct or indirect entity references
5835 * to external entities.
5836 *
5837 * [ WFC: No < in Attribute Values ]
5838 * The replacement text of any entity referred to directly or indirectly in
5839 * an attribute value (other than "&lt;") must not contain a <.
5840 *
5841 * [ VC: Attribute Value Type ]
5842 * The attribute must have been declared; the value must be of the type
5843 * declared for it.
5844 *
5845 * [25] Eq ::= S? '=' S?
5846 *
5847 * With namespace:
5848 *
5849 * [NS 11] Attribute ::= QName Eq AttValue
5850 *
5851 * Also the case QName == xmlns:??? is handled independently as a namespace
5852 * definition.
5853 *
5854 * Returns the attribute name, and the value in *value.
5855 */
5856
5857xmlChar *
5858xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5859 xmlChar *name, *val;
5860
5861 *value = NULL;
5862 name = xmlParseName(ctxt);
5863 if (name == NULL) {
5864 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5866 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5867 ctxt->wellFormed = 0;
5868 ctxt->disableSAX = 1;
5869 return(NULL);
5870 }
5871
5872 /*
5873 * read the value
5874 */
5875 SKIP_BLANKS;
5876 if (RAW == '=') {
5877 NEXT;
5878 SKIP_BLANKS;
5879 val = xmlParseAttValue(ctxt);
5880 ctxt->instate = XML_PARSER_CONTENT;
5881 } else {
5882 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5884 ctxt->sax->error(ctxt->userData,
5885 "Specification mandate value for attribute %s\n", name);
5886 ctxt->wellFormed = 0;
5887 ctxt->disableSAX = 1;
5888 xmlFree(name);
5889 return(NULL);
5890 }
5891
5892 /*
5893 * Check that xml:lang conforms to the specification
5894 * No more registered as an error, just generate a warning now
5895 * since this was deprecated in XML second edition
5896 */
5897 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5898 if (!xmlCheckLanguageID(val)) {
5899 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5900 ctxt->sax->warning(ctxt->userData,
5901 "Malformed value for xml:lang : %s\n", val);
5902 }
5903 }
5904
5905 /*
5906 * Check that xml:space conforms to the specification
5907 */
5908 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5909 if (xmlStrEqual(val, BAD_CAST "default"))
5910 *(ctxt->space) = 0;
5911 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5912 *(ctxt->space) = 1;
5913 else {
5914 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5916 ctxt->sax->error(ctxt->userData,
5917"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5918 val);
5919 ctxt->wellFormed = 0;
5920 ctxt->disableSAX = 1;
5921 }
5922 }
5923
5924 *value = val;
5925 return(name);
5926}
5927
5928/**
5929 * xmlParseStartTag:
5930 * @ctxt: an XML parser context
5931 *
5932 * parse a start of tag either for rule element or
5933 * EmptyElement. In both case we don't parse the tag closing chars.
5934 *
5935 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5936 *
5937 * [ WFC: Unique Att Spec ]
5938 * No attribute name may appear more than once in the same start-tag or
5939 * empty-element tag.
5940 *
5941 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5942 *
5943 * [ WFC: Unique Att Spec ]
5944 * No attribute name may appear more than once in the same start-tag or
5945 * empty-element tag.
5946 *
5947 * With namespace:
5948 *
5949 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5950 *
5951 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5952 *
5953 * Returns the element name parsed
5954 */
5955
5956xmlChar *
5957xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5958 xmlChar *name;
5959 xmlChar *attname;
5960 xmlChar *attvalue;
5961 const xmlChar **atts = NULL;
5962 int nbatts = 0;
5963 int maxatts = 0;
5964 int i;
5965
5966 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00005967 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00005968
5969 name = xmlParseName(ctxt);
5970 if (name == NULL) {
5971 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5973 ctxt->sax->error(ctxt->userData,
5974 "xmlParseStartTag: invalid element name\n");
5975 ctxt->wellFormed = 0;
5976 ctxt->disableSAX = 1;
5977 return(NULL);
5978 }
5979
5980 /*
5981 * Now parse the attributes, it ends up with the ending
5982 *
5983 * (S Attribute)* S?
5984 */
5985 SKIP_BLANKS;
5986 GROW;
5987
Daniel Veillard21a0f912001-02-25 19:54:14 +00005988 while ((RAW != '>') &&
5989 ((RAW != '/') || (NXT(1) != '>')) &&
5990 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005991 const xmlChar *q = CUR_PTR;
5992 int cons = ctxt->input->consumed;
5993
5994 attname = xmlParseAttribute(ctxt, &attvalue);
5995 if ((attname != NULL) && (attvalue != NULL)) {
5996 /*
5997 * [ WFC: Unique Att Spec ]
5998 * No attribute name may appear more than once in the same
5999 * start-tag or empty-element tag.
6000 */
6001 for (i = 0; i < nbatts;i += 2) {
6002 if (xmlStrEqual(atts[i], attname)) {
6003 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6005 ctxt->sax->error(ctxt->userData,
6006 "Attribute %s redefined\n",
6007 attname);
6008 ctxt->wellFormed = 0;
6009 ctxt->disableSAX = 1;
6010 xmlFree(attname);
6011 xmlFree(attvalue);
6012 goto failed;
6013 }
6014 }
6015
6016 /*
6017 * Add the pair to atts
6018 */
6019 if (atts == NULL) {
6020 maxatts = 10;
6021 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6022 if (atts == NULL) {
6023 xmlGenericError(xmlGenericErrorContext,
6024 "malloc of %ld byte failed\n",
6025 maxatts * (long)sizeof(xmlChar *));
6026 return(NULL);
6027 }
6028 } else if (nbatts + 4 > maxatts) {
6029 maxatts *= 2;
6030 atts = (const xmlChar **) xmlRealloc((void *) atts,
6031 maxatts * sizeof(xmlChar *));
6032 if (atts == NULL) {
6033 xmlGenericError(xmlGenericErrorContext,
6034 "realloc of %ld byte failed\n",
6035 maxatts * (long)sizeof(xmlChar *));
6036 return(NULL);
6037 }
6038 }
6039 atts[nbatts++] = attname;
6040 atts[nbatts++] = attvalue;
6041 atts[nbatts] = NULL;
6042 atts[nbatts + 1] = NULL;
6043 } else {
6044 if (attname != NULL)
6045 xmlFree(attname);
6046 if (attvalue != NULL)
6047 xmlFree(attvalue);
6048 }
6049
6050failed:
6051
6052 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6053 break;
6054 if (!IS_BLANK(RAW)) {
6055 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6057 ctxt->sax->error(ctxt->userData,
6058 "attributes construct error\n");
6059 ctxt->wellFormed = 0;
6060 ctxt->disableSAX = 1;
6061 }
6062 SKIP_BLANKS;
6063 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6064 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6066 ctxt->sax->error(ctxt->userData,
6067 "xmlParseStartTag: problem parsing attributes\n");
6068 ctxt->wellFormed = 0;
6069 ctxt->disableSAX = 1;
6070 break;
6071 }
6072 GROW;
6073 }
6074
6075 /*
6076 * SAX: Start of Element !
6077 */
6078 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6079 (!ctxt->disableSAX))
6080 ctxt->sax->startElement(ctxt->userData, name, atts);
6081
6082 if (atts != NULL) {
6083 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6084 xmlFree((void *) atts);
6085 }
6086 return(name);
6087}
6088
6089/**
6090 * xmlParseEndTag:
6091 * @ctxt: an XML parser context
6092 *
6093 * parse an end of tag
6094 *
6095 * [42] ETag ::= '</' Name S? '>'
6096 *
6097 * With namespace
6098 *
6099 * [NS 9] ETag ::= '</' QName S? '>'
6100 */
6101
6102void
6103xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6104 xmlChar *name;
6105 xmlChar *oldname;
6106
6107 GROW;
6108 if ((RAW != '<') || (NXT(1) != '/')) {
6109 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6111 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6112 ctxt->wellFormed = 0;
6113 ctxt->disableSAX = 1;
6114 return;
6115 }
6116 SKIP(2);
6117
6118 name = xmlParseName(ctxt);
6119
6120 /*
6121 * We should definitely be at the ending "S? '>'" part
6122 */
6123 GROW;
6124 SKIP_BLANKS;
6125 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6126 ctxt->errNo = XML_ERR_GT_REQUIRED;
6127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6128 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6129 ctxt->wellFormed = 0;
6130 ctxt->disableSAX = 1;
6131 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006132 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006133
6134 /*
6135 * [ WFC: Element Type Match ]
6136 * The Name in an element's end-tag must match the element type in the
6137 * start-tag.
6138 *
6139 */
6140 if ((name == NULL) || (ctxt->name == NULL) ||
6141 (!xmlStrEqual(name, ctxt->name))) {
6142 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6144 if ((name != NULL) && (ctxt->name != NULL)) {
6145 ctxt->sax->error(ctxt->userData,
6146 "Opening and ending tag mismatch: %s and %s\n",
6147 ctxt->name, name);
6148 } else if (ctxt->name != NULL) {
6149 ctxt->sax->error(ctxt->userData,
6150 "Ending tag eror for: %s\n", ctxt->name);
6151 } else {
6152 ctxt->sax->error(ctxt->userData,
6153 "Ending tag error: internal error ???\n");
6154 }
6155
6156 }
6157 ctxt->wellFormed = 0;
6158 ctxt->disableSAX = 1;
6159 }
6160
6161 /*
6162 * SAX: End of Tag
6163 */
6164 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6165 (!ctxt->disableSAX))
6166 ctxt->sax->endElement(ctxt->userData, name);
6167
6168 if (name != NULL)
6169 xmlFree(name);
6170 oldname = namePop(ctxt);
6171 spacePop(ctxt);
6172 if (oldname != NULL) {
6173#ifdef DEBUG_STACK
6174 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6175#endif
6176 xmlFree(oldname);
6177 }
6178 return;
6179}
6180
6181/**
6182 * xmlParseCDSect:
6183 * @ctxt: an XML parser context
6184 *
6185 * Parse escaped pure raw content.
6186 *
6187 * [18] CDSect ::= CDStart CData CDEnd
6188 *
6189 * [19] CDStart ::= '<![CDATA['
6190 *
6191 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6192 *
6193 * [21] CDEnd ::= ']]>'
6194 */
6195void
6196xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6197 xmlChar *buf = NULL;
6198 int len = 0;
6199 int size = XML_PARSER_BUFFER_SIZE;
6200 int r, rl;
6201 int s, sl;
6202 int cur, l;
6203 int count = 0;
6204
6205 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6206 (NXT(2) == '[') && (NXT(3) == 'C') &&
6207 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6208 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6209 (NXT(8) == '[')) {
6210 SKIP(9);
6211 } else
6212 return;
6213
6214 ctxt->instate = XML_PARSER_CDATA_SECTION;
6215 r = CUR_CHAR(rl);
6216 if (!IS_CHAR(r)) {
6217 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6219 ctxt->sax->error(ctxt->userData,
6220 "CData section not finished\n");
6221 ctxt->wellFormed = 0;
6222 ctxt->disableSAX = 1;
6223 ctxt->instate = XML_PARSER_CONTENT;
6224 return;
6225 }
6226 NEXTL(rl);
6227 s = CUR_CHAR(sl);
6228 if (!IS_CHAR(s)) {
6229 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6231 ctxt->sax->error(ctxt->userData,
6232 "CData section not finished\n");
6233 ctxt->wellFormed = 0;
6234 ctxt->disableSAX = 1;
6235 ctxt->instate = XML_PARSER_CONTENT;
6236 return;
6237 }
6238 NEXTL(sl);
6239 cur = CUR_CHAR(l);
6240 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6241 if (buf == NULL) {
6242 xmlGenericError(xmlGenericErrorContext,
6243 "malloc of %d byte failed\n", size);
6244 return;
6245 }
6246 while (IS_CHAR(cur) &&
6247 ((r != ']') || (s != ']') || (cur != '>'))) {
6248 if (len + 5 >= size) {
6249 size *= 2;
6250 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6251 if (buf == NULL) {
6252 xmlGenericError(xmlGenericErrorContext,
6253 "realloc of %d byte failed\n", size);
6254 return;
6255 }
6256 }
6257 COPY_BUF(rl,buf,len,r);
6258 r = s;
6259 rl = sl;
6260 s = cur;
6261 sl = l;
6262 count++;
6263 if (count > 50) {
6264 GROW;
6265 count = 0;
6266 }
6267 NEXTL(l);
6268 cur = CUR_CHAR(l);
6269 }
6270 buf[len] = 0;
6271 ctxt->instate = XML_PARSER_CONTENT;
6272 if (cur != '>') {
6273 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6275 ctxt->sax->error(ctxt->userData,
6276 "CData section not finished\n%.50s\n", buf);
6277 ctxt->wellFormed = 0;
6278 ctxt->disableSAX = 1;
6279 xmlFree(buf);
6280 return;
6281 }
6282 NEXTL(l);
6283
6284 /*
6285 * Ok the buffer is to be consumed as cdata.
6286 */
6287 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6288 if (ctxt->sax->cdataBlock != NULL)
6289 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6290 }
6291 xmlFree(buf);
6292}
6293
6294/**
6295 * xmlParseContent:
6296 * @ctxt: an XML parser context
6297 *
6298 * Parse a content:
6299 *
6300 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6301 */
6302
6303void
6304xmlParseContent(xmlParserCtxtPtr ctxt) {
6305 GROW;
6306 while (((RAW != 0) || (ctxt->token != 0)) &&
6307 ((RAW != '<') || (NXT(1) != '/'))) {
6308 const xmlChar *test = CUR_PTR;
6309 int cons = ctxt->input->consumed;
6310 xmlChar tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006311 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006312
6313 /*
6314 * Handle possible processed charrefs.
6315 */
6316 if (ctxt->token != 0) {
6317 xmlParseCharData(ctxt, 0);
6318 }
6319 /*
6320 * First case : a Processing Instruction.
6321 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006322 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006323 xmlParsePI(ctxt);
6324 }
6325
6326 /*
6327 * Second case : a CDSection
6328 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006329 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006330 (NXT(2) == '[') && (NXT(3) == 'C') &&
6331 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6332 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6333 (NXT(8) == '[')) {
6334 xmlParseCDSect(ctxt);
6335 }
6336
6337 /*
6338 * Third case : a comment
6339 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006340 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006341 (NXT(2) == '-') && (NXT(3) == '-')) {
6342 xmlParseComment(ctxt);
6343 ctxt->instate = XML_PARSER_CONTENT;
6344 }
6345
6346 /*
6347 * Fourth case : a sub-element.
6348 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006349 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006350 xmlParseElement(ctxt);
6351 }
6352
6353 /*
6354 * Fifth case : a reference. If if has not been resolved,
6355 * parsing returns it's Name, create the node
6356 */
6357
Daniel Veillard21a0f912001-02-25 19:54:14 +00006358 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006359 xmlParseReference(ctxt);
6360 }
6361
6362 /*
6363 * Last case, text. Note that References are handled directly.
6364 */
6365 else {
6366 xmlParseCharData(ctxt, 0);
6367 }
6368
6369 GROW;
6370 /*
6371 * Pop-up of finished entities.
6372 */
6373 while ((RAW == 0) && (ctxt->inputNr > 1))
6374 xmlPopInput(ctxt);
6375 SHRINK;
6376
6377 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6378 (tok == ctxt->token)) {
6379 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6381 ctxt->sax->error(ctxt->userData,
6382 "detected an error in element content\n");
6383 ctxt->wellFormed = 0;
6384 ctxt->disableSAX = 1;
6385 ctxt->instate = XML_PARSER_EOF;
6386 break;
6387 }
6388 }
6389}
6390
6391/**
6392 * xmlParseElement:
6393 * @ctxt: an XML parser context
6394 *
6395 * parse an XML element, this is highly recursive
6396 *
6397 * [39] element ::= EmptyElemTag | STag content ETag
6398 *
6399 * [ WFC: Element Type Match ]
6400 * The Name in an element's end-tag must match the element type in the
6401 * start-tag.
6402 *
6403 * [ VC: Element Valid ]
6404 * An element is valid if there is a declaration matching elementdecl
6405 * where the Name matches the element type and one of the following holds:
6406 * - The declaration matches EMPTY and the element has no content.
6407 * - The declaration matches children and the sequence of child elements
6408 * belongs to the language generated by the regular expression in the
6409 * content model, with optional white space (characters matching the
6410 * nonterminal S) between each pair of child elements.
6411 * - The declaration matches Mixed and the content consists of character
6412 * data and child elements whose types match names in the content model.
6413 * - The declaration matches ANY, and the types of any child elements have
6414 * been declared.
6415 */
6416
6417void
6418xmlParseElement(xmlParserCtxtPtr ctxt) {
6419 const xmlChar *openTag = CUR_PTR;
6420 xmlChar *name;
6421 xmlChar *oldname;
6422 xmlParserNodeInfo node_info;
6423 xmlNodePtr ret;
6424
6425 /* Capture start position */
6426 if (ctxt->record_info) {
6427 node_info.begin_pos = ctxt->input->consumed +
6428 (CUR_PTR - ctxt->input->base);
6429 node_info.begin_line = ctxt->input->line;
6430 }
6431
6432 if (ctxt->spaceNr == 0)
6433 spacePush(ctxt, -1);
6434 else
6435 spacePush(ctxt, *ctxt->space);
6436
6437 name = xmlParseStartTag(ctxt);
6438 if (name == NULL) {
6439 spacePop(ctxt);
6440 return;
6441 }
6442 namePush(ctxt, name);
6443 ret = ctxt->node;
6444
6445 /*
6446 * [ VC: Root Element Type ]
6447 * The Name in the document type declaration must match the element
6448 * type of the root element.
6449 */
6450 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6451 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6452 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6453
6454 /*
6455 * Check for an Empty Element.
6456 */
6457 if ((RAW == '/') && (NXT(1) == '>')) {
6458 SKIP(2);
6459 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6460 (!ctxt->disableSAX))
6461 ctxt->sax->endElement(ctxt->userData, name);
6462 oldname = namePop(ctxt);
6463 spacePop(ctxt);
6464 if (oldname != NULL) {
6465#ifdef DEBUG_STACK
6466 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6467#endif
6468 xmlFree(oldname);
6469 }
6470 if ( ret != NULL && ctxt->record_info ) {
6471 node_info.end_pos = ctxt->input->consumed +
6472 (CUR_PTR - ctxt->input->base);
6473 node_info.end_line = ctxt->input->line;
6474 node_info.node = ret;
6475 xmlParserAddNodeInfo(ctxt, &node_info);
6476 }
6477 return;
6478 }
6479 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006480 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006481 } else {
6482 ctxt->errNo = XML_ERR_GT_REQUIRED;
6483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6484 ctxt->sax->error(ctxt->userData,
6485 "Couldn't find end of Start Tag\n%.30s\n",
6486 openTag);
6487 ctxt->wellFormed = 0;
6488 ctxt->disableSAX = 1;
6489
6490 /*
6491 * end of parsing of this node.
6492 */
6493 nodePop(ctxt);
6494 oldname = namePop(ctxt);
6495 spacePop(ctxt);
6496 if (oldname != NULL) {
6497#ifdef DEBUG_STACK
6498 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6499#endif
6500 xmlFree(oldname);
6501 }
6502
6503 /*
6504 * Capture end position and add node
6505 */
6506 if ( ret != NULL && ctxt->record_info ) {
6507 node_info.end_pos = ctxt->input->consumed +
6508 (CUR_PTR - ctxt->input->base);
6509 node_info.end_line = ctxt->input->line;
6510 node_info.node = ret;
6511 xmlParserAddNodeInfo(ctxt, &node_info);
6512 }
6513 return;
6514 }
6515
6516 /*
6517 * Parse the content of the element:
6518 */
6519 xmlParseContent(ctxt);
6520 if (!IS_CHAR(RAW)) {
6521 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6523 ctxt->sax->error(ctxt->userData,
6524 "Premature end of data in tag %.30s\n", openTag);
6525 ctxt->wellFormed = 0;
6526 ctxt->disableSAX = 1;
6527
6528 /*
6529 * end of parsing of this node.
6530 */
6531 nodePop(ctxt);
6532 oldname = namePop(ctxt);
6533 spacePop(ctxt);
6534 if (oldname != NULL) {
6535#ifdef DEBUG_STACK
6536 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6537#endif
6538 xmlFree(oldname);
6539 }
6540 return;
6541 }
6542
6543 /*
6544 * parse the end of tag: '</' should be here.
6545 */
6546 xmlParseEndTag(ctxt);
6547
6548 /*
6549 * Capture end position and add node
6550 */
6551 if ( ret != NULL && ctxt->record_info ) {
6552 node_info.end_pos = ctxt->input->consumed +
6553 (CUR_PTR - ctxt->input->base);
6554 node_info.end_line = ctxt->input->line;
6555 node_info.node = ret;
6556 xmlParserAddNodeInfo(ctxt, &node_info);
6557 }
6558}
6559
6560/**
6561 * xmlParseVersionNum:
6562 * @ctxt: an XML parser context
6563 *
6564 * parse the XML version value.
6565 *
6566 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6567 *
6568 * Returns the string giving the XML version number, or NULL
6569 */
6570xmlChar *
6571xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6572 xmlChar *buf = NULL;
6573 int len = 0;
6574 int size = 10;
6575 xmlChar cur;
6576
6577 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6578 if (buf == NULL) {
6579 xmlGenericError(xmlGenericErrorContext,
6580 "malloc of %d byte failed\n", size);
6581 return(NULL);
6582 }
6583 cur = CUR;
6584 while (((cur >= 'a') && (cur <= 'z')) ||
6585 ((cur >= 'A') && (cur <= 'Z')) ||
6586 ((cur >= '0') && (cur <= '9')) ||
6587 (cur == '_') || (cur == '.') ||
6588 (cur == ':') || (cur == '-')) {
6589 if (len + 1 >= size) {
6590 size *= 2;
6591 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6592 if (buf == NULL) {
6593 xmlGenericError(xmlGenericErrorContext,
6594 "realloc of %d byte failed\n", size);
6595 return(NULL);
6596 }
6597 }
6598 buf[len++] = cur;
6599 NEXT;
6600 cur=CUR;
6601 }
6602 buf[len] = 0;
6603 return(buf);
6604}
6605
6606/**
6607 * xmlParseVersionInfo:
6608 * @ctxt: an XML parser context
6609 *
6610 * parse the XML version.
6611 *
6612 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6613 *
6614 * [25] Eq ::= S? '=' S?
6615 *
6616 * Returns the version string, e.g. "1.0"
6617 */
6618
6619xmlChar *
6620xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6621 xmlChar *version = NULL;
6622 const xmlChar *q;
6623
6624 if ((RAW == 'v') && (NXT(1) == 'e') &&
6625 (NXT(2) == 'r') && (NXT(3) == 's') &&
6626 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6627 (NXT(6) == 'n')) {
6628 SKIP(7);
6629 SKIP_BLANKS;
6630 if (RAW != '=') {
6631 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6633 ctxt->sax->error(ctxt->userData,
6634 "xmlParseVersionInfo : expected '='\n");
6635 ctxt->wellFormed = 0;
6636 ctxt->disableSAX = 1;
6637 return(NULL);
6638 }
6639 NEXT;
6640 SKIP_BLANKS;
6641 if (RAW == '"') {
6642 NEXT;
6643 q = CUR_PTR;
6644 version = xmlParseVersionNum(ctxt);
6645 if (RAW != '"') {
6646 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6648 ctxt->sax->error(ctxt->userData,
6649 "String not closed\n%.50s\n", q);
6650 ctxt->wellFormed = 0;
6651 ctxt->disableSAX = 1;
6652 } else
6653 NEXT;
6654 } else if (RAW == '\''){
6655 NEXT;
6656 q = CUR_PTR;
6657 version = xmlParseVersionNum(ctxt);
6658 if (RAW != '\'') {
6659 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6661 ctxt->sax->error(ctxt->userData,
6662 "String not closed\n%.50s\n", q);
6663 ctxt->wellFormed = 0;
6664 ctxt->disableSAX = 1;
6665 } else
6666 NEXT;
6667 } else {
6668 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6670 ctxt->sax->error(ctxt->userData,
6671 "xmlParseVersionInfo : expected ' or \"\n");
6672 ctxt->wellFormed = 0;
6673 ctxt->disableSAX = 1;
6674 }
6675 }
6676 return(version);
6677}
6678
6679/**
6680 * xmlParseEncName:
6681 * @ctxt: an XML parser context
6682 *
6683 * parse the XML encoding name
6684 *
6685 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6686 *
6687 * Returns the encoding name value or NULL
6688 */
6689xmlChar *
6690xmlParseEncName(xmlParserCtxtPtr ctxt) {
6691 xmlChar *buf = NULL;
6692 int len = 0;
6693 int size = 10;
6694 xmlChar cur;
6695
6696 cur = CUR;
6697 if (((cur >= 'a') && (cur <= 'z')) ||
6698 ((cur >= 'A') && (cur <= 'Z'))) {
6699 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6700 if (buf == NULL) {
6701 xmlGenericError(xmlGenericErrorContext,
6702 "malloc of %d byte failed\n", size);
6703 return(NULL);
6704 }
6705
6706 buf[len++] = cur;
6707 NEXT;
6708 cur = CUR;
6709 while (((cur >= 'a') && (cur <= 'z')) ||
6710 ((cur >= 'A') && (cur <= 'Z')) ||
6711 ((cur >= '0') && (cur <= '9')) ||
6712 (cur == '.') || (cur == '_') ||
6713 (cur == '-')) {
6714 if (len + 1 >= size) {
6715 size *= 2;
6716 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6717 if (buf == NULL) {
6718 xmlGenericError(xmlGenericErrorContext,
6719 "realloc of %d byte failed\n", size);
6720 return(NULL);
6721 }
6722 }
6723 buf[len++] = cur;
6724 NEXT;
6725 cur = CUR;
6726 if (cur == 0) {
6727 SHRINK;
6728 GROW;
6729 cur = CUR;
6730 }
6731 }
6732 buf[len] = 0;
6733 } else {
6734 ctxt->errNo = XML_ERR_ENCODING_NAME;
6735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6736 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6737 ctxt->wellFormed = 0;
6738 ctxt->disableSAX = 1;
6739 }
6740 return(buf);
6741}
6742
6743/**
6744 * xmlParseEncodingDecl:
6745 * @ctxt: an XML parser context
6746 *
6747 * parse the XML encoding declaration
6748 *
6749 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6750 *
6751 * this setups the conversion filters.
6752 *
6753 * Returns the encoding value or NULL
6754 */
6755
6756xmlChar *
6757xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6758 xmlChar *encoding = NULL;
6759 const xmlChar *q;
6760
6761 SKIP_BLANKS;
6762 if ((RAW == 'e') && (NXT(1) == 'n') &&
6763 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6764 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6765 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6766 SKIP(8);
6767 SKIP_BLANKS;
6768 if (RAW != '=') {
6769 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6771 ctxt->sax->error(ctxt->userData,
6772 "xmlParseEncodingDecl : expected '='\n");
6773 ctxt->wellFormed = 0;
6774 ctxt->disableSAX = 1;
6775 return(NULL);
6776 }
6777 NEXT;
6778 SKIP_BLANKS;
6779 if (RAW == '"') {
6780 NEXT;
6781 q = CUR_PTR;
6782 encoding = xmlParseEncName(ctxt);
6783 if (RAW != '"') {
6784 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6786 ctxt->sax->error(ctxt->userData,
6787 "String not closed\n%.50s\n", q);
6788 ctxt->wellFormed = 0;
6789 ctxt->disableSAX = 1;
6790 } else
6791 NEXT;
6792 } else if (RAW == '\''){
6793 NEXT;
6794 q = CUR_PTR;
6795 encoding = xmlParseEncName(ctxt);
6796 if (RAW != '\'') {
6797 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6799 ctxt->sax->error(ctxt->userData,
6800 "String not closed\n%.50s\n", q);
6801 ctxt->wellFormed = 0;
6802 ctxt->disableSAX = 1;
6803 } else
6804 NEXT;
6805 } else if (RAW == '"'){
6806 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6808 ctxt->sax->error(ctxt->userData,
6809 "xmlParseEncodingDecl : expected ' or \"\n");
6810 ctxt->wellFormed = 0;
6811 ctxt->disableSAX = 1;
6812 }
6813 if (encoding != NULL) {
6814 xmlCharEncoding enc;
6815 xmlCharEncodingHandlerPtr handler;
6816
6817 if (ctxt->input->encoding != NULL)
6818 xmlFree((xmlChar *) ctxt->input->encoding);
6819 ctxt->input->encoding = encoding;
6820
6821 enc = xmlParseCharEncoding((const char *) encoding);
6822 /*
6823 * registered set of known encodings
6824 */
6825 if (enc != XML_CHAR_ENCODING_ERROR) {
6826 xmlSwitchEncoding(ctxt, enc);
6827 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6828 xmlFree(encoding);
6829 return(NULL);
6830 }
6831 } else {
6832 /*
6833 * fallback for unknown encodings
6834 */
6835 handler = xmlFindCharEncodingHandler((const char *) encoding);
6836 if (handler != NULL) {
6837 xmlSwitchToEncoding(ctxt, handler);
6838 } else {
6839 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6841 ctxt->sax->error(ctxt->userData,
6842 "Unsupported encoding %s\n", encoding);
6843 return(NULL);
6844 }
6845 }
6846 }
6847 }
6848 return(encoding);
6849}
6850
6851/**
6852 * xmlParseSDDecl:
6853 * @ctxt: an XML parser context
6854 *
6855 * parse the XML standalone declaration
6856 *
6857 * [32] SDDecl ::= S 'standalone' Eq
6858 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6859 *
6860 * [ VC: Standalone Document Declaration ]
6861 * TODO The standalone document declaration must have the value "no"
6862 * if any external markup declarations contain declarations of:
6863 * - attributes with default values, if elements to which these
6864 * attributes apply appear in the document without specifications
6865 * of values for these attributes, or
6866 * - entities (other than amp, lt, gt, apos, quot), if references
6867 * to those entities appear in the document, or
6868 * - attributes with values subject to normalization, where the
6869 * attribute appears in the document with a value which will change
6870 * as a result of normalization, or
6871 * - element types with element content, if white space occurs directly
6872 * within any instance of those types.
6873 *
6874 * Returns 1 if standalone, 0 otherwise
6875 */
6876
6877int
6878xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6879 int standalone = -1;
6880
6881 SKIP_BLANKS;
6882 if ((RAW == 's') && (NXT(1) == 't') &&
6883 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6884 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6885 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6886 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6887 SKIP(10);
6888 SKIP_BLANKS;
6889 if (RAW != '=') {
6890 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6892 ctxt->sax->error(ctxt->userData,
6893 "XML standalone declaration : expected '='\n");
6894 ctxt->wellFormed = 0;
6895 ctxt->disableSAX = 1;
6896 return(standalone);
6897 }
6898 NEXT;
6899 SKIP_BLANKS;
6900 if (RAW == '\''){
6901 NEXT;
6902 if ((RAW == 'n') && (NXT(1) == 'o')) {
6903 standalone = 0;
6904 SKIP(2);
6905 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6906 (NXT(2) == 's')) {
6907 standalone = 1;
6908 SKIP(3);
6909 } else {
6910 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6912 ctxt->sax->error(ctxt->userData,
6913 "standalone accepts only 'yes' or 'no'\n");
6914 ctxt->wellFormed = 0;
6915 ctxt->disableSAX = 1;
6916 }
6917 if (RAW != '\'') {
6918 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6920 ctxt->sax->error(ctxt->userData, "String not closed\n");
6921 ctxt->wellFormed = 0;
6922 ctxt->disableSAX = 1;
6923 } else
6924 NEXT;
6925 } else if (RAW == '"'){
6926 NEXT;
6927 if ((RAW == 'n') && (NXT(1) == 'o')) {
6928 standalone = 0;
6929 SKIP(2);
6930 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6931 (NXT(2) == 's')) {
6932 standalone = 1;
6933 SKIP(3);
6934 } else {
6935 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6937 ctxt->sax->error(ctxt->userData,
6938 "standalone accepts only 'yes' or 'no'\n");
6939 ctxt->wellFormed = 0;
6940 ctxt->disableSAX = 1;
6941 }
6942 if (RAW != '"') {
6943 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6945 ctxt->sax->error(ctxt->userData, "String not closed\n");
6946 ctxt->wellFormed = 0;
6947 ctxt->disableSAX = 1;
6948 } else
6949 NEXT;
6950 } else {
6951 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6953 ctxt->sax->error(ctxt->userData,
6954 "Standalone value not found\n");
6955 ctxt->wellFormed = 0;
6956 ctxt->disableSAX = 1;
6957 }
6958 }
6959 return(standalone);
6960}
6961
6962/**
6963 * xmlParseXMLDecl:
6964 * @ctxt: an XML parser context
6965 *
6966 * parse an XML declaration header
6967 *
6968 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6969 */
6970
6971void
6972xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
6973 xmlChar *version;
6974
6975 /*
6976 * We know that '<?xml' is here.
6977 */
6978 SKIP(5);
6979
6980 if (!IS_BLANK(RAW)) {
6981 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6983 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
6984 ctxt->wellFormed = 0;
6985 ctxt->disableSAX = 1;
6986 }
6987 SKIP_BLANKS;
6988
6989 /*
6990 * We should have the VersionInfo here.
6991 */
6992 version = xmlParseVersionInfo(ctxt);
6993 if (version == NULL)
6994 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6995 ctxt->version = xmlStrdup(version);
6996 xmlFree(version);
6997
6998 /*
6999 * We may have the encoding declaration
7000 */
7001 if (!IS_BLANK(RAW)) {
7002 if ((RAW == '?') && (NXT(1) == '>')) {
7003 SKIP(2);
7004 return;
7005 }
7006 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7008 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7009 ctxt->wellFormed = 0;
7010 ctxt->disableSAX = 1;
7011 }
7012 xmlParseEncodingDecl(ctxt);
7013 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7014 /*
7015 * The XML REC instructs us to stop parsing right here
7016 */
7017 return;
7018 }
7019
7020 /*
7021 * We may have the standalone status.
7022 */
7023 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7024 if ((RAW == '?') && (NXT(1) == '>')) {
7025 SKIP(2);
7026 return;
7027 }
7028 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7030 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7031 ctxt->wellFormed = 0;
7032 ctxt->disableSAX = 1;
7033 }
7034 SKIP_BLANKS;
7035 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7036
7037 SKIP_BLANKS;
7038 if ((RAW == '?') && (NXT(1) == '>')) {
7039 SKIP(2);
7040 } else if (RAW == '>') {
7041 /* Deprecated old WD ... */
7042 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7044 ctxt->sax->error(ctxt->userData,
7045 "XML declaration must end-up with '?>'\n");
7046 ctxt->wellFormed = 0;
7047 ctxt->disableSAX = 1;
7048 NEXT;
7049 } else {
7050 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7052 ctxt->sax->error(ctxt->userData,
7053 "parsing XML declaration: '?>' expected\n");
7054 ctxt->wellFormed = 0;
7055 ctxt->disableSAX = 1;
7056 MOVETO_ENDTAG(CUR_PTR);
7057 NEXT;
7058 }
7059}
7060
7061/**
7062 * xmlParseMisc:
7063 * @ctxt: an XML parser context
7064 *
7065 * parse an XML Misc* optionnal field.
7066 *
7067 * [27] Misc ::= Comment | PI | S
7068 */
7069
7070void
7071xmlParseMisc(xmlParserCtxtPtr ctxt) {
7072 while (((RAW == '<') && (NXT(1) == '?')) ||
7073 ((RAW == '<') && (NXT(1) == '!') &&
7074 (NXT(2) == '-') && (NXT(3) == '-')) ||
7075 IS_BLANK(CUR)) {
7076 if ((RAW == '<') && (NXT(1) == '?')) {
7077 xmlParsePI(ctxt);
7078 } else if (IS_BLANK(CUR)) {
7079 NEXT;
7080 } else
7081 xmlParseComment(ctxt);
7082 }
7083}
7084
7085/**
7086 * xmlParseDocument:
7087 * @ctxt: an XML parser context
7088 *
7089 * parse an XML document (and build a tree if using the standard SAX
7090 * interface).
7091 *
7092 * [1] document ::= prolog element Misc*
7093 *
7094 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7095 *
7096 * Returns 0, -1 in case of error. the parser context is augmented
7097 * as a result of the parsing.
7098 */
7099
7100int
7101xmlParseDocument(xmlParserCtxtPtr ctxt) {
7102 xmlChar start[4];
7103 xmlCharEncoding enc;
7104
7105 xmlInitParser();
7106
7107 GROW;
7108
7109 /*
7110 * SAX: beginning of the document processing.
7111 */
7112 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7113 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7114
7115 /*
7116 * Get the 4 first bytes and decode the charset
7117 * if enc != XML_CHAR_ENCODING_NONE
7118 * plug some encoding conversion routines.
7119 */
7120 start[0] = RAW;
7121 start[1] = NXT(1);
7122 start[2] = NXT(2);
7123 start[3] = NXT(3);
7124 enc = xmlDetectCharEncoding(start, 4);
7125 if (enc != XML_CHAR_ENCODING_NONE) {
7126 xmlSwitchEncoding(ctxt, enc);
7127 }
7128
7129
7130 if (CUR == 0) {
7131 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7133 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7134 ctxt->wellFormed = 0;
7135 ctxt->disableSAX = 1;
7136 }
7137
7138 /*
7139 * Check for the XMLDecl in the Prolog.
7140 */
7141 GROW;
7142 if ((RAW == '<') && (NXT(1) == '?') &&
7143 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7144 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7145
7146 /*
7147 * Note that we will switch encoding on the fly.
7148 */
7149 xmlParseXMLDecl(ctxt);
7150 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7151 /*
7152 * The XML REC instructs us to stop parsing right here
7153 */
7154 return(-1);
7155 }
7156 ctxt->standalone = ctxt->input->standalone;
7157 SKIP_BLANKS;
7158 } else {
7159 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7160 }
7161 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7162 ctxt->sax->startDocument(ctxt->userData);
7163
7164 /*
7165 * The Misc part of the Prolog
7166 */
7167 GROW;
7168 xmlParseMisc(ctxt);
7169
7170 /*
7171 * Then possibly doc type declaration(s) and more Misc
7172 * (doctypedecl Misc*)?
7173 */
7174 GROW;
7175 if ((RAW == '<') && (NXT(1) == '!') &&
7176 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7177 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7178 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7179 (NXT(8) == 'E')) {
7180
7181 ctxt->inSubset = 1;
7182 xmlParseDocTypeDecl(ctxt);
7183 if (RAW == '[') {
7184 ctxt->instate = XML_PARSER_DTD;
7185 xmlParseInternalSubset(ctxt);
7186 }
7187
7188 /*
7189 * Create and update the external subset.
7190 */
7191 ctxt->inSubset = 2;
7192 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7193 (!ctxt->disableSAX))
7194 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7195 ctxt->extSubSystem, ctxt->extSubURI);
7196 ctxt->inSubset = 0;
7197
7198
7199 ctxt->instate = XML_PARSER_PROLOG;
7200 xmlParseMisc(ctxt);
7201 }
7202
7203 /*
7204 * Time to start parsing the tree itself
7205 */
7206 GROW;
7207 if (RAW != '<') {
7208 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7210 ctxt->sax->error(ctxt->userData,
7211 "Start tag expected, '<' not found\n");
7212 ctxt->wellFormed = 0;
7213 ctxt->disableSAX = 1;
7214 ctxt->instate = XML_PARSER_EOF;
7215 } else {
7216 ctxt->instate = XML_PARSER_CONTENT;
7217 xmlParseElement(ctxt);
7218 ctxt->instate = XML_PARSER_EPILOG;
7219
7220
7221 /*
7222 * The Misc part at the end
7223 */
7224 xmlParseMisc(ctxt);
7225
7226 if (RAW != 0) {
7227 ctxt->errNo = XML_ERR_DOCUMENT_END;
7228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7229 ctxt->sax->error(ctxt->userData,
7230 "Extra content at the end of the document\n");
7231 ctxt->wellFormed = 0;
7232 ctxt->disableSAX = 1;
7233 }
7234 ctxt->instate = XML_PARSER_EOF;
7235 }
7236
7237 /*
7238 * SAX: end of the document processing.
7239 */
7240 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7241 (!ctxt->disableSAX))
7242 ctxt->sax->endDocument(ctxt->userData);
7243
7244 if (! ctxt->wellFormed) return(-1);
7245 return(0);
7246}
7247
7248/**
7249 * xmlParseExtParsedEnt:
7250 * @ctxt: an XML parser context
7251 *
7252 * parse a genreral parsed entity
7253 * An external general parsed entity is well-formed if it matches the
7254 * production labeled extParsedEnt.
7255 *
7256 * [78] extParsedEnt ::= TextDecl? content
7257 *
7258 * Returns 0, -1 in case of error. the parser context is augmented
7259 * as a result of the parsing.
7260 */
7261
7262int
7263xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7264 xmlChar start[4];
7265 xmlCharEncoding enc;
7266
7267 xmlDefaultSAXHandlerInit();
7268
7269 GROW;
7270
7271 /*
7272 * SAX: beginning of the document processing.
7273 */
7274 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7275 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7276
7277 /*
7278 * Get the 4 first bytes and decode the charset
7279 * if enc != XML_CHAR_ENCODING_NONE
7280 * plug some encoding conversion routines.
7281 */
7282 start[0] = RAW;
7283 start[1] = NXT(1);
7284 start[2] = NXT(2);
7285 start[3] = NXT(3);
7286 enc = xmlDetectCharEncoding(start, 4);
7287 if (enc != XML_CHAR_ENCODING_NONE) {
7288 xmlSwitchEncoding(ctxt, enc);
7289 }
7290
7291
7292 if (CUR == 0) {
7293 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7295 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7296 ctxt->wellFormed = 0;
7297 ctxt->disableSAX = 1;
7298 }
7299
7300 /*
7301 * Check for the XMLDecl in the Prolog.
7302 */
7303 GROW;
7304 if ((RAW == '<') && (NXT(1) == '?') &&
7305 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7306 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7307
7308 /*
7309 * Note that we will switch encoding on the fly.
7310 */
7311 xmlParseXMLDecl(ctxt);
7312 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7313 /*
7314 * The XML REC instructs us to stop parsing right here
7315 */
7316 return(-1);
7317 }
7318 SKIP_BLANKS;
7319 } else {
7320 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7321 }
7322 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7323 ctxt->sax->startDocument(ctxt->userData);
7324
7325 /*
7326 * Doing validity checking on chunk doesn't make sense
7327 */
7328 ctxt->instate = XML_PARSER_CONTENT;
7329 ctxt->validate = 0;
7330 ctxt->loadsubset = 0;
7331 ctxt->depth = 0;
7332
7333 xmlParseContent(ctxt);
7334
7335 if ((RAW == '<') && (NXT(1) == '/')) {
7336 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7338 ctxt->sax->error(ctxt->userData,
7339 "chunk is not well balanced\n");
7340 ctxt->wellFormed = 0;
7341 ctxt->disableSAX = 1;
7342 } else if (RAW != 0) {
7343 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7345 ctxt->sax->error(ctxt->userData,
7346 "extra content at the end of well balanced chunk\n");
7347 ctxt->wellFormed = 0;
7348 ctxt->disableSAX = 1;
7349 }
7350
7351 /*
7352 * SAX: end of the document processing.
7353 */
7354 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7355 (!ctxt->disableSAX))
7356 ctxt->sax->endDocument(ctxt->userData);
7357
7358 if (! ctxt->wellFormed) return(-1);
7359 return(0);
7360}
7361
7362/************************************************************************
7363 * *
7364 * Progressive parsing interfaces *
7365 * *
7366 ************************************************************************/
7367
7368/**
7369 * xmlParseLookupSequence:
7370 * @ctxt: an XML parser context
7371 * @first: the first char to lookup
7372 * @next: the next char to lookup or zero
7373 * @third: the next char to lookup or zero
7374 *
7375 * Try to find if a sequence (first, next, third) or just (first next) or
7376 * (first) is available in the input stream.
7377 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7378 * to avoid rescanning sequences of bytes, it DOES change the state of the
7379 * parser, do not use liberally.
7380 *
7381 * Returns the index to the current parsing point if the full sequence
7382 * is available, -1 otherwise.
7383 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007384static int
Owen Taylor3473f882001-02-23 17:55:21 +00007385xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7386 xmlChar next, xmlChar third) {
7387 int base, len;
7388 xmlParserInputPtr in;
7389 const xmlChar *buf;
7390
7391 in = ctxt->input;
7392 if (in == NULL) return(-1);
7393 base = in->cur - in->base;
7394 if (base < 0) return(-1);
7395 if (ctxt->checkIndex > base)
7396 base = ctxt->checkIndex;
7397 if (in->buf == NULL) {
7398 buf = in->base;
7399 len = in->length;
7400 } else {
7401 buf = in->buf->buffer->content;
7402 len = in->buf->buffer->use;
7403 }
7404 /* take into account the sequence length */
7405 if (third) len -= 2;
7406 else if (next) len --;
7407 for (;base < len;base++) {
7408 if (buf[base] == first) {
7409 if (third != 0) {
7410 if ((buf[base + 1] != next) ||
7411 (buf[base + 2] != third)) continue;
7412 } else if (next != 0) {
7413 if (buf[base + 1] != next) continue;
7414 }
7415 ctxt->checkIndex = 0;
7416#ifdef DEBUG_PUSH
7417 if (next == 0)
7418 xmlGenericError(xmlGenericErrorContext,
7419 "PP: lookup '%c' found at %d\n",
7420 first, base);
7421 else if (third == 0)
7422 xmlGenericError(xmlGenericErrorContext,
7423 "PP: lookup '%c%c' found at %d\n",
7424 first, next, base);
7425 else
7426 xmlGenericError(xmlGenericErrorContext,
7427 "PP: lookup '%c%c%c' found at %d\n",
7428 first, next, third, base);
7429#endif
7430 return(base - (in->cur - in->base));
7431 }
7432 }
7433 ctxt->checkIndex = base;
7434#ifdef DEBUG_PUSH
7435 if (next == 0)
7436 xmlGenericError(xmlGenericErrorContext,
7437 "PP: lookup '%c' failed\n", first);
7438 else if (third == 0)
7439 xmlGenericError(xmlGenericErrorContext,
7440 "PP: lookup '%c%c' failed\n", first, next);
7441 else
7442 xmlGenericError(xmlGenericErrorContext,
7443 "PP: lookup '%c%c%c' failed\n", first, next, third);
7444#endif
7445 return(-1);
7446}
7447
7448/**
7449 * xmlParseTryOrFinish:
7450 * @ctxt: an XML parser context
7451 * @terminate: last chunk indicator
7452 *
7453 * Try to progress on parsing
7454 *
7455 * Returns zero if no parsing was possible
7456 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007457static int
Owen Taylor3473f882001-02-23 17:55:21 +00007458xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7459 int ret = 0;
7460 int avail;
7461 xmlChar cur, next;
7462
7463#ifdef DEBUG_PUSH
7464 switch (ctxt->instate) {
7465 case XML_PARSER_EOF:
7466 xmlGenericError(xmlGenericErrorContext,
7467 "PP: try EOF\n"); break;
7468 case XML_PARSER_START:
7469 xmlGenericError(xmlGenericErrorContext,
7470 "PP: try START\n"); break;
7471 case XML_PARSER_MISC:
7472 xmlGenericError(xmlGenericErrorContext,
7473 "PP: try MISC\n");break;
7474 case XML_PARSER_COMMENT:
7475 xmlGenericError(xmlGenericErrorContext,
7476 "PP: try COMMENT\n");break;
7477 case XML_PARSER_PROLOG:
7478 xmlGenericError(xmlGenericErrorContext,
7479 "PP: try PROLOG\n");break;
7480 case XML_PARSER_START_TAG:
7481 xmlGenericError(xmlGenericErrorContext,
7482 "PP: try START_TAG\n");break;
7483 case XML_PARSER_CONTENT:
7484 xmlGenericError(xmlGenericErrorContext,
7485 "PP: try CONTENT\n");break;
7486 case XML_PARSER_CDATA_SECTION:
7487 xmlGenericError(xmlGenericErrorContext,
7488 "PP: try CDATA_SECTION\n");break;
7489 case XML_PARSER_END_TAG:
7490 xmlGenericError(xmlGenericErrorContext,
7491 "PP: try END_TAG\n");break;
7492 case XML_PARSER_ENTITY_DECL:
7493 xmlGenericError(xmlGenericErrorContext,
7494 "PP: try ENTITY_DECL\n");break;
7495 case XML_PARSER_ENTITY_VALUE:
7496 xmlGenericError(xmlGenericErrorContext,
7497 "PP: try ENTITY_VALUE\n");break;
7498 case XML_PARSER_ATTRIBUTE_VALUE:
7499 xmlGenericError(xmlGenericErrorContext,
7500 "PP: try ATTRIBUTE_VALUE\n");break;
7501 case XML_PARSER_DTD:
7502 xmlGenericError(xmlGenericErrorContext,
7503 "PP: try DTD\n");break;
7504 case XML_PARSER_EPILOG:
7505 xmlGenericError(xmlGenericErrorContext,
7506 "PP: try EPILOG\n");break;
7507 case XML_PARSER_PI:
7508 xmlGenericError(xmlGenericErrorContext,
7509 "PP: try PI\n");break;
7510 case XML_PARSER_IGNORE:
7511 xmlGenericError(xmlGenericErrorContext,
7512 "PP: try IGNORE\n");break;
7513 }
7514#endif
7515
7516 while (1) {
7517 /*
7518 * Pop-up of finished entities.
7519 */
7520 while ((RAW == 0) && (ctxt->inputNr > 1))
7521 xmlPopInput(ctxt);
7522
7523 if (ctxt->input ==NULL) break;
7524 if (ctxt->input->buf == NULL)
7525 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7526 else
7527 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7528 if (avail < 1)
7529 goto done;
7530 switch (ctxt->instate) {
7531 case XML_PARSER_EOF:
7532 /*
7533 * Document parsing is done !
7534 */
7535 goto done;
7536 case XML_PARSER_START:
7537 /*
7538 * Very first chars read from the document flow.
7539 */
Owen Taylor3473f882001-02-23 17:55:21 +00007540 if (avail < 2)
7541 goto done;
7542
7543 cur = ctxt->input->cur[0];
7544 next = ctxt->input->cur[1];
7545 if (cur == 0) {
7546 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7547 ctxt->sax->setDocumentLocator(ctxt->userData,
7548 &xmlDefaultSAXLocator);
7549 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7551 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7552 ctxt->wellFormed = 0;
7553 ctxt->disableSAX = 1;
7554 ctxt->instate = XML_PARSER_EOF;
7555#ifdef DEBUG_PUSH
7556 xmlGenericError(xmlGenericErrorContext,
7557 "PP: entering EOF\n");
7558#endif
7559 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7560 ctxt->sax->endDocument(ctxt->userData);
7561 goto done;
7562 }
7563 if ((cur == '<') && (next == '?')) {
7564 /* PI or XML decl */
7565 if (avail < 5) return(ret);
7566 if ((!terminate) &&
7567 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7568 return(ret);
7569 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7570 ctxt->sax->setDocumentLocator(ctxt->userData,
7571 &xmlDefaultSAXLocator);
7572 if ((ctxt->input->cur[2] == 'x') &&
7573 (ctxt->input->cur[3] == 'm') &&
7574 (ctxt->input->cur[4] == 'l') &&
7575 (IS_BLANK(ctxt->input->cur[5]))) {
7576 ret += 5;
7577#ifdef DEBUG_PUSH
7578 xmlGenericError(xmlGenericErrorContext,
7579 "PP: Parsing XML Decl\n");
7580#endif
7581 xmlParseXMLDecl(ctxt);
7582 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7583 /*
7584 * The XML REC instructs us to stop parsing right
7585 * here
7586 */
7587 ctxt->instate = XML_PARSER_EOF;
7588 return(0);
7589 }
7590 ctxt->standalone = ctxt->input->standalone;
7591 if ((ctxt->encoding == NULL) &&
7592 (ctxt->input->encoding != NULL))
7593 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7594 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7595 (!ctxt->disableSAX))
7596 ctxt->sax->startDocument(ctxt->userData);
7597 ctxt->instate = XML_PARSER_MISC;
7598#ifdef DEBUG_PUSH
7599 xmlGenericError(xmlGenericErrorContext,
7600 "PP: entering MISC\n");
7601#endif
7602 } else {
7603 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7604 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7605 (!ctxt->disableSAX))
7606 ctxt->sax->startDocument(ctxt->userData);
7607 ctxt->instate = XML_PARSER_MISC;
7608#ifdef DEBUG_PUSH
7609 xmlGenericError(xmlGenericErrorContext,
7610 "PP: entering MISC\n");
7611#endif
7612 }
7613 } else {
7614 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7615 ctxt->sax->setDocumentLocator(ctxt->userData,
7616 &xmlDefaultSAXLocator);
7617 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7618 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7619 (!ctxt->disableSAX))
7620 ctxt->sax->startDocument(ctxt->userData);
7621 ctxt->instate = XML_PARSER_MISC;
7622#ifdef DEBUG_PUSH
7623 xmlGenericError(xmlGenericErrorContext,
7624 "PP: entering MISC\n");
7625#endif
7626 }
7627 break;
7628 case XML_PARSER_MISC:
7629 SKIP_BLANKS;
7630 if (ctxt->input->buf == NULL)
7631 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7632 else
7633 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7634 if (avail < 2)
7635 goto done;
7636 cur = ctxt->input->cur[0];
7637 next = ctxt->input->cur[1];
7638 if ((cur == '<') && (next == '?')) {
7639 if ((!terminate) &&
7640 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7641 goto done;
7642#ifdef DEBUG_PUSH
7643 xmlGenericError(xmlGenericErrorContext,
7644 "PP: Parsing PI\n");
7645#endif
7646 xmlParsePI(ctxt);
7647 } else if ((cur == '<') && (next == '!') &&
7648 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7649 if ((!terminate) &&
7650 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7651 goto done;
7652#ifdef DEBUG_PUSH
7653 xmlGenericError(xmlGenericErrorContext,
7654 "PP: Parsing Comment\n");
7655#endif
7656 xmlParseComment(ctxt);
7657 ctxt->instate = XML_PARSER_MISC;
7658 } else if ((cur == '<') && (next == '!') &&
7659 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7660 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7661 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7662 (ctxt->input->cur[8] == 'E')) {
7663 if ((!terminate) &&
7664 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7665 goto done;
7666#ifdef DEBUG_PUSH
7667 xmlGenericError(xmlGenericErrorContext,
7668 "PP: Parsing internal subset\n");
7669#endif
7670 ctxt->inSubset = 1;
7671 xmlParseDocTypeDecl(ctxt);
7672 if (RAW == '[') {
7673 ctxt->instate = XML_PARSER_DTD;
7674#ifdef DEBUG_PUSH
7675 xmlGenericError(xmlGenericErrorContext,
7676 "PP: entering DTD\n");
7677#endif
7678 } else {
7679 /*
7680 * Create and update the external subset.
7681 */
7682 ctxt->inSubset = 2;
7683 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7684 (ctxt->sax->externalSubset != NULL))
7685 ctxt->sax->externalSubset(ctxt->userData,
7686 ctxt->intSubName, ctxt->extSubSystem,
7687 ctxt->extSubURI);
7688 ctxt->inSubset = 0;
7689 ctxt->instate = XML_PARSER_PROLOG;
7690#ifdef DEBUG_PUSH
7691 xmlGenericError(xmlGenericErrorContext,
7692 "PP: entering PROLOG\n");
7693#endif
7694 }
7695 } else if ((cur == '<') && (next == '!') &&
7696 (avail < 9)) {
7697 goto done;
7698 } else {
7699 ctxt->instate = XML_PARSER_START_TAG;
7700#ifdef DEBUG_PUSH
7701 xmlGenericError(xmlGenericErrorContext,
7702 "PP: entering START_TAG\n");
7703#endif
7704 }
7705 break;
7706 case XML_PARSER_IGNORE:
7707 xmlGenericError(xmlGenericErrorContext,
7708 "PP: internal error, state == IGNORE");
7709 ctxt->instate = XML_PARSER_DTD;
7710#ifdef DEBUG_PUSH
7711 xmlGenericError(xmlGenericErrorContext,
7712 "PP: entering DTD\n");
7713#endif
7714 break;
7715 case XML_PARSER_PROLOG:
7716 SKIP_BLANKS;
7717 if (ctxt->input->buf == NULL)
7718 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7719 else
7720 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7721 if (avail < 2)
7722 goto done;
7723 cur = ctxt->input->cur[0];
7724 next = ctxt->input->cur[1];
7725 if ((cur == '<') && (next == '?')) {
7726 if ((!terminate) &&
7727 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7728 goto done;
7729#ifdef DEBUG_PUSH
7730 xmlGenericError(xmlGenericErrorContext,
7731 "PP: Parsing PI\n");
7732#endif
7733 xmlParsePI(ctxt);
7734 } else if ((cur == '<') && (next == '!') &&
7735 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7736 if ((!terminate) &&
7737 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7738 goto done;
7739#ifdef DEBUG_PUSH
7740 xmlGenericError(xmlGenericErrorContext,
7741 "PP: Parsing Comment\n");
7742#endif
7743 xmlParseComment(ctxt);
7744 ctxt->instate = XML_PARSER_PROLOG;
7745 } else if ((cur == '<') && (next == '!') &&
7746 (avail < 4)) {
7747 goto done;
7748 } else {
7749 ctxt->instate = XML_PARSER_START_TAG;
7750#ifdef DEBUG_PUSH
7751 xmlGenericError(xmlGenericErrorContext,
7752 "PP: entering START_TAG\n");
7753#endif
7754 }
7755 break;
7756 case XML_PARSER_EPILOG:
7757 SKIP_BLANKS;
7758 if (ctxt->input->buf == NULL)
7759 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7760 else
7761 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7762 if (avail < 2)
7763 goto done;
7764 cur = ctxt->input->cur[0];
7765 next = ctxt->input->cur[1];
7766 if ((cur == '<') && (next == '?')) {
7767 if ((!terminate) &&
7768 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7769 goto done;
7770#ifdef DEBUG_PUSH
7771 xmlGenericError(xmlGenericErrorContext,
7772 "PP: Parsing PI\n");
7773#endif
7774 xmlParsePI(ctxt);
7775 ctxt->instate = XML_PARSER_EPILOG;
7776 } else if ((cur == '<') && (next == '!') &&
7777 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7778 if ((!terminate) &&
7779 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7780 goto done;
7781#ifdef DEBUG_PUSH
7782 xmlGenericError(xmlGenericErrorContext,
7783 "PP: Parsing Comment\n");
7784#endif
7785 xmlParseComment(ctxt);
7786 ctxt->instate = XML_PARSER_EPILOG;
7787 } else if ((cur == '<') && (next == '!') &&
7788 (avail < 4)) {
7789 goto done;
7790 } else {
7791 ctxt->errNo = XML_ERR_DOCUMENT_END;
7792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7793 ctxt->sax->error(ctxt->userData,
7794 "Extra content at the end of the document\n");
7795 ctxt->wellFormed = 0;
7796 ctxt->disableSAX = 1;
7797 ctxt->instate = XML_PARSER_EOF;
7798#ifdef DEBUG_PUSH
7799 xmlGenericError(xmlGenericErrorContext,
7800 "PP: entering EOF\n");
7801#endif
7802 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7803 (!ctxt->disableSAX))
7804 ctxt->sax->endDocument(ctxt->userData);
7805 goto done;
7806 }
7807 break;
7808 case XML_PARSER_START_TAG: {
7809 xmlChar *name, *oldname;
7810
7811 if ((avail < 2) && (ctxt->inputNr == 1))
7812 goto done;
7813 cur = ctxt->input->cur[0];
7814 if (cur != '<') {
7815 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7817 ctxt->sax->error(ctxt->userData,
7818 "Start tag expect, '<' not found\n");
7819 ctxt->wellFormed = 0;
7820 ctxt->disableSAX = 1;
7821 ctxt->instate = XML_PARSER_EOF;
7822#ifdef DEBUG_PUSH
7823 xmlGenericError(xmlGenericErrorContext,
7824 "PP: entering EOF\n");
7825#endif
7826 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7827 (!ctxt->disableSAX))
7828 ctxt->sax->endDocument(ctxt->userData);
7829 goto done;
7830 }
7831 if ((!terminate) &&
7832 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7833 goto done;
7834 if (ctxt->spaceNr == 0)
7835 spacePush(ctxt, -1);
7836 else
7837 spacePush(ctxt, *ctxt->space);
7838 name = xmlParseStartTag(ctxt);
7839 if (name == NULL) {
7840 spacePop(ctxt);
7841 ctxt->instate = XML_PARSER_EOF;
7842#ifdef DEBUG_PUSH
7843 xmlGenericError(xmlGenericErrorContext,
7844 "PP: entering EOF\n");
7845#endif
7846 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7847 (!ctxt->disableSAX))
7848 ctxt->sax->endDocument(ctxt->userData);
7849 goto done;
7850 }
7851 namePush(ctxt, xmlStrdup(name));
7852
7853 /*
7854 * [ VC: Root Element Type ]
7855 * The Name in the document type declaration must match
7856 * the element type of the root element.
7857 */
7858 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7859 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7860 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7861
7862 /*
7863 * Check for an Empty Element.
7864 */
7865 if ((RAW == '/') && (NXT(1) == '>')) {
7866 SKIP(2);
7867 if ((ctxt->sax != NULL) &&
7868 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7869 ctxt->sax->endElement(ctxt->userData, name);
7870 xmlFree(name);
7871 oldname = namePop(ctxt);
7872 spacePop(ctxt);
7873 if (oldname != NULL) {
7874#ifdef DEBUG_STACK
7875 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7876#endif
7877 xmlFree(oldname);
7878 }
7879 if (ctxt->name == NULL) {
7880 ctxt->instate = XML_PARSER_EPILOG;
7881#ifdef DEBUG_PUSH
7882 xmlGenericError(xmlGenericErrorContext,
7883 "PP: entering EPILOG\n");
7884#endif
7885 } else {
7886 ctxt->instate = XML_PARSER_CONTENT;
7887#ifdef DEBUG_PUSH
7888 xmlGenericError(xmlGenericErrorContext,
7889 "PP: entering CONTENT\n");
7890#endif
7891 }
7892 break;
7893 }
7894 if (RAW == '>') {
7895 NEXT;
7896 } else {
7897 ctxt->errNo = XML_ERR_GT_REQUIRED;
7898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7899 ctxt->sax->error(ctxt->userData,
7900 "Couldn't find end of Start Tag %s\n",
7901 name);
7902 ctxt->wellFormed = 0;
7903 ctxt->disableSAX = 1;
7904
7905 /*
7906 * end of parsing of this node.
7907 */
7908 nodePop(ctxt);
7909 oldname = namePop(ctxt);
7910 spacePop(ctxt);
7911 if (oldname != NULL) {
7912#ifdef DEBUG_STACK
7913 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7914#endif
7915 xmlFree(oldname);
7916 }
7917 }
7918 xmlFree(name);
7919 ctxt->instate = XML_PARSER_CONTENT;
7920#ifdef DEBUG_PUSH
7921 xmlGenericError(xmlGenericErrorContext,
7922 "PP: entering CONTENT\n");
7923#endif
7924 break;
7925 }
7926 case XML_PARSER_CONTENT: {
7927 const xmlChar *test;
7928 int cons;
7929 xmlChar tok;
7930
7931 /*
7932 * Handle preparsed entities and charRef
7933 */
7934 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007935 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00007936
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007937 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00007938 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7939 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007940 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00007941 ctxt->token = 0;
7942 }
7943 if ((avail < 2) && (ctxt->inputNr == 1))
7944 goto done;
7945 cur = ctxt->input->cur[0];
7946 next = ctxt->input->cur[1];
7947
7948 test = CUR_PTR;
7949 cons = ctxt->input->consumed;
7950 tok = ctxt->token;
7951 if ((cur == '<') && (next == '?')) {
7952 if ((!terminate) &&
7953 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7954 goto done;
7955#ifdef DEBUG_PUSH
7956 xmlGenericError(xmlGenericErrorContext,
7957 "PP: Parsing PI\n");
7958#endif
7959 xmlParsePI(ctxt);
7960 } else if ((cur == '<') && (next == '!') &&
7961 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7962 if ((!terminate) &&
7963 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7964 goto done;
7965#ifdef DEBUG_PUSH
7966 xmlGenericError(xmlGenericErrorContext,
7967 "PP: Parsing Comment\n");
7968#endif
7969 xmlParseComment(ctxt);
7970 ctxt->instate = XML_PARSER_CONTENT;
7971 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7972 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7973 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7974 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7975 (ctxt->input->cur[8] == '[')) {
7976 SKIP(9);
7977 ctxt->instate = XML_PARSER_CDATA_SECTION;
7978#ifdef DEBUG_PUSH
7979 xmlGenericError(xmlGenericErrorContext,
7980 "PP: entering CDATA_SECTION\n");
7981#endif
7982 break;
7983 } else if ((cur == '<') && (next == '!') &&
7984 (avail < 9)) {
7985 goto done;
7986 } else if ((cur == '<') && (next == '/')) {
7987 ctxt->instate = XML_PARSER_END_TAG;
7988#ifdef DEBUG_PUSH
7989 xmlGenericError(xmlGenericErrorContext,
7990 "PP: entering END_TAG\n");
7991#endif
7992 break;
7993 } else if (cur == '<') {
7994 ctxt->instate = XML_PARSER_START_TAG;
7995#ifdef DEBUG_PUSH
7996 xmlGenericError(xmlGenericErrorContext,
7997 "PP: entering START_TAG\n");
7998#endif
7999 break;
8000 } else if (cur == '&') {
8001 if ((!terminate) &&
8002 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8003 goto done;
8004#ifdef DEBUG_PUSH
8005 xmlGenericError(xmlGenericErrorContext,
8006 "PP: Parsing Reference\n");
8007#endif
8008 xmlParseReference(ctxt);
8009 } else {
8010 /* TODO Avoid the extra copy, handle directly !!! */
8011 /*
8012 * Goal of the following test is:
8013 * - minimize calls to the SAX 'character' callback
8014 * when they are mergeable
8015 * - handle an problem for isBlank when we only parse
8016 * a sequence of blank chars and the next one is
8017 * not available to check against '<' presence.
8018 * - tries to homogenize the differences in SAX
8019 * callbacks beween the push and pull versions
8020 * of the parser.
8021 */
8022 if ((ctxt->inputNr == 1) &&
8023 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8024 if ((!terminate) &&
8025 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8026 goto done;
8027 }
8028 ctxt->checkIndex = 0;
8029#ifdef DEBUG_PUSH
8030 xmlGenericError(xmlGenericErrorContext,
8031 "PP: Parsing char data\n");
8032#endif
8033 xmlParseCharData(ctxt, 0);
8034 }
8035 /*
8036 * Pop-up of finished entities.
8037 */
8038 while ((RAW == 0) && (ctxt->inputNr > 1))
8039 xmlPopInput(ctxt);
8040 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8041 (tok == ctxt->token)) {
8042 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8044 ctxt->sax->error(ctxt->userData,
8045 "detected an error in element content\n");
8046 ctxt->wellFormed = 0;
8047 ctxt->disableSAX = 1;
8048 ctxt->instate = XML_PARSER_EOF;
8049 break;
8050 }
8051 break;
8052 }
8053 case XML_PARSER_CDATA_SECTION: {
8054 /*
8055 * The Push mode need to have the SAX callback for
8056 * cdataBlock merge back contiguous callbacks.
8057 */
8058 int base;
8059
8060 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8061 if (base < 0) {
8062 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8064 if (ctxt->sax->cdataBlock != NULL)
8065 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8066 XML_PARSER_BIG_BUFFER_SIZE);
8067 }
8068 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8069 ctxt->checkIndex = 0;
8070 }
8071 goto done;
8072 } else {
8073 if ((ctxt->sax != NULL) && (base > 0) &&
8074 (!ctxt->disableSAX)) {
8075 if (ctxt->sax->cdataBlock != NULL)
8076 ctxt->sax->cdataBlock(ctxt->userData,
8077 ctxt->input->cur, base);
8078 }
8079 SKIP(base + 3);
8080 ctxt->checkIndex = 0;
8081 ctxt->instate = XML_PARSER_CONTENT;
8082#ifdef DEBUG_PUSH
8083 xmlGenericError(xmlGenericErrorContext,
8084 "PP: entering CONTENT\n");
8085#endif
8086 }
8087 break;
8088 }
8089 case XML_PARSER_END_TAG:
8090 if (avail < 2)
8091 goto done;
8092 if ((!terminate) &&
8093 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8094 goto done;
8095 xmlParseEndTag(ctxt);
8096 if (ctxt->name == NULL) {
8097 ctxt->instate = XML_PARSER_EPILOG;
8098#ifdef DEBUG_PUSH
8099 xmlGenericError(xmlGenericErrorContext,
8100 "PP: entering EPILOG\n");
8101#endif
8102 } else {
8103 ctxt->instate = XML_PARSER_CONTENT;
8104#ifdef DEBUG_PUSH
8105 xmlGenericError(xmlGenericErrorContext,
8106 "PP: entering CONTENT\n");
8107#endif
8108 }
8109 break;
8110 case XML_PARSER_DTD: {
8111 /*
8112 * Sorry but progressive parsing of the internal subset
8113 * is not expected to be supported. We first check that
8114 * the full content of the internal subset is available and
8115 * the parsing is launched only at that point.
8116 * Internal subset ends up with "']' S? '>'" in an unescaped
8117 * section and not in a ']]>' sequence which are conditional
8118 * sections (whoever argued to keep that crap in XML deserve
8119 * a place in hell !).
8120 */
8121 int base, i;
8122 xmlChar *buf;
8123 xmlChar quote = 0;
8124
8125 base = ctxt->input->cur - ctxt->input->base;
8126 if (base < 0) return(0);
8127 if (ctxt->checkIndex > base)
8128 base = ctxt->checkIndex;
8129 buf = ctxt->input->buf->buffer->content;
8130 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8131 base++) {
8132 if (quote != 0) {
8133 if (buf[base] == quote)
8134 quote = 0;
8135 continue;
8136 }
8137 if (buf[base] == '"') {
8138 quote = '"';
8139 continue;
8140 }
8141 if (buf[base] == '\'') {
8142 quote = '\'';
8143 continue;
8144 }
8145 if (buf[base] == ']') {
8146 if ((unsigned int) base +1 >=
8147 ctxt->input->buf->buffer->use)
8148 break;
8149 if (buf[base + 1] == ']') {
8150 /* conditional crap, skip both ']' ! */
8151 base++;
8152 continue;
8153 }
8154 for (i = 0;
8155 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8156 i++) {
8157 if (buf[base + i] == '>')
8158 goto found_end_int_subset;
8159 }
8160 break;
8161 }
8162 }
8163 /*
8164 * We didn't found the end of the Internal subset
8165 */
8166 if (quote == 0)
8167 ctxt->checkIndex = base;
8168#ifdef DEBUG_PUSH
8169 if (next == 0)
8170 xmlGenericError(xmlGenericErrorContext,
8171 "PP: lookup of int subset end filed\n");
8172#endif
8173 goto done;
8174
8175found_end_int_subset:
8176 xmlParseInternalSubset(ctxt);
8177 ctxt->inSubset = 2;
8178 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8179 (ctxt->sax->externalSubset != NULL))
8180 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8181 ctxt->extSubSystem, ctxt->extSubURI);
8182 ctxt->inSubset = 0;
8183 ctxt->instate = XML_PARSER_PROLOG;
8184 ctxt->checkIndex = 0;
8185#ifdef DEBUG_PUSH
8186 xmlGenericError(xmlGenericErrorContext,
8187 "PP: entering PROLOG\n");
8188#endif
8189 break;
8190 }
8191 case XML_PARSER_COMMENT:
8192 xmlGenericError(xmlGenericErrorContext,
8193 "PP: internal error, state == COMMENT\n");
8194 ctxt->instate = XML_PARSER_CONTENT;
8195#ifdef DEBUG_PUSH
8196 xmlGenericError(xmlGenericErrorContext,
8197 "PP: entering CONTENT\n");
8198#endif
8199 break;
8200 case XML_PARSER_PI:
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: internal error, state == PI\n");
8203 ctxt->instate = XML_PARSER_CONTENT;
8204#ifdef DEBUG_PUSH
8205 xmlGenericError(xmlGenericErrorContext,
8206 "PP: entering CONTENT\n");
8207#endif
8208 break;
8209 case XML_PARSER_ENTITY_DECL:
8210 xmlGenericError(xmlGenericErrorContext,
8211 "PP: internal error, state == ENTITY_DECL\n");
8212 ctxt->instate = XML_PARSER_DTD;
8213#ifdef DEBUG_PUSH
8214 xmlGenericError(xmlGenericErrorContext,
8215 "PP: entering DTD\n");
8216#endif
8217 break;
8218 case XML_PARSER_ENTITY_VALUE:
8219 xmlGenericError(xmlGenericErrorContext,
8220 "PP: internal error, state == ENTITY_VALUE\n");
8221 ctxt->instate = XML_PARSER_CONTENT;
8222#ifdef DEBUG_PUSH
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: entering DTD\n");
8225#endif
8226 break;
8227 case XML_PARSER_ATTRIBUTE_VALUE:
8228 xmlGenericError(xmlGenericErrorContext,
8229 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8230 ctxt->instate = XML_PARSER_START_TAG;
8231#ifdef DEBUG_PUSH
8232 xmlGenericError(xmlGenericErrorContext,
8233 "PP: entering START_TAG\n");
8234#endif
8235 break;
8236 case XML_PARSER_SYSTEM_LITERAL:
8237 xmlGenericError(xmlGenericErrorContext,
8238 "PP: internal error, state == SYSTEM_LITERAL\n");
8239 ctxt->instate = XML_PARSER_START_TAG;
8240#ifdef DEBUG_PUSH
8241 xmlGenericError(xmlGenericErrorContext,
8242 "PP: entering START_TAG\n");
8243#endif
8244 break;
8245 }
8246 }
8247done:
8248#ifdef DEBUG_PUSH
8249 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8250#endif
8251 return(ret);
8252}
8253
8254/**
Owen Taylor3473f882001-02-23 17:55:21 +00008255 * xmlParseChunk:
8256 * @ctxt: an XML parser context
8257 * @chunk: an char array
8258 * @size: the size in byte of the chunk
8259 * @terminate: last chunk indicator
8260 *
8261 * Parse a Chunk of memory
8262 *
8263 * Returns zero if no error, the xmlParserErrors otherwise.
8264 */
8265int
8266xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8267 int terminate) {
8268 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8269 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8270 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8271 int cur = ctxt->input->cur - ctxt->input->base;
8272
8273 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8274 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8275 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008276 ctxt->input->end =
8277 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008278#ifdef DEBUG_PUSH
8279 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8280#endif
8281
8282 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8283 xmlParseTryOrFinish(ctxt, terminate);
8284 } else if (ctxt->instate != XML_PARSER_EOF) {
8285 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8286 xmlParserInputBufferPtr in = ctxt->input->buf;
8287 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8288 (in->raw != NULL)) {
8289 int nbchars;
8290
8291 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8292 if (nbchars < 0) {
8293 xmlGenericError(xmlGenericErrorContext,
8294 "xmlParseChunk: encoder error\n");
8295 return(XML_ERR_INVALID_ENCODING);
8296 }
8297 }
8298 }
8299 }
8300 xmlParseTryOrFinish(ctxt, terminate);
8301 if (terminate) {
8302 /*
8303 * Check for termination
8304 */
8305 if ((ctxt->instate != XML_PARSER_EOF) &&
8306 (ctxt->instate != XML_PARSER_EPILOG)) {
8307 ctxt->errNo = XML_ERR_DOCUMENT_END;
8308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8309 ctxt->sax->error(ctxt->userData,
8310 "Extra content at the end of the document\n");
8311 ctxt->wellFormed = 0;
8312 ctxt->disableSAX = 1;
8313 }
8314 if (ctxt->instate != XML_PARSER_EOF) {
8315 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8316 (!ctxt->disableSAX))
8317 ctxt->sax->endDocument(ctxt->userData);
8318 }
8319 ctxt->instate = XML_PARSER_EOF;
8320 }
8321 return((xmlParserErrors) ctxt->errNo);
8322}
8323
8324/************************************************************************
8325 * *
8326 * I/O front end functions to the parser *
8327 * *
8328 ************************************************************************/
8329
8330/**
8331 * xmlStopParser:
8332 * @ctxt: an XML parser context
8333 *
8334 * Blocks further parser processing
8335 */
8336void
8337xmlStopParser(xmlParserCtxtPtr ctxt) {
8338 ctxt->instate = XML_PARSER_EOF;
8339 if (ctxt->input != NULL)
8340 ctxt->input->cur = BAD_CAST"";
8341}
8342
8343/**
8344 * xmlCreatePushParserCtxt:
8345 * @sax: a SAX handler
8346 * @user_data: The user data returned on SAX callbacks
8347 * @chunk: a pointer to an array of chars
8348 * @size: number of chars in the array
8349 * @filename: an optional file name or URI
8350 *
8351 * Create a parser context for using the XML parser in push mode
8352 * To allow content encoding detection, @size should be >= 4
8353 * The value of @filename is used for fetching external entities
8354 * and error/warning reports.
8355 *
8356 * Returns the new parser context or NULL
8357 */
8358xmlParserCtxtPtr
8359xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8360 const char *chunk, int size, const char *filename) {
8361 xmlParserCtxtPtr ctxt;
8362 xmlParserInputPtr inputStream;
8363 xmlParserInputBufferPtr buf;
8364 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8365
8366 /*
8367 * plug some encoding conversion routines
8368 */
8369 if ((chunk != NULL) && (size >= 4))
8370 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8371
8372 buf = xmlAllocParserInputBuffer(enc);
8373 if (buf == NULL) return(NULL);
8374
8375 ctxt = xmlNewParserCtxt();
8376 if (ctxt == NULL) {
8377 xmlFree(buf);
8378 return(NULL);
8379 }
8380 if (sax != NULL) {
8381 if (ctxt->sax != &xmlDefaultSAXHandler)
8382 xmlFree(ctxt->sax);
8383 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8384 if (ctxt->sax == NULL) {
8385 xmlFree(buf);
8386 xmlFree(ctxt);
8387 return(NULL);
8388 }
8389 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8390 if (user_data != NULL)
8391 ctxt->userData = user_data;
8392 }
8393 if (filename == NULL) {
8394 ctxt->directory = NULL;
8395 } else {
8396 ctxt->directory = xmlParserGetDirectory(filename);
8397 }
8398
8399 inputStream = xmlNewInputStream(ctxt);
8400 if (inputStream == NULL) {
8401 xmlFreeParserCtxt(ctxt);
8402 return(NULL);
8403 }
8404
8405 if (filename == NULL)
8406 inputStream->filename = NULL;
8407 else
8408 inputStream->filename = xmlMemStrdup(filename);
8409 inputStream->buf = buf;
8410 inputStream->base = inputStream->buf->buffer->content;
8411 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008412 inputStream->end =
8413 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008414 if (enc != XML_CHAR_ENCODING_NONE) {
8415 xmlSwitchEncoding(ctxt, enc);
8416 }
8417
8418 inputPush(ctxt, inputStream);
8419
8420 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8421 (ctxt->input->buf != NULL)) {
8422 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8423#ifdef DEBUG_PUSH
8424 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8425#endif
8426 }
8427
8428 return(ctxt);
8429}
8430
8431/**
8432 * xmlCreateIOParserCtxt:
8433 * @sax: a SAX handler
8434 * @user_data: The user data returned on SAX callbacks
8435 * @ioread: an I/O read function
8436 * @ioclose: an I/O close function
8437 * @ioctx: an I/O handler
8438 * @enc: the charset encoding if known
8439 *
8440 * Create a parser context for using the XML parser with an existing
8441 * I/O stream
8442 *
8443 * Returns the new parser context or NULL
8444 */
8445xmlParserCtxtPtr
8446xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8447 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8448 void *ioctx, xmlCharEncoding enc) {
8449 xmlParserCtxtPtr ctxt;
8450 xmlParserInputPtr inputStream;
8451 xmlParserInputBufferPtr buf;
8452
8453 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8454 if (buf == NULL) return(NULL);
8455
8456 ctxt = xmlNewParserCtxt();
8457 if (ctxt == NULL) {
8458 xmlFree(buf);
8459 return(NULL);
8460 }
8461 if (sax != NULL) {
8462 if (ctxt->sax != &xmlDefaultSAXHandler)
8463 xmlFree(ctxt->sax);
8464 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8465 if (ctxt->sax == NULL) {
8466 xmlFree(buf);
8467 xmlFree(ctxt);
8468 return(NULL);
8469 }
8470 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8471 if (user_data != NULL)
8472 ctxt->userData = user_data;
8473 }
8474
8475 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8476 if (inputStream == NULL) {
8477 xmlFreeParserCtxt(ctxt);
8478 return(NULL);
8479 }
8480 inputPush(ctxt, inputStream);
8481
8482 return(ctxt);
8483}
8484
8485/************************************************************************
8486 * *
8487 * Front ends when parsing a Dtd *
8488 * *
8489 ************************************************************************/
8490
8491/**
8492 * xmlIOParseDTD:
8493 * @sax: the SAX handler block or NULL
8494 * @input: an Input Buffer
8495 * @enc: the charset encoding if known
8496 *
8497 * Load and parse a DTD
8498 *
8499 * Returns the resulting xmlDtdPtr or NULL in case of error.
8500 * @input will be freed at parsing end.
8501 */
8502
8503xmlDtdPtr
8504xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8505 xmlCharEncoding enc) {
8506 xmlDtdPtr ret = NULL;
8507 xmlParserCtxtPtr ctxt;
8508 xmlParserInputPtr pinput = NULL;
8509
8510 if (input == NULL)
8511 return(NULL);
8512
8513 ctxt = xmlNewParserCtxt();
8514 if (ctxt == NULL) {
8515 return(NULL);
8516 }
8517
8518 /*
8519 * Set-up the SAX context
8520 */
8521 if (sax != NULL) {
8522 if (ctxt->sax != NULL)
8523 xmlFree(ctxt->sax);
8524 ctxt->sax = sax;
8525 ctxt->userData = NULL;
8526 }
8527
8528 /*
8529 * generate a parser input from the I/O handler
8530 */
8531
8532 pinput = xmlNewIOInputStream(ctxt, input, enc);
8533 if (pinput == NULL) {
8534 if (sax != NULL) ctxt->sax = NULL;
8535 xmlFreeParserCtxt(ctxt);
8536 return(NULL);
8537 }
8538
8539 /*
8540 * plug some encoding conversion routines here.
8541 */
8542 xmlPushInput(ctxt, pinput);
8543
8544 pinput->filename = NULL;
8545 pinput->line = 1;
8546 pinput->col = 1;
8547 pinput->base = ctxt->input->cur;
8548 pinput->cur = ctxt->input->cur;
8549 pinput->free = NULL;
8550
8551 /*
8552 * let's parse that entity knowing it's an external subset.
8553 */
8554 ctxt->inSubset = 2;
8555 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8556 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8557 BAD_CAST "none", BAD_CAST "none");
8558 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8559
8560 if (ctxt->myDoc != NULL) {
8561 if (ctxt->wellFormed) {
8562 ret = ctxt->myDoc->extSubset;
8563 ctxt->myDoc->extSubset = NULL;
8564 } else {
8565 ret = NULL;
8566 }
8567 xmlFreeDoc(ctxt->myDoc);
8568 ctxt->myDoc = NULL;
8569 }
8570 if (sax != NULL) ctxt->sax = NULL;
8571 xmlFreeParserCtxt(ctxt);
8572
8573 return(ret);
8574}
8575
8576/**
8577 * xmlSAXParseDTD:
8578 * @sax: the SAX handler block
8579 * @ExternalID: a NAME* containing the External ID of the DTD
8580 * @SystemID: a NAME* containing the URL to the DTD
8581 *
8582 * Load and parse an external subset.
8583 *
8584 * Returns the resulting xmlDtdPtr or NULL in case of error.
8585 */
8586
8587xmlDtdPtr
8588xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8589 const xmlChar *SystemID) {
8590 xmlDtdPtr ret = NULL;
8591 xmlParserCtxtPtr ctxt;
8592 xmlParserInputPtr input = NULL;
8593 xmlCharEncoding enc;
8594
8595 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8596
8597 ctxt = xmlNewParserCtxt();
8598 if (ctxt == NULL) {
8599 return(NULL);
8600 }
8601
8602 /*
8603 * Set-up the SAX context
8604 */
8605 if (sax != NULL) {
8606 if (ctxt->sax != NULL)
8607 xmlFree(ctxt->sax);
8608 ctxt->sax = sax;
8609 ctxt->userData = NULL;
8610 }
8611
8612 /*
8613 * Ask the Entity resolver to load the damn thing
8614 */
8615
8616 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8617 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8618 if (input == NULL) {
8619 if (sax != NULL) ctxt->sax = NULL;
8620 xmlFreeParserCtxt(ctxt);
8621 return(NULL);
8622 }
8623
8624 /*
8625 * plug some encoding conversion routines here.
8626 */
8627 xmlPushInput(ctxt, input);
8628 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8629 xmlSwitchEncoding(ctxt, enc);
8630
8631 if (input->filename == NULL)
8632 input->filename = (char *) xmlStrdup(SystemID);
8633 input->line = 1;
8634 input->col = 1;
8635 input->base = ctxt->input->cur;
8636 input->cur = ctxt->input->cur;
8637 input->free = NULL;
8638
8639 /*
8640 * let's parse that entity knowing it's an external subset.
8641 */
8642 ctxt->inSubset = 2;
8643 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8644 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8645 ExternalID, SystemID);
8646 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8647
8648 if (ctxt->myDoc != NULL) {
8649 if (ctxt->wellFormed) {
8650 ret = ctxt->myDoc->extSubset;
8651 ctxt->myDoc->extSubset = NULL;
8652 } else {
8653 ret = NULL;
8654 }
8655 xmlFreeDoc(ctxt->myDoc);
8656 ctxt->myDoc = NULL;
8657 }
8658 if (sax != NULL) ctxt->sax = NULL;
8659 xmlFreeParserCtxt(ctxt);
8660
8661 return(ret);
8662}
8663
8664/**
8665 * xmlParseDTD:
8666 * @ExternalID: a NAME* containing the External ID of the DTD
8667 * @SystemID: a NAME* containing the URL to the DTD
8668 *
8669 * Load and parse an external subset.
8670 *
8671 * Returns the resulting xmlDtdPtr or NULL in case of error.
8672 */
8673
8674xmlDtdPtr
8675xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8676 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8677}
8678
8679/************************************************************************
8680 * *
8681 * Front ends when parsing an Entity *
8682 * *
8683 ************************************************************************/
8684
8685/**
Owen Taylor3473f882001-02-23 17:55:21 +00008686 * xmlParseCtxtExternalEntity:
8687 * @ctx: the existing parsing context
8688 * @URL: the URL for the entity to load
8689 * @ID: the System ID for the entity to load
8690 * @list: the return value for the set of parsed nodes
8691 *
8692 * Parse an external general entity within an existing parsing context
8693 * An external general parsed entity is well-formed if it matches the
8694 * production labeled extParsedEnt.
8695 *
8696 * [78] extParsedEnt ::= TextDecl? content
8697 *
8698 * Returns 0 if the entity is well formed, -1 in case of args problem and
8699 * the parser error code otherwise
8700 */
8701
8702int
8703xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8704 const xmlChar *ID, xmlNodePtr *list) {
8705 xmlParserCtxtPtr ctxt;
8706 xmlDocPtr newDoc;
8707 xmlSAXHandlerPtr oldsax = NULL;
8708 int ret = 0;
8709
8710 if (ctx->depth > 40) {
8711 return(XML_ERR_ENTITY_LOOP);
8712 }
8713
8714 if (list != NULL)
8715 *list = NULL;
8716 if ((URL == NULL) && (ID == NULL))
8717 return(-1);
8718 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8719 return(-1);
8720
8721
8722 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8723 if (ctxt == NULL) return(-1);
8724 ctxt->userData = ctxt;
8725 oldsax = ctxt->sax;
8726 ctxt->sax = ctx->sax;
8727 newDoc = xmlNewDoc(BAD_CAST "1.0");
8728 if (newDoc == NULL) {
8729 xmlFreeParserCtxt(ctxt);
8730 return(-1);
8731 }
8732 if (ctx->myDoc != NULL) {
8733 newDoc->intSubset = ctx->myDoc->intSubset;
8734 newDoc->extSubset = ctx->myDoc->extSubset;
8735 }
8736 if (ctx->myDoc->URL != NULL) {
8737 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8738 }
8739 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8740 if (newDoc->children == NULL) {
8741 ctxt->sax = oldsax;
8742 xmlFreeParserCtxt(ctxt);
8743 newDoc->intSubset = NULL;
8744 newDoc->extSubset = NULL;
8745 xmlFreeDoc(newDoc);
8746 return(-1);
8747 }
8748 nodePush(ctxt, newDoc->children);
8749 if (ctx->myDoc == NULL) {
8750 ctxt->myDoc = newDoc;
8751 } else {
8752 ctxt->myDoc = ctx->myDoc;
8753 newDoc->children->doc = ctx->myDoc;
8754 }
8755
8756 /*
8757 * Parse a possible text declaration first
8758 */
8759 GROW;
8760 if ((RAW == '<') && (NXT(1) == '?') &&
8761 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8762 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8763 xmlParseTextDecl(ctxt);
8764 }
8765
8766 /*
8767 * Doing validity checking on chunk doesn't make sense
8768 */
8769 ctxt->instate = XML_PARSER_CONTENT;
8770 ctxt->validate = ctx->validate;
8771 ctxt->loadsubset = ctx->loadsubset;
8772 ctxt->depth = ctx->depth + 1;
8773 ctxt->replaceEntities = ctx->replaceEntities;
8774 if (ctxt->validate) {
8775 ctxt->vctxt.error = ctx->vctxt.error;
8776 ctxt->vctxt.warning = ctx->vctxt.warning;
8777 /* Allocate the Node stack */
8778 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8779 if (ctxt->vctxt.nodeTab == NULL) {
8780 xmlGenericError(xmlGenericErrorContext,
8781 "xmlParseCtxtExternalEntity: out of memory\n");
8782 ctxt->validate = 0;
8783 ctxt->vctxt.error = NULL;
8784 ctxt->vctxt.warning = NULL;
8785 } else {
8786 ctxt->vctxt.nodeNr = 0;
8787 ctxt->vctxt.nodeMax = 4;
8788 ctxt->vctxt.node = NULL;
8789 }
8790 } else {
8791 ctxt->vctxt.error = NULL;
8792 ctxt->vctxt.warning = NULL;
8793 }
8794
8795 xmlParseContent(ctxt);
8796
8797 if ((RAW == '<') && (NXT(1) == '/')) {
8798 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8800 ctxt->sax->error(ctxt->userData,
8801 "chunk is not well balanced\n");
8802 ctxt->wellFormed = 0;
8803 ctxt->disableSAX = 1;
8804 } else if (RAW != 0) {
8805 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8807 ctxt->sax->error(ctxt->userData,
8808 "extra content at the end of well balanced chunk\n");
8809 ctxt->wellFormed = 0;
8810 ctxt->disableSAX = 1;
8811 }
8812 if (ctxt->node != newDoc->children) {
8813 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8815 ctxt->sax->error(ctxt->userData,
8816 "chunk is not well balanced\n");
8817 ctxt->wellFormed = 0;
8818 ctxt->disableSAX = 1;
8819 }
8820
8821 if (!ctxt->wellFormed) {
8822 if (ctxt->errNo == 0)
8823 ret = 1;
8824 else
8825 ret = ctxt->errNo;
8826 } else {
8827 if (list != NULL) {
8828 xmlNodePtr cur;
8829
8830 /*
8831 * Return the newly created nodeset after unlinking it from
8832 * they pseudo parent.
8833 */
8834 cur = newDoc->children->children;
8835 *list = cur;
8836 while (cur != NULL) {
8837 cur->parent = NULL;
8838 cur = cur->next;
8839 }
8840 newDoc->children->children = NULL;
8841 }
8842 ret = 0;
8843 }
8844 ctxt->sax = oldsax;
8845 xmlFreeParserCtxt(ctxt);
8846 newDoc->intSubset = NULL;
8847 newDoc->extSubset = NULL;
8848 xmlFreeDoc(newDoc);
8849
8850 return(ret);
8851}
8852
8853/**
8854 * xmlParseExternalEntity:
8855 * @doc: the document the chunk pertains to
8856 * @sax: the SAX handler bloc (possibly NULL)
8857 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8858 * @depth: Used for loop detection, use 0
8859 * @URL: the URL for the entity to load
8860 * @ID: the System ID for the entity to load
8861 * @list: the return value for the set of parsed nodes
8862 *
8863 * Parse an external general entity
8864 * An external general parsed entity is well-formed if it matches the
8865 * production labeled extParsedEnt.
8866 *
8867 * [78] extParsedEnt ::= TextDecl? content
8868 *
8869 * Returns 0 if the entity is well formed, -1 in case of args problem and
8870 * the parser error code otherwise
8871 */
8872
8873int
8874xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8875 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8876 xmlParserCtxtPtr ctxt;
8877 xmlDocPtr newDoc;
8878 xmlSAXHandlerPtr oldsax = NULL;
8879 int ret = 0;
8880
8881 if (depth > 40) {
8882 return(XML_ERR_ENTITY_LOOP);
8883 }
8884
8885
8886
8887 if (list != NULL)
8888 *list = NULL;
8889 if ((URL == NULL) && (ID == NULL))
8890 return(-1);
8891 if (doc == NULL) /* @@ relax but check for dereferences */
8892 return(-1);
8893
8894
8895 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8896 if (ctxt == NULL) return(-1);
8897 ctxt->userData = ctxt;
8898 if (sax != NULL) {
8899 oldsax = ctxt->sax;
8900 ctxt->sax = sax;
8901 if (user_data != NULL)
8902 ctxt->userData = user_data;
8903 }
8904 newDoc = xmlNewDoc(BAD_CAST "1.0");
8905 if (newDoc == NULL) {
8906 xmlFreeParserCtxt(ctxt);
8907 return(-1);
8908 }
8909 if (doc != NULL) {
8910 newDoc->intSubset = doc->intSubset;
8911 newDoc->extSubset = doc->extSubset;
8912 }
8913 if (doc->URL != NULL) {
8914 newDoc->URL = xmlStrdup(doc->URL);
8915 }
8916 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8917 if (newDoc->children == NULL) {
8918 if (sax != NULL)
8919 ctxt->sax = oldsax;
8920 xmlFreeParserCtxt(ctxt);
8921 newDoc->intSubset = NULL;
8922 newDoc->extSubset = NULL;
8923 xmlFreeDoc(newDoc);
8924 return(-1);
8925 }
8926 nodePush(ctxt, newDoc->children);
8927 if (doc == NULL) {
8928 ctxt->myDoc = newDoc;
8929 } else {
8930 ctxt->myDoc = doc;
8931 newDoc->children->doc = doc;
8932 }
8933
8934 /*
8935 * Parse a possible text declaration first
8936 */
8937 GROW;
8938 if ((RAW == '<') && (NXT(1) == '?') &&
8939 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8940 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8941 xmlParseTextDecl(ctxt);
8942 }
8943
8944 /*
8945 * Doing validity checking on chunk doesn't make sense
8946 */
8947 ctxt->instate = XML_PARSER_CONTENT;
8948 ctxt->validate = 0;
8949 ctxt->loadsubset = 0;
8950 ctxt->depth = depth;
8951
8952 xmlParseContent(ctxt);
8953
8954 if ((RAW == '<') && (NXT(1) == '/')) {
8955 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8957 ctxt->sax->error(ctxt->userData,
8958 "chunk is not well balanced\n");
8959 ctxt->wellFormed = 0;
8960 ctxt->disableSAX = 1;
8961 } else if (RAW != 0) {
8962 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8964 ctxt->sax->error(ctxt->userData,
8965 "extra content at the end of well balanced chunk\n");
8966 ctxt->wellFormed = 0;
8967 ctxt->disableSAX = 1;
8968 }
8969 if (ctxt->node != newDoc->children) {
8970 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8972 ctxt->sax->error(ctxt->userData,
8973 "chunk is not well balanced\n");
8974 ctxt->wellFormed = 0;
8975 ctxt->disableSAX = 1;
8976 }
8977
8978 if (!ctxt->wellFormed) {
8979 if (ctxt->errNo == 0)
8980 ret = 1;
8981 else
8982 ret = ctxt->errNo;
8983 } else {
8984 if (list != NULL) {
8985 xmlNodePtr cur;
8986
8987 /*
8988 * Return the newly created nodeset after unlinking it from
8989 * they pseudo parent.
8990 */
8991 cur = newDoc->children->children;
8992 *list = cur;
8993 while (cur != NULL) {
8994 cur->parent = NULL;
8995 cur = cur->next;
8996 }
8997 newDoc->children->children = NULL;
8998 }
8999 ret = 0;
9000 }
9001 if (sax != NULL)
9002 ctxt->sax = oldsax;
9003 xmlFreeParserCtxt(ctxt);
9004 newDoc->intSubset = NULL;
9005 newDoc->extSubset = NULL;
9006 xmlFreeDoc(newDoc);
9007
9008 return(ret);
9009}
9010
9011/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009012 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009013 * @doc: the document the chunk pertains to
9014 * @sax: the SAX handler bloc (possibly NULL)
9015 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9016 * @depth: Used for loop detection, use 0
9017 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9018 * @list: the return value for the set of parsed nodes
9019 *
9020 * Parse a well-balanced chunk of an XML document
9021 * called by the parser
9022 * The allowed sequence for the Well Balanced Chunk is the one defined by
9023 * the content production in the XML grammar:
9024 *
9025 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9026 *
9027 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9028 * the parser error code otherwise
9029 */
9030
9031int
9032xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9033 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9034 xmlParserCtxtPtr ctxt;
9035 xmlDocPtr newDoc;
9036 xmlSAXHandlerPtr oldsax = NULL;
9037 int size;
9038 int ret = 0;
9039
9040 if (depth > 40) {
9041 return(XML_ERR_ENTITY_LOOP);
9042 }
9043
9044
9045 if (list != NULL)
9046 *list = NULL;
9047 if (string == NULL)
9048 return(-1);
9049
9050 size = xmlStrlen(string);
9051
9052 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9053 if (ctxt == NULL) return(-1);
9054 ctxt->userData = ctxt;
9055 if (sax != NULL) {
9056 oldsax = ctxt->sax;
9057 ctxt->sax = sax;
9058 if (user_data != NULL)
9059 ctxt->userData = user_data;
9060 }
9061 newDoc = xmlNewDoc(BAD_CAST "1.0");
9062 if (newDoc == NULL) {
9063 xmlFreeParserCtxt(ctxt);
9064 return(-1);
9065 }
9066 if (doc != NULL) {
9067 newDoc->intSubset = doc->intSubset;
9068 newDoc->extSubset = doc->extSubset;
9069 }
9070 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9071 if (newDoc->children == NULL) {
9072 if (sax != NULL)
9073 ctxt->sax = oldsax;
9074 xmlFreeParserCtxt(ctxt);
9075 newDoc->intSubset = NULL;
9076 newDoc->extSubset = NULL;
9077 xmlFreeDoc(newDoc);
9078 return(-1);
9079 }
9080 nodePush(ctxt, newDoc->children);
9081 if (doc == NULL) {
9082 ctxt->myDoc = newDoc;
9083 } else {
9084 ctxt->myDoc = doc;
9085 newDoc->children->doc = doc;
9086 }
9087 ctxt->instate = XML_PARSER_CONTENT;
9088 ctxt->depth = depth;
9089
9090 /*
9091 * Doing validity checking on chunk doesn't make sense
9092 */
9093 ctxt->validate = 0;
9094 ctxt->loadsubset = 0;
9095
9096 xmlParseContent(ctxt);
9097
9098 if ((RAW == '<') && (NXT(1) == '/')) {
9099 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9101 ctxt->sax->error(ctxt->userData,
9102 "chunk is not well balanced\n");
9103 ctxt->wellFormed = 0;
9104 ctxt->disableSAX = 1;
9105 } else if (RAW != 0) {
9106 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9108 ctxt->sax->error(ctxt->userData,
9109 "extra content at the end of well balanced chunk\n");
9110 ctxt->wellFormed = 0;
9111 ctxt->disableSAX = 1;
9112 }
9113 if (ctxt->node != newDoc->children) {
9114 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9116 ctxt->sax->error(ctxt->userData,
9117 "chunk is not well balanced\n");
9118 ctxt->wellFormed = 0;
9119 ctxt->disableSAX = 1;
9120 }
9121
9122 if (!ctxt->wellFormed) {
9123 if (ctxt->errNo == 0)
9124 ret = 1;
9125 else
9126 ret = ctxt->errNo;
9127 } else {
9128 if (list != NULL) {
9129 xmlNodePtr cur;
9130
9131 /*
9132 * Return the newly created nodeset after unlinking it from
9133 * they pseudo parent.
9134 */
9135 cur = newDoc->children->children;
9136 *list = cur;
9137 while (cur != NULL) {
9138 cur->parent = NULL;
9139 cur = cur->next;
9140 }
9141 newDoc->children->children = NULL;
9142 }
9143 ret = 0;
9144 }
9145 if (sax != NULL)
9146 ctxt->sax = oldsax;
9147 xmlFreeParserCtxt(ctxt);
9148 newDoc->intSubset = NULL;
9149 newDoc->extSubset = NULL;
9150 xmlFreeDoc(newDoc);
9151
9152 return(ret);
9153}
9154
9155/**
9156 * xmlSAXParseEntity:
9157 * @sax: the SAX handler block
9158 * @filename: the filename
9159 *
9160 * parse an XML external entity out of context and build a tree.
9161 * It use the given SAX function block to handle the parsing callback.
9162 * If sax is NULL, fallback to the default DOM tree building routines.
9163 *
9164 * [78] extParsedEnt ::= TextDecl? content
9165 *
9166 * This correspond to a "Well Balanced" chunk
9167 *
9168 * Returns the resulting document tree
9169 */
9170
9171xmlDocPtr
9172xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9173 xmlDocPtr ret;
9174 xmlParserCtxtPtr ctxt;
9175 char *directory = NULL;
9176
9177 ctxt = xmlCreateFileParserCtxt(filename);
9178 if (ctxt == NULL) {
9179 return(NULL);
9180 }
9181 if (sax != NULL) {
9182 if (ctxt->sax != NULL)
9183 xmlFree(ctxt->sax);
9184 ctxt->sax = sax;
9185 ctxt->userData = NULL;
9186 }
9187
9188 if ((ctxt->directory == NULL) && (directory == NULL))
9189 directory = xmlParserGetDirectory(filename);
9190
9191 xmlParseExtParsedEnt(ctxt);
9192
9193 if (ctxt->wellFormed)
9194 ret = ctxt->myDoc;
9195 else {
9196 ret = NULL;
9197 xmlFreeDoc(ctxt->myDoc);
9198 ctxt->myDoc = NULL;
9199 }
9200 if (sax != NULL)
9201 ctxt->sax = NULL;
9202 xmlFreeParserCtxt(ctxt);
9203
9204 return(ret);
9205}
9206
9207/**
9208 * xmlParseEntity:
9209 * @filename: the filename
9210 *
9211 * parse an XML external entity out of context and build a tree.
9212 *
9213 * [78] extParsedEnt ::= TextDecl? content
9214 *
9215 * This correspond to a "Well Balanced" chunk
9216 *
9217 * Returns the resulting document tree
9218 */
9219
9220xmlDocPtr
9221xmlParseEntity(const char *filename) {
9222 return(xmlSAXParseEntity(NULL, filename));
9223}
9224
9225/**
9226 * xmlCreateEntityParserCtxt:
9227 * @URL: the entity URL
9228 * @ID: the entity PUBLIC ID
9229 * @base: a posible base for the target URI
9230 *
9231 * Create a parser context for an external entity
9232 * Automatic support for ZLIB/Compress compressed document is provided
9233 * by default if found at compile-time.
9234 *
9235 * Returns the new parser context or NULL
9236 */
9237xmlParserCtxtPtr
9238xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9239 const xmlChar *base) {
9240 xmlParserCtxtPtr ctxt;
9241 xmlParserInputPtr inputStream;
9242 char *directory = NULL;
9243 xmlChar *uri;
9244
9245 ctxt = xmlNewParserCtxt();
9246 if (ctxt == NULL) {
9247 return(NULL);
9248 }
9249
9250 uri = xmlBuildURI(URL, base);
9251
9252 if (uri == NULL) {
9253 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9254 if (inputStream == NULL) {
9255 xmlFreeParserCtxt(ctxt);
9256 return(NULL);
9257 }
9258
9259 inputPush(ctxt, inputStream);
9260
9261 if ((ctxt->directory == NULL) && (directory == NULL))
9262 directory = xmlParserGetDirectory((char *)URL);
9263 if ((ctxt->directory == NULL) && (directory != NULL))
9264 ctxt->directory = directory;
9265 } else {
9266 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9267 if (inputStream == NULL) {
9268 xmlFree(uri);
9269 xmlFreeParserCtxt(ctxt);
9270 return(NULL);
9271 }
9272
9273 inputPush(ctxt, inputStream);
9274
9275 if ((ctxt->directory == NULL) && (directory == NULL))
9276 directory = xmlParserGetDirectory((char *)uri);
9277 if ((ctxt->directory == NULL) && (directory != NULL))
9278 ctxt->directory = directory;
9279 xmlFree(uri);
9280 }
9281
9282 return(ctxt);
9283}
9284
9285/************************************************************************
9286 * *
9287 * Front ends when parsing from a file *
9288 * *
9289 ************************************************************************/
9290
9291/**
9292 * xmlCreateFileParserCtxt:
9293 * @filename: the filename
9294 *
9295 * Create a parser context for a file content.
9296 * Automatic support for ZLIB/Compress compressed document is provided
9297 * by default if found at compile-time.
9298 *
9299 * Returns the new parser context or NULL
9300 */
9301xmlParserCtxtPtr
9302xmlCreateFileParserCtxt(const char *filename)
9303{
9304 xmlParserCtxtPtr ctxt;
9305 xmlParserInputPtr inputStream;
9306 xmlParserInputBufferPtr buf;
9307 char *directory = NULL;
9308
9309 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9310 if (buf == NULL) {
9311 return(NULL);
9312 }
9313
9314 ctxt = xmlNewParserCtxt();
9315 if (ctxt == NULL) {
9316 if (xmlDefaultSAXHandler.error != NULL) {
9317 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9318 }
9319 return(NULL);
9320 }
9321
9322 inputStream = xmlNewInputStream(ctxt);
9323 if (inputStream == NULL) {
9324 xmlFreeParserCtxt(ctxt);
9325 return(NULL);
9326 }
9327
9328 inputStream->filename = xmlMemStrdup(filename);
9329 inputStream->buf = buf;
9330 inputStream->base = inputStream->buf->buffer->content;
9331 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009332 inputStream->end =
9333 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009334
9335 inputPush(ctxt, inputStream);
9336 if ((ctxt->directory == NULL) && (directory == NULL))
9337 directory = xmlParserGetDirectory(filename);
9338 if ((ctxt->directory == NULL) && (directory != NULL))
9339 ctxt->directory = directory;
9340
9341 return(ctxt);
9342}
9343
9344/**
9345 * xmlSAXParseFile:
9346 * @sax: the SAX handler block
9347 * @filename: the filename
9348 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9349 * documents
9350 *
9351 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9352 * compressed document is provided by default if found at compile-time.
9353 * It use the given SAX function block to handle the parsing callback.
9354 * If sax is NULL, fallback to the default DOM tree building routines.
9355 *
9356 * Returns the resulting document tree
9357 */
9358
9359xmlDocPtr
9360xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9361 int recovery) {
9362 xmlDocPtr ret;
9363 xmlParserCtxtPtr ctxt;
9364 char *directory = NULL;
9365
9366 ctxt = xmlCreateFileParserCtxt(filename);
9367 if (ctxt == NULL) {
9368 return(NULL);
9369 }
9370 if (sax != NULL) {
9371 if (ctxt->sax != NULL)
9372 xmlFree(ctxt->sax);
9373 ctxt->sax = sax;
9374 ctxt->userData = NULL;
9375 }
9376
9377 if ((ctxt->directory == NULL) && (directory == NULL))
9378 directory = xmlParserGetDirectory(filename);
9379 if ((ctxt->directory == NULL) && (directory != NULL))
9380 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9381
9382 xmlParseDocument(ctxt);
9383
9384 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9385 else {
9386 ret = NULL;
9387 xmlFreeDoc(ctxt->myDoc);
9388 ctxt->myDoc = NULL;
9389 }
9390 if (sax != NULL)
9391 ctxt->sax = NULL;
9392 xmlFreeParserCtxt(ctxt);
9393
9394 return(ret);
9395}
9396
9397/**
9398 * xmlRecoverDoc:
9399 * @cur: a pointer to an array of xmlChar
9400 *
9401 * parse an XML in-memory document and build a tree.
9402 * In the case the document is not Well Formed, a tree is built anyway
9403 *
9404 * Returns the resulting document tree
9405 */
9406
9407xmlDocPtr
9408xmlRecoverDoc(xmlChar *cur) {
9409 return(xmlSAXParseDoc(NULL, cur, 1));
9410}
9411
9412/**
9413 * xmlParseFile:
9414 * @filename: the filename
9415 *
9416 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9417 * compressed document is provided by default if found at compile-time.
9418 *
9419 * Returns the resulting document tree
9420 */
9421
9422xmlDocPtr
9423xmlParseFile(const char *filename) {
9424 return(xmlSAXParseFile(NULL, filename, 0));
9425}
9426
9427/**
9428 * xmlRecoverFile:
9429 * @filename: the filename
9430 *
9431 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9432 * compressed document is provided by default if found at compile-time.
9433 * In the case the document is not Well Formed, a tree is built anyway
9434 *
9435 * Returns the resulting document tree
9436 */
9437
9438xmlDocPtr
9439xmlRecoverFile(const char *filename) {
9440 return(xmlSAXParseFile(NULL, filename, 1));
9441}
9442
9443
9444/**
9445 * xmlSetupParserForBuffer:
9446 * @ctxt: an XML parser context
9447 * @buffer: a xmlChar * buffer
9448 * @filename: a file name
9449 *
9450 * Setup the parser context to parse a new buffer; Clears any prior
9451 * contents from the parser context. The buffer parameter must not be
9452 * NULL, but the filename parameter can be
9453 */
9454void
9455xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9456 const char* filename)
9457{
9458 xmlParserInputPtr input;
9459
9460 input = xmlNewInputStream(ctxt);
9461 if (input == NULL) {
9462 perror("malloc");
9463 xmlFree(ctxt);
9464 return;
9465 }
9466
9467 xmlClearParserCtxt(ctxt);
9468 if (filename != NULL)
9469 input->filename = xmlMemStrdup(filename);
9470 input->base = buffer;
9471 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009472 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009473 inputPush(ctxt, input);
9474}
9475
9476/**
9477 * xmlSAXUserParseFile:
9478 * @sax: a SAX handler
9479 * @user_data: The user data returned on SAX callbacks
9480 * @filename: a file name
9481 *
9482 * parse an XML file and call the given SAX handler routines.
9483 * Automatic support for ZLIB/Compress compressed document is provided
9484 *
9485 * Returns 0 in case of success or a error number otherwise
9486 */
9487int
9488xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9489 const char *filename) {
9490 int ret = 0;
9491 xmlParserCtxtPtr ctxt;
9492
9493 ctxt = xmlCreateFileParserCtxt(filename);
9494 if (ctxt == NULL) return -1;
9495 if (ctxt->sax != &xmlDefaultSAXHandler)
9496 xmlFree(ctxt->sax);
9497 ctxt->sax = sax;
9498 if (user_data != NULL)
9499 ctxt->userData = user_data;
9500
9501 xmlParseDocument(ctxt);
9502
9503 if (ctxt->wellFormed)
9504 ret = 0;
9505 else {
9506 if (ctxt->errNo != 0)
9507 ret = ctxt->errNo;
9508 else
9509 ret = -1;
9510 }
9511 if (sax != NULL)
9512 ctxt->sax = NULL;
9513 xmlFreeParserCtxt(ctxt);
9514
9515 return ret;
9516}
9517
9518/************************************************************************
9519 * *
9520 * Front ends when parsing from memory *
9521 * *
9522 ************************************************************************/
9523
9524/**
9525 * xmlCreateMemoryParserCtxt:
9526 * @buffer: a pointer to a char array
9527 * @size: the size of the array
9528 *
9529 * Create a parser context for an XML in-memory document.
9530 *
9531 * Returns the new parser context or NULL
9532 */
9533xmlParserCtxtPtr
9534xmlCreateMemoryParserCtxt(char *buffer, int size) {
9535 xmlParserCtxtPtr ctxt;
9536 xmlParserInputPtr input;
9537 xmlParserInputBufferPtr buf;
9538
9539 if (buffer == NULL)
9540 return(NULL);
9541 if (size <= 0)
9542 return(NULL);
9543
9544 ctxt = xmlNewParserCtxt();
9545 if (ctxt == NULL)
9546 return(NULL);
9547
9548 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9549 if (buf == NULL) return(NULL);
9550
9551 input = xmlNewInputStream(ctxt);
9552 if (input == NULL) {
9553 xmlFreeParserCtxt(ctxt);
9554 return(NULL);
9555 }
9556
9557 input->filename = NULL;
9558 input->buf = buf;
9559 input->base = input->buf->buffer->content;
9560 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009561 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009562
9563 inputPush(ctxt, input);
9564 return(ctxt);
9565}
9566
9567/**
9568 * xmlSAXParseMemory:
9569 * @sax: the SAX handler block
9570 * @buffer: an pointer to a char array
9571 * @size: the size of the array
9572 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9573 * documents
9574 *
9575 * parse an XML in-memory block and use the given SAX function block
9576 * to handle the parsing callback. If sax is NULL, fallback to the default
9577 * DOM tree building routines.
9578 *
9579 * Returns the resulting document tree
9580 */
9581xmlDocPtr
9582xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9583 xmlDocPtr ret;
9584 xmlParserCtxtPtr ctxt;
9585
9586 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9587 if (ctxt == NULL) return(NULL);
9588 if (sax != NULL) {
9589 ctxt->sax = sax;
9590 ctxt->userData = NULL;
9591 }
9592
9593 xmlParseDocument(ctxt);
9594
9595 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9596 else {
9597 ret = NULL;
9598 xmlFreeDoc(ctxt->myDoc);
9599 ctxt->myDoc = NULL;
9600 }
9601 if (sax != NULL)
9602 ctxt->sax = NULL;
9603 xmlFreeParserCtxt(ctxt);
9604
9605 return(ret);
9606}
9607
9608/**
9609 * xmlParseMemory:
9610 * @buffer: an pointer to a char array
9611 * @size: the size of the array
9612 *
9613 * parse an XML in-memory block and build a tree.
9614 *
9615 * Returns the resulting document tree
9616 */
9617
9618xmlDocPtr xmlParseMemory(char *buffer, int size) {
9619 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9620}
9621
9622/**
9623 * xmlRecoverMemory:
9624 * @buffer: an pointer to a char array
9625 * @size: the size of the array
9626 *
9627 * parse an XML in-memory block and build a tree.
9628 * In the case the document is not Well Formed, a tree is built anyway
9629 *
9630 * Returns the resulting document tree
9631 */
9632
9633xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9634 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9635}
9636
9637/**
9638 * xmlSAXUserParseMemory:
9639 * @sax: a SAX handler
9640 * @user_data: The user data returned on SAX callbacks
9641 * @buffer: an in-memory XML document input
9642 * @size: the length of the XML document in bytes
9643 *
9644 * A better SAX parsing routine.
9645 * parse an XML in-memory buffer and call the given SAX handler routines.
9646 *
9647 * Returns 0 in case of success or a error number otherwise
9648 */
9649int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9650 char *buffer, int size) {
9651 int ret = 0;
9652 xmlParserCtxtPtr ctxt;
9653 xmlSAXHandlerPtr oldsax = NULL;
9654
9655 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9656 if (ctxt == NULL) return -1;
9657 if (sax != NULL) {
9658 oldsax = ctxt->sax;
9659 ctxt->sax = sax;
9660 }
9661 ctxt->userData = user_data;
9662
9663 xmlParseDocument(ctxt);
9664
9665 if (ctxt->wellFormed)
9666 ret = 0;
9667 else {
9668 if (ctxt->errNo != 0)
9669 ret = ctxt->errNo;
9670 else
9671 ret = -1;
9672 }
9673 if (sax != NULL) {
9674 ctxt->sax = oldsax;
9675 }
9676 xmlFreeParserCtxt(ctxt);
9677
9678 return ret;
9679}
9680
9681/**
9682 * xmlCreateDocParserCtxt:
9683 * @cur: a pointer to an array of xmlChar
9684 *
9685 * Creates a parser context for an XML in-memory document.
9686 *
9687 * Returns the new parser context or NULL
9688 */
9689xmlParserCtxtPtr
9690xmlCreateDocParserCtxt(xmlChar *cur) {
9691 int len;
9692
9693 if (cur == NULL)
9694 return(NULL);
9695 len = xmlStrlen(cur);
9696 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9697}
9698
9699/**
9700 * xmlSAXParseDoc:
9701 * @sax: the SAX handler block
9702 * @cur: a pointer to an array of xmlChar
9703 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9704 * documents
9705 *
9706 * parse an XML in-memory document and build a tree.
9707 * It use the given SAX function block to handle the parsing callback.
9708 * If sax is NULL, fallback to the default DOM tree building routines.
9709 *
9710 * Returns the resulting document tree
9711 */
9712
9713xmlDocPtr
9714xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9715 xmlDocPtr ret;
9716 xmlParserCtxtPtr ctxt;
9717
9718 if (cur == NULL) return(NULL);
9719
9720
9721 ctxt = xmlCreateDocParserCtxt(cur);
9722 if (ctxt == NULL) return(NULL);
9723 if (sax != NULL) {
9724 ctxt->sax = sax;
9725 ctxt->userData = NULL;
9726 }
9727
9728 xmlParseDocument(ctxt);
9729 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9730 else {
9731 ret = NULL;
9732 xmlFreeDoc(ctxt->myDoc);
9733 ctxt->myDoc = NULL;
9734 }
9735 if (sax != NULL)
9736 ctxt->sax = NULL;
9737 xmlFreeParserCtxt(ctxt);
9738
9739 return(ret);
9740}
9741
9742/**
9743 * xmlParseDoc:
9744 * @cur: a pointer to an array of xmlChar
9745 *
9746 * parse an XML in-memory document and build a tree.
9747 *
9748 * Returns the resulting document tree
9749 */
9750
9751xmlDocPtr
9752xmlParseDoc(xmlChar *cur) {
9753 return(xmlSAXParseDoc(NULL, cur, 0));
9754}
9755
9756
9757/************************************************************************
9758 * *
9759 * Miscellaneous *
9760 * *
9761 ************************************************************************/
9762
9763#ifdef LIBXML_XPATH_ENABLED
9764#include <libxml/xpath.h>
9765#endif
9766
/* Set to 1 once xmlInitParser() has run, reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser: sets up the encoding
 * handlers, the predefined entities, the default SAX handler and the
 * default input/output callbacks, plus the HTML and XPath parts when
 * they are compiled in. Calling it again once initialized does nothing.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark as done only after every initializer has been called. */
        xmlParserInitialized = 1;
    }
}
9795
9796/**
9797 * xmlCleanupParser:
9798 *
9799 * Cleanup function for the XML parser. It tries to reclaim all
9800 * parsing related global memory allocated for the parser processing.
9801 * It doesn't deallocate any document related memory. Calling this
9802 * function should not prevent reusing the parser.
9803 */
9804
9805void
9806xmlCleanupParser(void) {
9807 xmlParserInitialized = 0;
9808 xmlCleanupCharEncodingHandlers();
9809 xmlCleanupPredefinedEntities();
9810}
9811
9812/**
9813 * xmlPedanticParserDefault:
9814 * @val: int 0 or 1
9815 *
9816 * Set and return the previous value for enabling pedantic warnings.
9817 *
9818 * Returns the last value for 0 for no substitution, 1 for substitution.
9819 */
9820
9821int
9822xmlPedanticParserDefault(int val) {
9823 int old = xmlPedanticParserDefaultValue;
9824
9825 xmlPedanticParserDefaultValue = val;
9826 return(old);
9827}
9828
9829/**
9830 * xmlSubstituteEntitiesDefault:
9831 * @val: int 0 or 1
9832 *
9833 * Set and return the previous value for default entity support.
9834 * Initially the parser always keep entity references instead of substituting
9835 * entity values in the output. This function has to be used to change the
9836 * default parser behaviour
9837 * SAX::subtituteEntities() has to be used for changing that on a file by
9838 * file basis.
9839 *
9840 * Returns the last value for 0 for no substitution, 1 for substitution.
9841 */
9842
9843int
9844xmlSubstituteEntitiesDefault(int val) {
9845 int old = xmlSubstituteEntitiesDefaultValue;
9846
9847 xmlSubstituteEntitiesDefaultValue = val;
9848 return(old);
9849}
9850
9851/**
9852 * xmlKeepBlanksDefault:
9853 * @val: int 0 or 1
9854 *
9855 * Set and return the previous value for default blanks text nodes support.
9856 * The 1.x version of the parser used an heuristic to try to detect
9857 * ignorable white spaces. As a result the SAX callback was generating
9858 * ignorableWhitespace() callbacks instead of characters() one, and when
9859 * using the DOM output text nodes containing those blanks were not generated.
9860 * The 2.x and later version will switch to the XML standard way and
9861 * ignorableWhitespace() are only generated when running the parser in
9862 * validating mode and when the current element doesn't allow CDATA or
9863 * mixed content.
9864 * This function is provided as a way to force the standard behaviour
9865 * on 1.X libs and to switch back to the old mode for compatibility when
9866 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9867 * by using xmlIsBlankNode() commodity function to detect the "empty"
9868 * nodes generated.
9869 * This value also affect autogeneration of indentation when saving code
9870 * if blanks sections are kept, indentation is not generated.
9871 *
9872 * Returns the last value for 0 for no substitution, 1 for substitution.
9873 */
9874
9875int
9876xmlKeepBlanksDefault(int val) {
9877 int old = xmlKeepBlanksDefaultValue;
9878
9879 xmlKeepBlanksDefaultValue = val;
9880 xmlIndentTreeOutput = !val;
9881 return(old);
9882}
9883