/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 *                  and xmlDoValidityCheckingDefaultValue for VMS
 */
35
36#ifdef WIN32
37#include "win32config.h"
38#define XML_DIR_SEP '\\'
39#else
40#include "config.h"
41#define XML_DIR_SEP '/'
42#endif
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
58#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
62#include <stdlib.h>
63#endif
64#ifdef HAVE_SYS_STAT_H
65#include <sys/stat.h>
66#endif
67#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
77
/*
 * Initial size, in xmlChars, of the translation buffer allocated by
 * xmlStringDecodeEntities().
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
/*
 * Headroom, in xmlChars, that xmlStringDecodeEntities() keeps free at the
 * end of its buffer: the buffer is grown once fewer than this many slots
 * remain.
 */
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing
 */
int xmlGetWarningsDefaultValue = 1;
/*
 * When non-zero, the input push/pop routines and the entity decoding code
 * emit traces through xmlGenericError().
 */
int xmlParserDebugEntities = 0;
#ifdef VMS
/*
 * On VMS the two variables below are defined under truncated names and
 * the full names are mapped onto them with macros.
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal 
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};

/*
 * Forward declarations: both routines are referenced in this file before
 * their definition (xmlParserHandlePEReference by the SKIP/NEXTL macros,
 * xmlParseStringPEReference by xmlStringDecodeEntities).
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);


/************************************************************************
 *									*
 * 		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/

/* Forward declaration, used by xmlStringDecodeEntities(). */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
122
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to the definition of two
 * functions working on the fields ctxt->name, ctxt->nameNr, ctxt->nameMax
 * and ctxt->nameTab of the parser context:
 *
 *   scope int  namePush(xmlParserCtxtPtr ctxt, type value)
 *       Appends @value on top of the stack, growing the table when it is
 *       full, and makes it the current ctxt->name. Returns the index at
 *       which the value was stored, or 0 if the table could not be grown.
 *       When growing fails the stack is left untouched: the reallocation
 *       result goes through a temporary so the existing table is neither
 *       leaked nor replaced by NULL, and the capacity is only updated once
 *       the new table is in place.
 *
 *   scope type namePop(xmlParserCtxtPtr ctxt)
 *       Removes and returns the top value (0 if the stack is empty),
 *       clears the vacated slot and makes the new top, or NULL, the
 *       current ctxt->name.
 */

#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        type *name##NewTab;						\
        int name##NewMax;						\
        name##NewMax = (ctxt->name##Max > 0) ?				\
                       ctxt->name##Max * 2 : 4;				\
        name##NewTab = (type *) xmlRealloc(ctxt->name##Tab,		\
                     name##NewMax * sizeof(ctxt->name##Tab[0]));	\
        if (name##NewTab == NULL) {					\
            xmlGenericError(xmlGenericErrorContext,			\
                    "realloc failed !\n");				\
            return(0);							\
        }								\
        ctxt->name##Tab = name##NewTab;					\
        ctxt->name##Max = name##NewMax;					\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
155
/*
 * Those macros actually generate the functions:
 *   inputPush()/inputPop() for the stack of xmlParserInputPtr,
 *   nodePush()/nodePop()   for the stack of xmlNodePtr,
 *   namePush()/namePop()   for the stack of xmlChar * names.
 */
PUSH_AND_POP(extern, xmlParserInputPtr, input)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
162
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000163static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000164 if (ctxt->spaceNr >= ctxt->spaceMax) {
165 ctxt->spaceMax *= 2;
166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
168 if (ctxt->spaceTab == NULL) {
169 xmlGenericError(xmlGenericErrorContext,
170 "realloc failed !\n");
171 return(0);
172 }
173 }
174 ctxt->spaceTab[ctxt->spaceNr] = val;
175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
176 return(ctxt->spaceNr++);
177}
178
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000179static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000180 int ret;
181 if (ctxt->spaceNr <= 0) return(0);
182 ctxt->spaceNr--;
183 if (ctxt->spaceNr > 0)
184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
185 else
186 ctxt->space = NULL;
187 ret = ctxt->spaceTab[ctxt->spaceNr];
188 ctxt->spaceTab[ctxt->spaceNr] = -1;
189 return(ret);
190}
191
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           current xmlChar value of the input.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but only looks at the input buffer: it yields -1
 *           while a token is pending instead of returning the token.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser. It hands a following '%' to
 *           xmlParserHandlePEReference() and pops the input when it is
 *           exhausted and cannot be grown.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar and refill the input when its end is
 *           reached; no line/column accounting, no entity handling.
 *   NEXTL(l) Skip l xmlChars in the input buffer, keeping the line and
 *           column counters up to date and dropping any pending token.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE(review): SHRINK, GROW and NEXT1 expand to a bare "if" statement or
 * compound statement rather than do { } while (0); they are kept in that
 * shape here because not every call site is visible from this part of the
 * file. Use them only as a full statement.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* The macro arguments below are parenthesized so that passing an
 * expression instead of a plain identifier cannot change the parse. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000278
279/**
280 * xmlSkipBlankChars:
281 * @ctxt: the XML parser context
282 *
283 * skip all blanks character found at that point in the input streams.
284 * It pops up finished entities in the process if allowable at that point.
285 *
286 * Returns the number of space chars skipped
287 */
288
289int
290xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
291 int cur, res = 0;
292
293 /*
294 * It's Okay to use CUR/NEXT here since all the blanks are on
295 * the ASCII range.
296 */
297 do {
298 cur = CUR;
299 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
300 NEXT;
301 cur = CUR;
302 res++;
303 }
304 while ((cur == 0) && (ctxt->inputNr > 1) &&
305 (ctxt->instate != XML_PARSER_COMMENT)) {
306 xmlPopInput(ctxt);
307 cur = CUR;
308 }
309 /*
310 * Need to handle support of entities branching here
311 */
312 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
313 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
314 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
315 return(res);
316}
317
318/************************************************************************
319 * *
320 * Commodity functions to handle entities *
321 * *
322 ************************************************************************/
323
324/**
325 * xmlPopInput:
326 * @ctxt: an XML parser context
327 *
328 * xmlPopInput: the current input pointed by ctxt->input came to an end
329 * pop it and return the next char.
330 *
331 * Returns the current xmlChar in the parser context
332 */
333xmlChar
334xmlPopInput(xmlParserCtxtPtr ctxt) {
335 if (ctxt->inputNr == 1) return(0); /* End of main Input */
336 if (xmlParserDebugEntities)
337 xmlGenericError(xmlGenericErrorContext,
338 "Popping input %d\n", ctxt->inputNr);
339 xmlFreeInputStream(inputPop(ctxt));
340 if ((*ctxt->input->cur == 0) &&
341 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
342 return(xmlPopInput(ctxt));
343 return(CUR);
344}
345
346/**
347 * xmlPushInput:
348 * @ctxt: an XML parser context
349 * @input: an XML parser input fragment (entity, XML fragment ...).
350 *
351 * xmlPushInput: switch to a new input stream which is stacked on top
352 * of the previous one(s).
353 */
354void
355xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
356 if (input == NULL) return;
357
358 if (xmlParserDebugEntities) {
359 if ((ctxt->input != NULL) && (ctxt->input->filename))
360 xmlGenericError(xmlGenericErrorContext,
361 "%s(%d): ", ctxt->input->filename,
362 ctxt->input->line);
363 xmlGenericError(xmlGenericErrorContext,
364 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
365 }
366 inputPush(ctxt, input);
367 GROW;
368}
369
370/**
371 * xmlParseCharRef:
372 * @ctxt: an XML parser context
373 *
374 * parse Reference declarations
375 *
376 * [66] CharRef ::= '&#' [0-9]+ ';' |
377 * '&#x' [0-9a-fA-F]+ ';'
378 *
379 * [ WFC: Legal Character ]
380 * Characters referred to using character references must match the
381 * production for Char.
382 *
383 * Returns the value parsed (as an int), 0 in case of error
384 */
385int
386xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000387 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000388 int count = 0;
389
390 if (ctxt->token != 0) {
391 val = ctxt->token;
392 ctxt->token = 0;
393 return(val);
394 }
395 /*
396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
397 */
398 if ((RAW == '&') && (NXT(1) == '#') &&
399 (NXT(2) == 'x')) {
400 SKIP(3);
401 GROW;
402 while (RAW != ';') { /* loop blocked by count */
403 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
404 val = val * 16 + (CUR - '0');
405 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
406 val = val * 16 + (CUR - 'a') + 10;
407 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
408 val = val * 16 + (CUR - 'A') + 10;
409 else {
410 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
412 ctxt->sax->error(ctxt->userData,
413 "xmlParseCharRef: invalid hexadecimal value\n");
414 ctxt->wellFormed = 0;
415 ctxt->disableSAX = 1;
416 val = 0;
417 break;
418 }
419 NEXT;
420 count++;
421 }
422 if (RAW == ';') {
423 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
424 ctxt->nbChars ++;
425 ctxt->input->cur++;
426 }
427 } else if ((RAW == '&') && (NXT(1) == '#')) {
428 SKIP(2);
429 GROW;
430 while (RAW != ';') { /* loop blocked by count */
431 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
432 val = val * 10 + (CUR - '0');
433 else {
434 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
436 ctxt->sax->error(ctxt->userData,
437 "xmlParseCharRef: invalid decimal value\n");
438 ctxt->wellFormed = 0;
439 ctxt->disableSAX = 1;
440 val = 0;
441 break;
442 }
443 NEXT;
444 count++;
445 }
446 if (RAW == ';') {
447 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
448 ctxt->nbChars ++;
449 ctxt->input->cur++;
450 }
451 } else {
452 ctxt->errNo = XML_ERR_INVALID_CHARREF;
453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
454 ctxt->sax->error(ctxt->userData,
455 "xmlParseCharRef: invalid value\n");
456 ctxt->wellFormed = 0;
457 ctxt->disableSAX = 1;
458 }
459
460 /*
461 * [ WFC: Legal Character ]
462 * Characters referred to using character references must match the
463 * production for Char.
464 */
465 if (IS_CHAR(val)) {
466 return(val);
467 } else {
468 ctxt->errNo = XML_ERR_INVALID_CHAR;
469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
470 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
471 val);
472 ctxt->wellFormed = 0;
473 ctxt->disableSAX = 1;
474 }
475 return(0);
476}
477
478/**
479 * xmlParseStringCharRef:
480 * @ctxt: an XML parser context
481 * @str: a pointer to an index in the string
482 *
483 * parse Reference declarations, variant parsing from a string rather
484 * than an an input flow.
485 *
486 * [66] CharRef ::= '&#' [0-9]+ ';' |
487 * '&#x' [0-9a-fA-F]+ ';'
488 *
489 * [ WFC: Legal Character ]
490 * Characters referred to using character references must match the
491 * production for Char.
492 *
493 * Returns the value parsed (as an int), 0 in case of error, str will be
494 * updated to the current value of the index
495 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000496static int
Owen Taylor3473f882001-02-23 17:55:21 +0000497xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
498 const xmlChar *ptr;
499 xmlChar cur;
500 int val = 0;
501
502 if ((str == NULL) || (*str == NULL)) return(0);
503 ptr = *str;
504 cur = *ptr;
505 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
506 ptr += 3;
507 cur = *ptr;
508 while (cur != ';') { /* Non input consuming loop */
509 if ((cur >= '0') && (cur <= '9'))
510 val = val * 16 + (cur - '0');
511 else if ((cur >= 'a') && (cur <= 'f'))
512 val = val * 16 + (cur - 'a') + 10;
513 else if ((cur >= 'A') && (cur <= 'F'))
514 val = val * 16 + (cur - 'A') + 10;
515 else {
516 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
518 ctxt->sax->error(ctxt->userData,
519 "xmlParseStringCharRef: invalid hexadecimal value\n");
520 ctxt->wellFormed = 0;
521 ctxt->disableSAX = 1;
522 val = 0;
523 break;
524 }
525 ptr++;
526 cur = *ptr;
527 }
528 if (cur == ';')
529 ptr++;
530 } else if ((cur == '&') && (ptr[1] == '#')){
531 ptr += 2;
532 cur = *ptr;
533 while (cur != ';') { /* Non input consuming loops */
534 if ((cur >= '0') && (cur <= '9'))
535 val = val * 10 + (cur - '0');
536 else {
537 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
539 ctxt->sax->error(ctxt->userData,
540 "xmlParseStringCharRef: invalid decimal value\n");
541 ctxt->wellFormed = 0;
542 ctxt->disableSAX = 1;
543 val = 0;
544 break;
545 }
546 ptr++;
547 cur = *ptr;
548 }
549 if (cur == ';')
550 ptr++;
551 } else {
552 ctxt->errNo = XML_ERR_INVALID_CHARREF;
553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
554 ctxt->sax->error(ctxt->userData,
555 "xmlParseCharRef: invalid value\n");
556 ctxt->wellFormed = 0;
557 ctxt->disableSAX = 1;
558 return(0);
559 }
560 *str = ptr;
561
562 /*
563 * [ WFC: Legal Character ]
564 * Characters referred to using character references must match the
565 * production for Char.
566 */
567 if (IS_CHAR(val)) {
568 return(val);
569 } else {
570 ctxt->errNo = XML_ERR_INVALID_CHAR;
571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
572 ctxt->sax->error(ctxt->userData,
573 "CharRef: invalid xmlChar value %d\n", val);
574 ctxt->wellFormed = 0;
575 ctxt->disableSAX = 1;
576 }
577 return(0);
578}
579
580/**
581 * xmlParserHandlePEReference:
582 * @ctxt: the parser context
583 *
584 * [69] PEReference ::= '%' Name ';'
585 *
586 * [ WFC: No Recursion ]
587 * A parsed entity must not contain a recursive
588 * reference to itself, either directly or indirectly.
589 *
590 * [ WFC: Entity Declared ]
591 * In a document without any DTD, a document with only an internal DTD
592 * subset which contains no parameter entity references, or a document
593 * with "standalone='yes'", ... ... The declaration of a parameter
594 * entity must precede any reference to it...
595 *
596 * [ VC: Entity Declared ]
597 * In a document with an external subset or external parameter entities
598 * with "standalone='no'", ... ... The declaration of a parameter entity
599 * must precede any reference to it...
600 *
601 * [ WFC: In DTD ]
602 * Parameter-entity references may only appear in the DTD.
603 * NOTE: misleading but this is handled.
604 *
605 * A PEReference may have been detected in the current input stream
606 * the handling is done accordingly to
607 * http://www.w3.org/TR/REC-xml#entproc
608 * i.e.
609 * - Included in literal in entity values
610 * - Included as Paraemeter Entity reference within DTDs
611 */
612void
613xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
614 xmlChar *name;
615 xmlEntityPtr entity = NULL;
616 xmlParserInputPtr input;
617
618 if (ctxt->token != 0) {
619 return;
620 }
621 if (RAW != '%') return;
622 switch(ctxt->instate) {
623 case XML_PARSER_CDATA_SECTION:
624 return;
625 case XML_PARSER_COMMENT:
626 return;
627 case XML_PARSER_START_TAG:
628 return;
629 case XML_PARSER_END_TAG:
630 return;
631 case XML_PARSER_EOF:
632 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
634 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
635 ctxt->wellFormed = 0;
636 ctxt->disableSAX = 1;
637 return;
638 case XML_PARSER_PROLOG:
639 case XML_PARSER_START:
640 case XML_PARSER_MISC:
641 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
643 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
644 ctxt->wellFormed = 0;
645 ctxt->disableSAX = 1;
646 return;
647 case XML_PARSER_ENTITY_DECL:
648 case XML_PARSER_CONTENT:
649 case XML_PARSER_ATTRIBUTE_VALUE:
650 case XML_PARSER_PI:
651 case XML_PARSER_SYSTEM_LITERAL:
652 /* we just ignore it there */
653 return;
654 case XML_PARSER_EPILOG:
655 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
657 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
658 ctxt->wellFormed = 0;
659 ctxt->disableSAX = 1;
660 return;
661 case XML_PARSER_ENTITY_VALUE:
662 /*
663 * NOTE: in the case of entity values, we don't do the
664 * substitution here since we need the literal
665 * entity value to be able to save the internal
666 * subset of the document.
667 * This will be handled by xmlStringDecodeEntities
668 */
669 return;
670 case XML_PARSER_DTD:
671 /*
672 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
673 * In the internal DTD subset, parameter-entity references
674 * can occur only where markup declarations can occur, not
675 * within markup declarations.
676 * In that case this is handled in xmlParseMarkupDecl
677 */
678 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
679 return;
680 break;
681 case XML_PARSER_IGNORE:
682 return;
683 }
684
685 NEXT;
686 name = xmlParseName(ctxt);
687 if (xmlParserDebugEntities)
688 xmlGenericError(xmlGenericErrorContext,
689 "PE Reference: %s\n", name);
690 if (name == NULL) {
691 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
693 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
694 ctxt->wellFormed = 0;
695 ctxt->disableSAX = 1;
696 } else {
697 if (RAW == ';') {
698 NEXT;
699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
701 if (entity == NULL) {
702
703 /*
704 * [ WFC: Entity Declared ]
705 * In a document without any DTD, a document with only an
706 * internal DTD subset which contains no parameter entity
707 * references, or a document with "standalone='yes'", ...
708 * ... The declaration of a parameter entity must precede
709 * any reference to it...
710 */
711 if ((ctxt->standalone == 1) ||
712 ((ctxt->hasExternalSubset == 0) &&
713 (ctxt->hasPErefs == 0))) {
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData,
716 "PEReference: %%%s; not found\n", name);
717 ctxt->wellFormed = 0;
718 ctxt->disableSAX = 1;
719 } else {
720 /*
721 * [ VC: Entity Declared ]
722 * In a document with an external subset or external
723 * parameter entities with "standalone='no'", ...
724 * ... The declaration of a parameter entity must precede
725 * any reference to it...
726 */
727 if ((!ctxt->disableSAX) &&
728 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
729 ctxt->vctxt.error(ctxt->vctxt.userData,
730 "PEReference: %%%s; not found\n", name);
731 } else if ((!ctxt->disableSAX) &&
732 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
733 ctxt->sax->warning(ctxt->userData,
734 "PEReference: %%%s; not found\n", name);
735 ctxt->valid = 0;
736 }
737 } else {
738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
740 /*
741 * handle the extra spaces added before and after
742 * c.f. http://www.w3.org/TR/REC-xml#as-PE
743 * this is done independantly.
744 */
745 input = xmlNewEntityInputStream(ctxt, entity);
746 xmlPushInput(ctxt, input);
747 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
748 (RAW == '<') && (NXT(1) == '?') &&
749 (NXT(2) == 'x') && (NXT(3) == 'm') &&
750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
751 xmlParseTextDecl(ctxt);
752 }
753 if (ctxt->token == 0)
754 ctxt->token = ' ';
755 } else {
756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
757 ctxt->sax->error(ctxt->userData,
758 "xmlHandlePEReference: %s is not a parameter entity\n",
759 name);
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 }
763 }
764 } else {
765 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
767 ctxt->sax->error(ctxt->userData,
768 "xmlHandlePEReference: expecting ';'\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 }
772 xmlFree(name);
773 }
774}
775
/*
 * Macro used to grow the current buffer.
 *
 * @buffer must be the name of an xmlChar * variable holding a block
 * obtained from xmlMalloc()/xmlRealloc(), and a companion integer variable
 * named <buffer>_size must hold its size; the size is doubled.
 *
 * The macro may only be used inside a function returning a pointer: when
 * the reallocation fails it reports the error and makes the enclosing
 * function return NULL. The reallocation goes through a temporary so that
 * the old block is released on that path instead of being leaked.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
788
789/**
790 * xmlStringDecodeEntities:
791 * @ctxt: the parser context
792 * @str: the input string
793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
794 * @end: an end marker xmlChar, 0 if none
795 * @end2: an end marker xmlChar, 0 if none
796 * @end3: an end marker xmlChar, 0 if none
797 *
798 * Takes a entity string content and process to do the adequate subtitutions.
799 *
800 * [67] Reference ::= EntityRef | CharRef
801 *
802 * [69] PEReference ::= '%' Name ';'
803 *
804 * Returns A newly allocated string with the substitution done. The caller
805 * must deallocate it !
806 */
807xmlChar *
808xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
809 xmlChar end, xmlChar end2, xmlChar end3) {
810 xmlChar *buffer = NULL;
811 int buffer_size = 0;
812
813 xmlChar *current = NULL;
814 xmlEntityPtr ent;
815 int c,l;
816 int nbchars = 0;
817
818 if (str == NULL)
819 return(NULL);
820
821 if (ctxt->depth > 40) {
822 ctxt->errNo = XML_ERR_ENTITY_LOOP;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
824 ctxt->sax->error(ctxt->userData,
825 "Detected entity reference loop\n");
826 ctxt->wellFormed = 0;
827 ctxt->disableSAX = 1;
828 return(NULL);
829 }
830
831 /*
832 * allocate a translation buffer.
833 */
834 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
835 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
836 if (buffer == NULL) {
837 perror("xmlDecodeEntities: malloc failed");
838 return(NULL);
839 }
840
841 /*
842 * Ok loop until we reach one of the ending char or a size limit.
843 * we are operating on already parsed values.
844 */
845 c = CUR_SCHAR(str, l);
846 while ((c != 0) && (c != end) && /* non input consuming loop */
847 (c != end2) && (c != end3)) {
848
849 if (c == 0) break;
850 if ((c == '&') && (str[1] == '#')) {
851 int val = xmlParseStringCharRef(ctxt, &str);
852 if (val != 0) {
853 COPY_BUF(0,buffer,nbchars,val);
854 }
855 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
856 if (xmlParserDebugEntities)
857 xmlGenericError(xmlGenericErrorContext,
858 "String decoding Entity Reference: %.30s\n",
859 str);
860 ent = xmlParseStringEntityRef(ctxt, &str);
861 if ((ent != NULL) &&
862 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
863 if (ent->content != NULL) {
864 COPY_BUF(0,buffer,nbchars,ent->content[0]);
865 } else {
866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
867 ctxt->sax->error(ctxt->userData,
868 "internal error entity has no content\n");
869 }
870 } else if ((ent != NULL) && (ent->content != NULL)) {
871 xmlChar *rep;
872
873 ctxt->depth++;
874 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
875 0, 0, 0);
876 ctxt->depth--;
877 if (rep != NULL) {
878 current = rep;
879 while (*current != 0) { /* non input consuming loop */
880 buffer[nbchars++] = *current++;
881 if (nbchars >
882 buffer_size - XML_PARSER_BUFFER_SIZE) {
883 growBuffer(buffer);
884 }
885 }
886 xmlFree(rep);
887 }
888 } else if (ent != NULL) {
889 int i = xmlStrlen(ent->name);
890 const xmlChar *cur = ent->name;
891
892 buffer[nbchars++] = '&';
893 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
894 growBuffer(buffer);
895 }
896 for (;i > 0;i--)
897 buffer[nbchars++] = *cur++;
898 buffer[nbchars++] = ';';
899 }
900 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
903 "String decoding PE Reference: %.30s\n", str);
904 ent = xmlParseStringPEReference(ctxt, &str);
905 if (ent != NULL) {
906 xmlChar *rep;
907
908 ctxt->depth++;
909 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
910 0, 0, 0);
911 ctxt->depth--;
912 if (rep != NULL) {
913 current = rep;
914 while (*current != 0) { /* non input consuming loop */
915 buffer[nbchars++] = *current++;
916 if (nbchars >
917 buffer_size - XML_PARSER_BUFFER_SIZE) {
918 growBuffer(buffer);
919 }
920 }
921 xmlFree(rep);
922 }
923 }
924 } else {
925 COPY_BUF(l,buffer,nbchars,c);
926 str += l;
927 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
928 growBuffer(buffer);
929 }
930 }
931 c = CUR_SCHAR(str, l);
932 }
933 buffer[nbchars++] = 0;
934 return(buffer);
935}
936
937
938/************************************************************************
939 * *
940 * Commodity functions to handle xmlChars *
941 * *
942 ************************************************************************/
943
944/**
945 * xmlStrndup:
946 * @cur: the input xmlChar *
947 * @len: the len of @cur
948 *
949 * a strndup for array of xmlChar's
950 *
951 * Returns a new xmlChar * or NULL
952 */
953xmlChar *
954xmlStrndup(const xmlChar *cur, int len) {
955 xmlChar *ret;
956
957 if ((cur == NULL) || (len < 0)) return(NULL);
958 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
959 if (ret == NULL) {
960 xmlGenericError(xmlGenericErrorContext,
961 "malloc of %ld byte failed\n",
962 (len + 1) * (long)sizeof(xmlChar));
963 return(NULL);
964 }
965 memcpy(ret, cur, len * sizeof(xmlChar));
966 ret[len] = 0;
967 return(ret);
968}
969
970/**
971 * xmlStrdup:
972 * @cur: the input xmlChar *
973 *
974 * a strdup for array of xmlChar's. Since they are supposed to be
975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
976 * a termination mark of '0'.
977 *
978 * Returns a new xmlChar * or NULL
979 */
980xmlChar *
981xmlStrdup(const xmlChar *cur) {
982 const xmlChar *p = cur;
983
984 if (cur == NULL) return(NULL);
985 while (*p != 0) p++; /* non input consuming */
986 return(xmlStrndup(cur, p - cur));
987}
988
989/**
990 * xmlCharStrndup:
991 * @cur: the input char *
992 * @len: the len of @cur
993 *
994 * a strndup for char's to xmlChar's
995 *
996 * Returns a new xmlChar * or NULL
997 */
998
999xmlChar *
1000xmlCharStrndup(const char *cur, int len) {
1001 int i;
1002 xmlChar *ret;
1003
1004 if ((cur == NULL) || (len < 0)) return(NULL);
1005 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1006 if (ret == NULL) {
1007 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1008 (len + 1) * (long)sizeof(xmlChar));
1009 return(NULL);
1010 }
1011 for (i = 0;i < len;i++)
1012 ret[i] = (xmlChar) cur[i];
1013 ret[len] = 0;
1014 return(ret);
1015}
1016
1017/**
1018 * xmlCharStrdup:
1019 * @cur: the input char *
1020 * @len: the len of @cur
1021 *
1022 * a strdup for char's to xmlChar's
1023 *
1024 * Returns a new xmlChar * or NULL
1025 */
1026
1027xmlChar *
1028xmlCharStrdup(const char *cur) {
1029 const char *p = cur;
1030
1031 if (cur == NULL) return(NULL);
1032 while (*p != '\0') p++; /* non input consuming */
1033 return(xmlCharStrndup(cur, p - cur));
1034}
1035
1036/**
1037 * xmlStrcmp:
1038 * @str1: the first xmlChar *
1039 * @str2: the second xmlChar *
1040 *
1041 * a strcmp for xmlChar's
1042 *
1043 * Returns the integer result of the comparison
1044 */
1045
1046int
1047xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1048 register int tmp;
1049
1050 if (str1 == str2) return(0);
1051 if (str1 == NULL) return(-1);
1052 if (str2 == NULL) return(1);
1053 do {
1054 tmp = *str1++ - *str2;
1055 if (tmp != 0) return(tmp);
1056 } while (*str2++ != 0);
1057 return 0;
1058}
1059
1060/**
1061 * xmlStrEqual:
1062 * @str1: the first xmlChar *
1063 * @str2: the second xmlChar *
1064 *
1065 * Check if both string are equal of have same content
1066 * Should be a bit more readable and faster than xmlStrEqual()
1067 *
1068 * Returns 1 if they are equal, 0 if they are different
1069 */
1070
1071int
1072xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1073 if (str1 == str2) return(1);
1074 if (str1 == NULL) return(0);
1075 if (str2 == NULL) return(0);
1076 do {
1077 if (*str1++ != *str2) return(0);
1078 } while (*str2++);
1079 return(1);
1080}
1081
1082/**
1083 * xmlStrncmp:
1084 * @str1: the first xmlChar *
1085 * @str2: the second xmlChar *
1086 * @len: the max comparison length
1087 *
1088 * a strncmp for xmlChar's
1089 *
1090 * Returns the integer result of the comparison
1091 */
1092
1093int
1094xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1095 register int tmp;
1096
1097 if (len <= 0) return(0);
1098 if (str1 == str2) return(0);
1099 if (str1 == NULL) return(-1);
1100 if (str2 == NULL) return(1);
1101 do {
1102 tmp = *str1++ - *str2;
1103 if (tmp != 0 || --len == 0) return(tmp);
1104 } while (*str2++ != 0);
1105 return 0;
1106}
1107
1108static xmlChar casemap[256] = {
1109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1111 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1120 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1132 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1141};
1142
1143/**
1144 * xmlStrcasecmp:
1145 * @str1: the first xmlChar *
1146 * @str2: the second xmlChar *
1147 *
1148 * a strcasecmp for xmlChar's
1149 *
1150 * Returns the integer result of the comparison
1151 */
1152
1153int
1154xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1155 register int tmp;
1156
1157 if (str1 == str2) return(0);
1158 if (str1 == NULL) return(-1);
1159 if (str2 == NULL) return(1);
1160 do {
1161 tmp = casemap[*str1++] - casemap[*str2];
1162 if (tmp != 0) return(tmp);
1163 } while (*str2++ != 0);
1164 return 0;
1165}
1166
1167/**
1168 * xmlStrncasecmp:
1169 * @str1: the first xmlChar *
1170 * @str2: the second xmlChar *
1171 * @len: the max comparison length
1172 *
1173 * a strncasecmp for xmlChar's
1174 *
1175 * Returns the integer result of the comparison
1176 */
1177
1178int
1179xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1180 register int tmp;
1181
1182 if (len <= 0) return(0);
1183 if (str1 == str2) return(0);
1184 if (str1 == NULL) return(-1);
1185 if (str2 == NULL) return(1);
1186 do {
1187 tmp = casemap[*str1++] - casemap[*str2];
1188 if (tmp != 0 || --len == 0) return(tmp);
1189 } while (*str2++ != 0);
1190 return 0;
1191}
1192
1193/**
1194 * xmlStrchr:
1195 * @str: the xmlChar * array
1196 * @val: the xmlChar to search
1197 *
1198 * a strchr for xmlChar's
1199 *
1200 * Returns the xmlChar * for the first occurence or NULL.
1201 */
1202
1203const xmlChar *
1204xmlStrchr(const xmlChar *str, xmlChar val) {
1205 if (str == NULL) return(NULL);
1206 while (*str != 0) { /* non input consuming */
1207 if (*str == val) return((xmlChar *) str);
1208 str++;
1209 }
1210 return(NULL);
1211}
1212
1213/**
1214 * xmlStrstr:
1215 * @str: the xmlChar * array (haystack)
1216 * @val: the xmlChar to search (needle)
1217 *
1218 * a strstr for xmlChar's
1219 *
1220 * Returns the xmlChar * for the first occurence or NULL.
1221 */
1222
1223const xmlChar *
1224xmlStrstr(const xmlChar *str, xmlChar *val) {
1225 int n;
1226
1227 if (str == NULL) return(NULL);
1228 if (val == NULL) return(NULL);
1229 n = xmlStrlen(val);
1230
1231 if (n == 0) return(str);
1232 while (*str != 0) { /* non input consuming */
1233 if (*str == *val) {
1234 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1235 }
1236 str++;
1237 }
1238 return(NULL);
1239}
1240
1241/**
1242 * xmlStrcasestr:
1243 * @str: the xmlChar * array (haystack)
1244 * @val: the xmlChar to search (needle)
1245 *
1246 * a case-ignoring strstr for xmlChar's
1247 *
1248 * Returns the xmlChar * for the first occurence or NULL.
1249 */
1250
1251const xmlChar *
1252xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1253 int n;
1254
1255 if (str == NULL) return(NULL);
1256 if (val == NULL) return(NULL);
1257 n = xmlStrlen(val);
1258
1259 if (n == 0) return(str);
1260 while (*str != 0) { /* non input consuming */
1261 if (casemap[*str] == casemap[*val])
1262 if (!xmlStrncasecmp(str, val, n)) return(str);
1263 str++;
1264 }
1265 return(NULL);
1266}
1267
1268/**
1269 * xmlStrsub:
1270 * @str: the xmlChar * array (haystack)
1271 * @start: the index of the first char (zero based)
1272 * @len: the length of the substring
1273 *
1274 * Extract a substring of a given string
1275 *
1276 * Returns the xmlChar * for the first occurence or NULL.
1277 */
1278
1279xmlChar *
1280xmlStrsub(const xmlChar *str, int start, int len) {
1281 int i;
1282
1283 if (str == NULL) return(NULL);
1284 if (start < 0) return(NULL);
1285 if (len < 0) return(NULL);
1286
1287 for (i = 0;i < start;i++) {
1288 if (*str == 0) return(NULL);
1289 str++;
1290 }
1291 if (*str == 0) return(NULL);
1292 return(xmlStrndup(str, len));
1293}
1294
1295/**
1296 * xmlStrlen:
1297 * @str: the xmlChar * array
1298 *
1299 * length of a xmlChar's string
1300 *
1301 * Returns the number of xmlChar contained in the ARRAY.
1302 */
1303
1304int
1305xmlStrlen(const xmlChar *str) {
1306 int len = 0;
1307
1308 if (str == NULL) return(0);
1309 while (*str != 0) { /* non input consuming */
1310 str++;
1311 len++;
1312 }
1313 return(len);
1314}
1315
1316/**
1317 * xmlStrncat:
1318 * @cur: the original xmlChar * array
1319 * @add: the xmlChar * array added
1320 * @len: the length of @add
1321 *
1322 * a strncat for array of xmlChar's, it will extend cur with the len
1323 * first bytes of @add.
1324 *
1325 * Returns a new xmlChar *, the original @cur is reallocated if needed
1326 * and should not be freed
1327 */
1328
1329xmlChar *
1330xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1331 int size;
1332 xmlChar *ret;
1333
1334 if ((add == NULL) || (len == 0))
1335 return(cur);
1336 if (cur == NULL)
1337 return(xmlStrndup(add, len));
1338
1339 size = xmlStrlen(cur);
1340 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1341 if (ret == NULL) {
1342 xmlGenericError(xmlGenericErrorContext,
1343 "xmlStrncat: realloc of %ld byte failed\n",
1344 (size + len + 1) * (long)sizeof(xmlChar));
1345 return(cur);
1346 }
1347 memcpy(&ret[size], add, len * sizeof(xmlChar));
1348 ret[size + len] = 0;
1349 return(ret);
1350}
1351
1352/**
1353 * xmlStrcat:
1354 * @cur: the original xmlChar * array
1355 * @add: the xmlChar * array added
1356 *
1357 * a strcat for array of xmlChar's. Since they are supposed to be
1358 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1359 * a termination mark of '0'.
1360 *
1361 * Returns a new xmlChar * containing the concatenated string.
1362 */
1363xmlChar *
1364xmlStrcat(xmlChar *cur, const xmlChar *add) {
1365 const xmlChar *p = add;
1366
1367 if (add == NULL) return(cur);
1368 if (cur == NULL)
1369 return(xmlStrdup(add));
1370
1371 while (*p != 0) p++; /* non input consuming */
1372 return(xmlStrncat(cur, add, p - add));
1373}
1374
1375/************************************************************************
1376 * *
1377 * Commodity functions, cleanup needed ? *
1378 * *
1379 ************************************************************************/
1380
1381/**
1382 * areBlanks:
1383 * @ctxt: an XML parser context
1384 * @str: a xmlChar *
1385 * @len: the size of @str
1386 *
1387 * Is this a sequence of blank chars that one can ignore ?
1388 *
1389 * Returns 1 if ignorable 0 otherwise.
1390 */
1391
1392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1393 int i, ret;
1394 xmlNodePtr lastChild;
1395
Daniel Veillard2f362242001-03-02 17:36:21 +00001396 if (ctxt->keepBlanks)
1397 return(0);
1398
Owen Taylor3473f882001-02-23 17:55:21 +00001399 /*
1400 * Check for xml:space value.
1401 */
1402 if (*(ctxt->space) == 1)
1403 return(0);
1404
1405 /*
1406 * Check that the string is made of blanks
1407 */
1408 for (i = 0;i < len;i++)
1409 if (!(IS_BLANK(str[i]))) return(0);
1410
1411 /*
1412 * Look if the element is mixed content in the Dtd if available
1413 */
1414 if (ctxt->myDoc != NULL) {
1415 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1416 if (ret == 0) return(1);
1417 if (ret == 1) return(0);
1418 }
1419
1420 /*
1421 * Otherwise, heuristic :-\
1422 */
Owen Taylor3473f882001-02-23 17:55:21 +00001423 if (RAW != '<') return(0);
1424 if (ctxt->node == NULL) return(0);
1425 if ((ctxt->node->children == NULL) &&
1426 (RAW == '<') && (NXT(1) == '/')) return(0);
1427
1428 lastChild = xmlGetLastChild(ctxt->node);
1429 if (lastChild == NULL) {
1430 if (ctxt->node->content != NULL) return(0);
1431 } else if (xmlNodeIsText(lastChild))
1432 return(0);
1433 else if ((ctxt->node->children != NULL) &&
1434 (xmlNodeIsText(ctxt->node->children)))
1435 return(0);
1436 return(1);
1437}
1438
/*
 * Forward declarations needed for recursive behaviour.
 */
1442void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1443void xmlParseReference(xmlParserCtxtPtr ctxt);
1444
1445/************************************************************************
1446 * *
1447 * Extra stuff for namespace support *
1448 * Relates to http://www.w3.org/TR/WD-xml-names *
1449 * *
1450 ************************************************************************/
1451
1452/**
1453 * xmlSplitQName:
1454 * @ctxt: an XML parser context
1455 * @name: an XML parser context
1456 * @prefix: a xmlChar **
1457 *
1458 * parse an UTF8 encoded XML qualified name string
1459 *
1460 * [NS 5] QName ::= (Prefix ':')? LocalPart
1461 *
1462 * [NS 6] Prefix ::= NCName
1463 *
1464 * [NS 7] LocalPart ::= NCName
1465 *
1466 * Returns the local part, and prefix is updated
1467 * to get the Prefix if any.
1468 */
1469
1470xmlChar *
1471xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1472 xmlChar buf[XML_MAX_NAMELEN + 5];
1473 xmlChar *buffer = NULL;
1474 int len = 0;
1475 int max = XML_MAX_NAMELEN;
1476 xmlChar *ret = NULL;
1477 const xmlChar *cur = name;
1478 int c;
1479
1480 *prefix = NULL;
1481
1482#ifndef XML_XML_NAMESPACE
1483 /* xml: prefix is not really a namespace */
1484 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1485 (cur[2] == 'l') && (cur[3] == ':'))
1486 return(xmlStrdup(name));
1487#endif
1488
1489 /* nasty but valid */
1490 if (cur[0] == ':')
1491 return(xmlStrdup(name));
1492
1493 c = *cur++;
1494 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1495 buf[len++] = c;
1496 c = *cur++;
1497 }
1498 if (len >= max) {
1499 /*
1500 * Okay someone managed to make a huge name, so he's ready to pay
1501 * for the processing speed.
1502 */
1503 max = len * 2;
1504
1505 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1506 if (buffer == NULL) {
1507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1508 ctxt->sax->error(ctxt->userData,
1509 "xmlSplitQName: out of memory\n");
1510 return(NULL);
1511 }
1512 memcpy(buffer, buf, len);
1513 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1514 if (len + 10 > max) {
1515 max *= 2;
1516 buffer = (xmlChar *) xmlRealloc(buffer,
1517 max * sizeof(xmlChar));
1518 if (buffer == NULL) {
1519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1520 ctxt->sax->error(ctxt->userData,
1521 "xmlSplitQName: out of memory\n");
1522 return(NULL);
1523 }
1524 }
1525 buffer[len++] = c;
1526 c = *cur++;
1527 }
1528 buffer[len] = 0;
1529 }
1530
1531 if (buffer == NULL)
1532 ret = xmlStrndup(buf, len);
1533 else {
1534 ret = buffer;
1535 buffer = NULL;
1536 max = XML_MAX_NAMELEN;
1537 }
1538
1539
1540 if (c == ':') {
1541 c = *cur++;
1542 if (c == 0) return(ret);
1543 *prefix = ret;
1544 len = 0;
1545
1546 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1547 buf[len++] = c;
1548 c = *cur++;
1549 }
1550 if (len >= max) {
1551 /*
1552 * Okay someone managed to make a huge name, so he's ready to pay
1553 * for the processing speed.
1554 */
1555 max = len * 2;
1556
1557 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1558 if (buffer == NULL) {
1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1560 ctxt->sax->error(ctxt->userData,
1561 "xmlSplitQName: out of memory\n");
1562 return(NULL);
1563 }
1564 memcpy(buffer, buf, len);
1565 while (c != 0) { /* tested bigname2.xml */
1566 if (len + 10 > max) {
1567 max *= 2;
1568 buffer = (xmlChar *) xmlRealloc(buffer,
1569 max * sizeof(xmlChar));
1570 if (buffer == NULL) {
1571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1572 ctxt->sax->error(ctxt->userData,
1573 "xmlSplitQName: out of memory\n");
1574 return(NULL);
1575 }
1576 }
1577 buffer[len++] = c;
1578 c = *cur++;
1579 }
1580 buffer[len] = 0;
1581 }
1582
1583 if (buffer == NULL)
1584 ret = xmlStrndup(buf, len);
1585 else {
1586 ret = buffer;
1587 }
1588 }
1589
1590 return(ret);
1591}
1592
1593/************************************************************************
1594 * *
1595 * The parser itself *
1596 * Relates to http://www.w3.org/TR/REC-xml *
1597 * *
1598 ************************************************************************/
1599
Daniel Veillard21a0f912001-02-25 19:54:14 +00001600xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001601/**
1602 * xmlParseName:
1603 * @ctxt: an XML parser context
1604 *
1605 * parse an XML name.
1606 *
1607 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1608 * CombiningChar | Extender
1609 *
1610 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1611 *
1612 * [6] Names ::= Name (S Name)*
1613 *
1614 * Returns the Name parsed or NULL
1615 */
1616
1617xmlChar *
1618xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001619 const xmlChar *in;
1620 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001621 int count = 0;
1622
1623 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001624
1625 /*
1626 * Accelerator for simple ASCII names
1627 */
1628 in = ctxt->input->cur;
1629 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1630 ((*in >= 0x41) && (*in <= 0x5A)) ||
1631 (*in == '_') || (*in == ':')) {
1632 in++;
1633 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1634 ((*in >= 0x41) && (*in <= 0x5A)) ||
1635 ((*in >= 0x30) && (*in <= 0x39)) ||
1636 (*in == '_') || (*in == ':'))
1637 in++;
1638 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1639 count = in - ctxt->input->cur;
1640 ret = xmlStrndup(ctxt->input->cur, count);
1641 ctxt->input->cur = in;
1642 return(ret);
1643 }
1644 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001645 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001646}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001647
Daniel Veillard21a0f912001-02-25 19:54:14 +00001648xmlChar *
1649xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1650 xmlChar buf[XML_MAX_NAMELEN + 5];
1651 int len = 0, l;
1652 int c;
1653 int count = 0;
1654
1655 /*
1656 * Handler for more complex cases
1657 */
1658 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001659 c = CUR_CHAR(l);
1660 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1661 (!IS_LETTER(c) && (c != '_') &&
1662 (c != ':'))) {
1663 return(NULL);
1664 }
1665
1666 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1667 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1668 (c == '.') || (c == '-') ||
1669 (c == '_') || (c == ':') ||
1670 (IS_COMBINING(c)) ||
1671 (IS_EXTENDER(c)))) {
1672 if (count++ > 100) {
1673 count = 0;
1674 GROW;
1675 }
1676 COPY_BUF(l,buf,len,c);
1677 NEXTL(l);
1678 c = CUR_CHAR(l);
1679 if (len >= XML_MAX_NAMELEN) {
1680 /*
1681 * Okay someone managed to make a huge name, so he's ready to pay
1682 * for the processing speed.
1683 */
1684 xmlChar *buffer;
1685 int max = len * 2;
1686
1687 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1688 if (buffer == NULL) {
1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001691 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001692 return(NULL);
1693 }
1694 memcpy(buffer, buf, len);
1695 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1696 (c == '.') || (c == '-') ||
1697 (c == '_') || (c == ':') ||
1698 (IS_COMBINING(c)) ||
1699 (IS_EXTENDER(c))) {
1700 if (count++ > 100) {
1701 count = 0;
1702 GROW;
1703 }
1704 if (len + 10 > max) {
1705 max *= 2;
1706 buffer = (xmlChar *) xmlRealloc(buffer,
1707 max * sizeof(xmlChar));
1708 if (buffer == NULL) {
1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1710 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001711 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001712 return(NULL);
1713 }
1714 }
1715 COPY_BUF(l,buffer,len,c);
1716 NEXTL(l);
1717 c = CUR_CHAR(l);
1718 }
1719 buffer[len] = 0;
1720 return(buffer);
1721 }
1722 }
1723 return(xmlStrndup(buf, len));
1724}
1725
1726/**
1727 * xmlParseStringName:
1728 * @ctxt: an XML parser context
1729 * @str: a pointer to the string pointer (IN/OUT)
1730 *
1731 * parse an XML name.
1732 *
1733 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1734 * CombiningChar | Extender
1735 *
1736 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1737 *
1738 * [6] Names ::= Name (S Name)*
1739 *
1740 * Returns the Name parsed or NULL. The str pointer
1741 * is updated to the current location in the string.
1742 */
1743
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001744static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001745xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1746 xmlChar buf[XML_MAX_NAMELEN + 5];
1747 const xmlChar *cur = *str;
1748 int len = 0, l;
1749 int c;
1750
1751 c = CUR_SCHAR(cur, l);
1752 if (!IS_LETTER(c) && (c != '_') &&
1753 (c != ':')) {
1754 return(NULL);
1755 }
1756
1757 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1758 (c == '.') || (c == '-') ||
1759 (c == '_') || (c == ':') ||
1760 (IS_COMBINING(c)) ||
1761 (IS_EXTENDER(c))) {
1762 COPY_BUF(l,buf,len,c);
1763 cur += l;
1764 c = CUR_SCHAR(cur, l);
1765 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1766 /*
1767 * Okay someone managed to make a huge name, so he's ready to pay
1768 * for the processing speed.
1769 */
1770 xmlChar *buffer;
1771 int max = len * 2;
1772
1773 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1774 if (buffer == NULL) {
1775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1776 ctxt->sax->error(ctxt->userData,
1777 "xmlParseStringName: out of memory\n");
1778 return(NULL);
1779 }
1780 memcpy(buffer, buf, len);
1781 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1782 (c == '.') || (c == '-') ||
1783 (c == '_') || (c == ':') ||
1784 (IS_COMBINING(c)) ||
1785 (IS_EXTENDER(c))) {
1786 if (len + 10 > max) {
1787 max *= 2;
1788 buffer = (xmlChar *) xmlRealloc(buffer,
1789 max * sizeof(xmlChar));
1790 if (buffer == NULL) {
1791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1792 ctxt->sax->error(ctxt->userData,
1793 "xmlParseStringName: out of memory\n");
1794 return(NULL);
1795 }
1796 }
1797 COPY_BUF(l,buffer,len,c);
1798 cur += l;
1799 c = CUR_SCHAR(cur, l);
1800 }
1801 buffer[len] = 0;
1802 *str = cur;
1803 return(buffer);
1804 }
1805 }
1806 *str = cur;
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseNmtoken:
1812 * @ctxt: an XML parser context
1813 *
1814 * parse an XML Nmtoken.
1815 *
1816 * [7] Nmtoken ::= (NameChar)+
1817 *
1818 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1819 *
1820 * Returns the Nmtoken parsed or NULL
1821 */
1822
1823xmlChar *
1824xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1825 xmlChar buf[XML_MAX_NAMELEN + 5];
1826 int len = 0, l;
1827 int c;
1828 int count = 0;
1829
1830 GROW;
1831 c = CUR_CHAR(l);
1832
1833 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1834 (c == '.') || (c == '-') ||
1835 (c == '_') || (c == ':') ||
1836 (IS_COMBINING(c)) ||
1837 (IS_EXTENDER(c))) {
1838 if (count++ > 100) {
1839 count = 0;
1840 GROW;
1841 }
1842 COPY_BUF(l,buf,len,c);
1843 NEXTL(l);
1844 c = CUR_CHAR(l);
1845 if (len >= XML_MAX_NAMELEN) {
1846 /*
1847 * Okay someone managed to make a huge token, so he's ready to pay
1848 * for the processing speed.
1849 */
1850 xmlChar *buffer;
1851 int max = len * 2;
1852
1853 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1854 if (buffer == NULL) {
1855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1856 ctxt->sax->error(ctxt->userData,
1857 "xmlParseNmtoken: out of memory\n");
1858 return(NULL);
1859 }
1860 memcpy(buffer, buf, len);
1861 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1862 (c == '.') || (c == '-') ||
1863 (c == '_') || (c == ':') ||
1864 (IS_COMBINING(c)) ||
1865 (IS_EXTENDER(c))) {
1866 if (count++ > 100) {
1867 count = 0;
1868 GROW;
1869 }
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001877 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 NEXTL(l);
1883 c = CUR_CHAR(l);
1884 }
1885 buffer[len] = 0;
1886 return(buffer);
1887 }
1888 }
1889 if (len == 0)
1890 return(NULL);
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseEntityValue:
1896 * @ctxt: an XML parser context
1897 * @orig: if non-NULL store a copy of the original entity value
1898 *
1899 * parse a value for ENTITY declarations
1900 *
1901 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1902 * "'" ([^%&'] | PEReference | Reference)* "'"
1903 *
1904 * Returns the EntityValue parsed with reference substitued or NULL
1905 */
1906
1907xmlChar *
1908xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1909 xmlChar *buf = NULL;
1910 int len = 0;
1911 int size = XML_PARSER_BUFFER_SIZE;
1912 int c, l;
1913 xmlChar stop;
1914 xmlChar *ret = NULL;
1915 const xmlChar *cur = NULL;
1916 xmlParserInputPtr input;
1917
1918 if (RAW == '"') stop = '"';
1919 else if (RAW == '\'') stop = '\'';
1920 else {
1921 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1923 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1924 ctxt->wellFormed = 0;
1925 ctxt->disableSAX = 1;
1926 return(NULL);
1927 }
1928 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1929 if (buf == NULL) {
1930 xmlGenericError(xmlGenericErrorContext,
1931 "malloc of %d byte failed\n", size);
1932 return(NULL);
1933 }
1934
1935 /*
1936 * The content of the entity definition is copied in a buffer.
1937 */
1938
1939 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1940 input = ctxt->input;
1941 GROW;
1942 NEXT;
1943 c = CUR_CHAR(l);
1944 /*
1945 * NOTE: 4.4.5 Included in Literal
1946 * When a parameter entity reference appears in a literal entity
1947 * value, ... a single or double quote character in the replacement
1948 * text is always treated as a normal data character and will not
1949 * terminate the literal.
1950 * In practice it means we stop the loop only when back at parsing
1951 * the initial entity and the quote is found
1952 */
1953 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1954 (ctxt->input != input))) {
1955 if (len + 5 >= size) {
1956 size *= 2;
1957 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1958 if (buf == NULL) {
1959 xmlGenericError(xmlGenericErrorContext,
1960 "realloc of %d byte failed\n", size);
1961 return(NULL);
1962 }
1963 }
1964 COPY_BUF(l,buf,len,c);
1965 NEXTL(l);
1966 /*
1967 * Pop-up of finished entities.
1968 */
1969 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1970 xmlPopInput(ctxt);
1971
1972 GROW;
1973 c = CUR_CHAR(l);
1974 if (c == 0) {
1975 GROW;
1976 c = CUR_CHAR(l);
1977 }
1978 }
1979 buf[len] = 0;
1980
1981 /*
1982 * Raise problem w.r.t. '&' and '%' being used in non-entities
1983 * reference constructs. Note Charref will be handled in
1984 * xmlStringDecodeEntities()
1985 */
1986 cur = buf;
1987 while (*cur != 0) { /* non input consuming */
1988 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1989 xmlChar *name;
1990 xmlChar tmp = *cur;
1991
1992 cur++;
1993 name = xmlParseStringName(ctxt, &cur);
1994 if ((name == NULL) || (*cur != ';')) {
1995 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData,
1998 "EntityValue: '%c' forbidden except for entities references\n",
1999 tmp);
2000 ctxt->wellFormed = 0;
2001 ctxt->disableSAX = 1;
2002 }
2003 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2004 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2006 ctxt->sax->error(ctxt->userData,
2007 "EntityValue: PEReferences forbidden in internal subset\n",
2008 tmp);
2009 ctxt->wellFormed = 0;
2010 ctxt->disableSAX = 1;
2011 }
2012 if (name != NULL)
2013 xmlFree(name);
2014 }
2015 cur++;
2016 }
2017
2018 /*
2019 * Then PEReference entities are substituted.
2020 */
2021 if (c != stop) {
2022 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2025 ctxt->wellFormed = 0;
2026 ctxt->disableSAX = 1;
2027 xmlFree(buf);
2028 } else {
2029 NEXT;
2030 /*
2031 * NOTE: 4.4.7 Bypassed
2032 * When a general entity reference appears in the EntityValue in
2033 * an entity declaration, it is bypassed and left as is.
2034 * so XML_SUBSTITUTE_REF is not set here.
2035 */
2036 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2037 0, 0, 0);
2038 if (orig != NULL)
2039 *orig = buf;
2040 else
2041 xmlFree(buf);
2042 }
2043
2044 return(ret);
2045}
2046
2047/**
2048 * xmlParseAttValue:
2049 * @ctxt: an XML parser context
2050 *
2051 * parse a value for an attribute
2052 * Note: the parser won't do substitution of entities here, this
2053 * will be handled later in xmlStringGetNodeList
2054 *
2055 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2056 * "'" ([^<&'] | Reference)* "'"
2057 *
2058 * 3.3.3 Attribute-Value Normalization:
2059 * Before the value of an attribute is passed to the application or
2060 * checked for validity, the XML processor must normalize it as follows:
2061 * - a character reference is processed by appending the referenced
2062 * character to the attribute value
2063 * - an entity reference is processed by recursively processing the
2064 * replacement text of the entity
2065 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2066 * appending #x20 to the normalized value, except that only a single
2067 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2068 * parsed entity or the literal entity value of an internal parsed entity
2069 * - other characters are processed by appending them to the normalized value
2070 * If the declared value is not CDATA, then the XML processor must further
2071 * process the normalized attribute value by discarding any leading and
2072 * trailing space (#x20) characters, and by replacing sequences of space
2073 * (#x20) characters by a single space (#x20) character.
2074 * All attributes for which no declaration has been read should be treated
2075 * by a non-validating parser as if declared CDATA.
2076 *
2077 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2078 */
2079
2080xmlChar *
2081xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2082 xmlChar limit = 0;
2083 xmlChar *buf = NULL;
2084 int len = 0;
2085 int buf_size = 0;
2086 int c, l;
2087 xmlChar *current = NULL;
2088 xmlEntityPtr ent;
2089
2090
2091 SHRINK;
2092 if (NXT(0) == '"') {
2093 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2094 limit = '"';
2095 NEXT;
2096 } else if (NXT(0) == '\'') {
2097 limit = '\'';
2098 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2099 NEXT;
2100 } else {
2101 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2104 ctxt->wellFormed = 0;
2105 ctxt->disableSAX = 1;
2106 return(NULL);
2107 }
2108
2109 /*
2110 * allocate a translation buffer.
2111 */
2112 buf_size = XML_PARSER_BUFFER_SIZE;
2113 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2114 if (buf == NULL) {
2115 perror("xmlParseAttValue: malloc failed");
2116 return(NULL);
2117 }
2118
2119 /*
2120 * Ok loop until we reach one of the ending char or a size limit.
2121 */
2122 c = CUR_CHAR(l);
2123 while (((NXT(0) != limit) && /* checked */
2124 (c != '<')) || (ctxt->token != 0)) {
2125 if (c == 0) break;
2126 if (ctxt->token == '&') {
2127 /*
2128 * The reparsing will be done in xmlStringGetNodeList()
2129 * called by the attribute() function in SAX.c
2130 */
2131 static xmlChar buffer[6] = "&#38;";
2132
2133 if (len > buf_size - 10) {
2134 growBuffer(buf);
2135 }
2136 current = &buffer[0];
2137 while (*current != 0) { /* non input consuming */
2138 buf[len++] = *current++;
2139 }
2140 ctxt->token = 0;
2141 } else if (c == '&') {
2142 if (NXT(1) == '#') {
2143 int val = xmlParseCharRef(ctxt);
2144 if (val == '&') {
2145 /*
2146 * The reparsing will be done in xmlStringGetNodeList()
2147 * called by the attribute() function in SAX.c
2148 */
2149 static xmlChar buffer[6] = "&#38;";
2150
2151 if (len > buf_size - 10) {
2152 growBuffer(buf);
2153 }
2154 current = &buffer[0];
2155 while (*current != 0) { /* non input consuming */
2156 buf[len++] = *current++;
2157 }
2158 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002159 if (len > buf_size - 10) {
2160 growBuffer(buf);
2161 }
Owen Taylor3473f882001-02-23 17:55:21 +00002162 len += xmlCopyChar(0, &buf[len], val);
2163 }
2164 } else {
2165 ent = xmlParseEntityRef(ctxt);
2166 if ((ent != NULL) &&
2167 (ctxt->replaceEntities != 0)) {
2168 xmlChar *rep;
2169
2170 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2171 rep = xmlStringDecodeEntities(ctxt, ent->content,
2172 XML_SUBSTITUTE_REF, 0, 0, 0);
2173 if (rep != NULL) {
2174 current = rep;
2175 while (*current != 0) { /* non input consuming */
2176 buf[len++] = *current++;
2177 if (len > buf_size - 10) {
2178 growBuffer(buf);
2179 }
2180 }
2181 xmlFree(rep);
2182 }
2183 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002184 if (len > buf_size - 10) {
2185 growBuffer(buf);
2186 }
Owen Taylor3473f882001-02-23 17:55:21 +00002187 if (ent->content != NULL)
2188 buf[len++] = ent->content[0];
2189 }
2190 } else if (ent != NULL) {
2191 int i = xmlStrlen(ent->name);
2192 const xmlChar *cur = ent->name;
2193
2194 /*
2195 * This may look absurd but is needed to detect
2196 * entities problems
2197 */
2198 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2199 (ent->content != NULL)) {
2200 xmlChar *rep;
2201 rep = xmlStringDecodeEntities(ctxt, ent->content,
2202 XML_SUBSTITUTE_REF, 0, 0, 0);
2203 if (rep != NULL)
2204 xmlFree(rep);
2205 }
2206
2207 /*
2208 * Just output the reference
2209 */
2210 buf[len++] = '&';
2211 if (len > buf_size - i - 10) {
2212 growBuffer(buf);
2213 }
2214 for (;i > 0;i--)
2215 buf[len++] = *cur++;
2216 buf[len++] = ';';
2217 }
2218 }
2219 } else {
2220 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2221 COPY_BUF(l,buf,len,0x20);
2222 if (len > buf_size - 10) {
2223 growBuffer(buf);
2224 }
2225 } else {
2226 COPY_BUF(l,buf,len,c);
2227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 }
2231 NEXTL(l);
2232 }
2233 GROW;
2234 c = CUR_CHAR(l);
2235 }
2236 buf[len++] = 0;
2237 if (RAW == '<') {
2238 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2240 ctxt->sax->error(ctxt->userData,
2241 "Unescaped '<' not allowed in attributes values\n");
2242 ctxt->wellFormed = 0;
2243 ctxt->disableSAX = 1;
2244 } else if (RAW != limit) {
2245 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2247 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2248 ctxt->wellFormed = 0;
2249 ctxt->disableSAX = 1;
2250 } else
2251 NEXT;
2252 return(buf);
2253}
2254
2255/**
2256 * xmlParseSystemLiteral:
2257 * @ctxt: an XML parser context
2258 *
2259 * parse an XML Literal
2260 *
2261 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2262 *
2263 * Returns the SystemLiteral parsed or NULL
2264 */
2265
2266xmlChar *
2267xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2268 xmlChar *buf = NULL;
2269 int len = 0;
2270 int size = XML_PARSER_BUFFER_SIZE;
2271 int cur, l;
2272 xmlChar stop;
2273 int state = ctxt->instate;
2274 int count = 0;
2275
2276 SHRINK;
2277 if (RAW == '"') {
2278 NEXT;
2279 stop = '"';
2280 } else if (RAW == '\'') {
2281 NEXT;
2282 stop = '\'';
2283 } else {
2284 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2286 ctxt->sax->error(ctxt->userData,
2287 "SystemLiteral \" or ' expected\n");
2288 ctxt->wellFormed = 0;
2289 ctxt->disableSAX = 1;
2290 return(NULL);
2291 }
2292
2293 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2294 if (buf == NULL) {
2295 xmlGenericError(xmlGenericErrorContext,
2296 "malloc of %d byte failed\n", size);
2297 return(NULL);
2298 }
2299 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2300 cur = CUR_CHAR(l);
2301 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2302 if (len + 5 >= size) {
2303 size *= 2;
2304 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2305 if (buf == NULL) {
2306 xmlGenericError(xmlGenericErrorContext,
2307 "realloc of %d byte failed\n", size);
2308 ctxt->instate = (xmlParserInputState) state;
2309 return(NULL);
2310 }
2311 }
2312 count++;
2313 if (count > 50) {
2314 GROW;
2315 count = 0;
2316 }
2317 COPY_BUF(l,buf,len,cur);
2318 NEXTL(l);
2319 cur = CUR_CHAR(l);
2320 if (cur == 0) {
2321 GROW;
2322 SHRINK;
2323 cur = CUR_CHAR(l);
2324 }
2325 }
2326 buf[len] = 0;
2327 ctxt->instate = (xmlParserInputState) state;
2328 if (!IS_CHAR(cur)) {
2329 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2332 ctxt->wellFormed = 0;
2333 ctxt->disableSAX = 1;
2334 } else {
2335 NEXT;
2336 }
2337 return(buf);
2338}
2339
2340/**
2341 * xmlParsePubidLiteral:
2342 * @ctxt: an XML parser context
2343 *
2344 * parse an XML public literal
2345 *
2346 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2347 *
2348 * Returns the PubidLiteral parsed or NULL.
2349 */
2350
2351xmlChar *
2352xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2353 xmlChar *buf = NULL;
2354 int len = 0;
2355 int size = XML_PARSER_BUFFER_SIZE;
2356 xmlChar cur;
2357 xmlChar stop;
2358 int count = 0;
2359
2360 SHRINK;
2361 if (RAW == '"') {
2362 NEXT;
2363 stop = '"';
2364 } else if (RAW == '\'') {
2365 NEXT;
2366 stop = '\'';
2367 } else {
2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370 ctxt->sax->error(ctxt->userData,
2371 "SystemLiteral \" or ' expected\n");
2372 ctxt->wellFormed = 0;
2373 ctxt->disableSAX = 1;
2374 return(NULL);
2375 }
2376 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2377 if (buf == NULL) {
2378 xmlGenericError(xmlGenericErrorContext,
2379 "malloc of %d byte failed\n", size);
2380 return(NULL);
2381 }
2382 cur = CUR;
2383 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2384 if (len + 1 >= size) {
2385 size *= 2;
2386 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2387 if (buf == NULL) {
2388 xmlGenericError(xmlGenericErrorContext,
2389 "realloc of %d byte failed\n", size);
2390 return(NULL);
2391 }
2392 }
2393 buf[len++] = cur;
2394 count++;
2395 if (count > 50) {
2396 GROW;
2397 count = 0;
2398 }
2399 NEXT;
2400 cur = CUR;
2401 if (cur == 0) {
2402 GROW;
2403 SHRINK;
2404 cur = CUR;
2405 }
2406 }
2407 buf[len] = 0;
2408 if (cur != stop) {
2409 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2411 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2412 ctxt->wellFormed = 0;
2413 ctxt->disableSAX = 1;
2414 } else {
2415 NEXT;
2416 }
2417 return(buf);
2418}
2419
Daniel Veillard48b2f892001-02-25 16:11:03 +00002420void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002421/**
2422 * xmlParseCharData:
2423 * @ctxt: an XML parser context
2424 * @cdata: int indicating whether we are within a CDATA section
2425 *
2426 * parse a CharData section.
2427 * if we are within a CDATA section ']]>' marks an end of section.
2428 *
2429 * The right angle bracket (>) may be represented using the string "&gt;",
2430 * and must, for compatibility, be escaped using "&gt;" or a character
2431 * reference when it appears in the string "]]>" in content, when that
2432 * string is not marking the end of a CDATA section.
2433 *
2434 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2435 */
2436
2437void
2438xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002439 const xmlChar *in;
2440 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002441 int line = ctxt->input->line;
2442 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002443
2444 SHRINK;
2445 GROW;
2446 /*
2447 * Accelerated common case where input don't need to be
2448 * modified before passing it to the handler.
2449 */
2450 if ((ctxt->token == 0) && (!cdata)) {
2451 in = ctxt->input->cur;
2452 do {
2453 while (((*in >= 0x20) && (*in != '<') &&
2454 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2455 in++;
2456 if (*in == 0xA) {
2457 ctxt->input->line++;
2458 continue; /* while */
2459 }
2460 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002461 if (nbchar > 0) {
2462 if (IS_BLANK(*ctxt->input->cur) &&
2463 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2464 if (ctxt->sax->ignorableWhitespace != NULL)
2465 ctxt->sax->ignorableWhitespace(ctxt->userData,
2466 ctxt->input->cur, nbchar);
2467 } else {
2468 if (ctxt->sax->characters != NULL)
2469 ctxt->sax->characters(ctxt->userData,
2470 ctxt->input->cur, nbchar);
2471 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002472 }
2473 ctxt->input->cur = in;
2474 if (*in == 0xD) {
2475 in++;
2476 if (*in == 0xA) {
2477 ctxt->input->cur = in;
2478 in++;
2479 ctxt->input->line++;
2480 continue; /* while */
2481 }
2482 in--;
2483 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002484 if (*in == '<') {
2485 return;
2486 }
2487 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002488 return;
2489 }
2490 SHRINK;
2491 GROW;
2492 in = ctxt->input->cur;
2493 } while ((*in >= 0x20) && (*in <= 0x7F));
2494 nbchar = 0;
2495 }
Daniel Veillard50582112001-03-26 22:52:16 +00002496 ctxt->input->line = line;
2497 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002498 xmlParseCharDataComplex(ctxt, cdata);
2499}
2500
2501void
2502xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002503 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2504 int nbchar = 0;
2505 int cur, l;
2506 int count = 0;
2507
2508 SHRINK;
2509 GROW;
2510 cur = CUR_CHAR(l);
2511 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2512 ((cur != '&') || (ctxt->token == '&')) &&
2513 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2514 if ((cur == ']') && (NXT(1) == ']') &&
2515 (NXT(2) == '>')) {
2516 if (cdata) break;
2517 else {
2518 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2520 ctxt->sax->error(ctxt->userData,
2521 "Sequence ']]>' not allowed in content\n");
2522 /* Should this be relaxed ??? I see a "must here */
2523 ctxt->wellFormed = 0;
2524 ctxt->disableSAX = 1;
2525 }
2526 }
2527 COPY_BUF(l,buf,nbchar,cur);
2528 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2529 /*
2530 * Ok the segment is to be consumed as chars.
2531 */
2532 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2533 if (areBlanks(ctxt, buf, nbchar)) {
2534 if (ctxt->sax->ignorableWhitespace != NULL)
2535 ctxt->sax->ignorableWhitespace(ctxt->userData,
2536 buf, nbchar);
2537 } else {
2538 if (ctxt->sax->characters != NULL)
2539 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2540 }
2541 }
2542 nbchar = 0;
2543 }
2544 count++;
2545 if (count > 50) {
2546 GROW;
2547 count = 0;
2548 }
2549 NEXTL(l);
2550 cur = CUR_CHAR(l);
2551 }
2552 if (nbchar != 0) {
2553 /*
2554 * Ok the segment is to be consumed as chars.
2555 */
2556 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2557 if (areBlanks(ctxt, buf, nbchar)) {
2558 if (ctxt->sax->ignorableWhitespace != NULL)
2559 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2560 } else {
2561 if (ctxt->sax->characters != NULL)
2562 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2563 }
2564 }
2565 }
2566}
2567
2568/**
2569 * xmlParseExternalID:
2570 * @ctxt: an XML parser context
2571 * @publicID: a xmlChar** receiving PubidLiteral
2572 * @strict: indicate whether we should restrict parsing to only
2573 * production [75], see NOTE below
2574 *
2575 * Parse an External ID or a Public ID
2576 *
2577 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2578 * 'PUBLIC' S PubidLiteral S SystemLiteral
2579 *
2580 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2581 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2582 *
2583 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2584 *
2585 * Returns the function returns SystemLiteral and in the second
2586 * case publicID receives PubidLiteral, is strict is off
2587 * it is possible to return NULL and have publicID set.
2588 */
2589
2590xmlChar *
2591xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2592 xmlChar *URI = NULL;
2593
2594 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002595
2596 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002597 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2598 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2599 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2600 SKIP(6);
2601 if (!IS_BLANK(CUR)) {
2602 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "Space required after 'SYSTEM'\n");
2606 ctxt->wellFormed = 0;
2607 ctxt->disableSAX = 1;
2608 }
2609 SKIP_BLANKS;
2610 URI = xmlParseSystemLiteral(ctxt);
2611 if (URI == NULL) {
2612 ctxt->errNo = XML_ERR_URI_REQUIRED;
2613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2614 ctxt->sax->error(ctxt->userData,
2615 "xmlParseExternalID: SYSTEM, no URI\n");
2616 ctxt->wellFormed = 0;
2617 ctxt->disableSAX = 1;
2618 }
2619 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2620 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2621 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2622 SKIP(6);
2623 if (!IS_BLANK(CUR)) {
2624 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2626 ctxt->sax->error(ctxt->userData,
2627 "Space required after 'PUBLIC'\n");
2628 ctxt->wellFormed = 0;
2629 ctxt->disableSAX = 1;
2630 }
2631 SKIP_BLANKS;
2632 *publicID = xmlParsePubidLiteral(ctxt);
2633 if (*publicID == NULL) {
2634 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2636 ctxt->sax->error(ctxt->userData,
2637 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2638 ctxt->wellFormed = 0;
2639 ctxt->disableSAX = 1;
2640 }
2641 if (strict) {
2642 /*
2643 * We don't handle [83] so "S SystemLiteral" is required.
2644 */
2645 if (!IS_BLANK(CUR)) {
2646 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2648 ctxt->sax->error(ctxt->userData,
2649 "Space required after the Public Identifier\n");
2650 ctxt->wellFormed = 0;
2651 ctxt->disableSAX = 1;
2652 }
2653 } else {
2654 /*
2655 * We handle [83] so we return immediately, if
2656 * "S SystemLiteral" is not detected. From a purely parsing
2657 * point of view that's a nice mess.
2658 */
2659 const xmlChar *ptr;
2660 GROW;
2661
2662 ptr = CUR_PTR;
2663 if (!IS_BLANK(*ptr)) return(NULL);
2664
2665 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2666 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2667 }
2668 SKIP_BLANKS;
2669 URI = xmlParseSystemLiteral(ctxt);
2670 if (URI == NULL) {
2671 ctxt->errNo = XML_ERR_URI_REQUIRED;
2672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2673 ctxt->sax->error(ctxt->userData,
2674 "xmlParseExternalID: PUBLIC, no URI\n");
2675 ctxt->wellFormed = 0;
2676 ctxt->disableSAX = 1;
2677 }
2678 }
2679 return(URI);
2680}
2681
2682/**
2683 * xmlParseComment:
2684 * @ctxt: an XML parser context
2685 *
2686 * Skip an XML (SGML) comment <!-- .... -->
2687 * The spec says that "For compatibility, the string "--" (double-hyphen)
2688 * must not occur within comments. "
2689 *
2690 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2691 */
2692void
2693xmlParseComment(xmlParserCtxtPtr ctxt) {
2694 xmlChar *buf = NULL;
2695 int len;
2696 int size = XML_PARSER_BUFFER_SIZE;
2697 int q, ql;
2698 int r, rl;
2699 int cur, l;
2700 xmlParserInputState state;
2701 xmlParserInputPtr input = ctxt->input;
2702 int count = 0;
2703
2704 /*
2705 * Check that there is a comment right here.
2706 */
2707 if ((RAW != '<') || (NXT(1) != '!') ||
2708 (NXT(2) != '-') || (NXT(3) != '-')) return;
2709
2710 state = ctxt->instate;
2711 ctxt->instate = XML_PARSER_COMMENT;
2712 SHRINK;
2713 SKIP(4);
2714 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2715 if (buf == NULL) {
2716 xmlGenericError(xmlGenericErrorContext,
2717 "malloc of %d byte failed\n", size);
2718 ctxt->instate = state;
2719 return;
2720 }
2721 q = CUR_CHAR(ql);
2722 NEXTL(ql);
2723 r = CUR_CHAR(rl);
2724 NEXTL(rl);
2725 cur = CUR_CHAR(l);
2726 len = 0;
2727 while (IS_CHAR(cur) && /* checked */
2728 ((cur != '>') ||
2729 (r != '-') || (q != '-'))) {
2730 if ((r == '-') && (q == '-') && (len > 1)) {
2731 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2733 ctxt->sax->error(ctxt->userData,
2734 "Comment must not contain '--' (double-hyphen)`\n");
2735 ctxt->wellFormed = 0;
2736 ctxt->disableSAX = 1;
2737 }
2738 if (len + 5 >= size) {
2739 size *= 2;
2740 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2741 if (buf == NULL) {
2742 xmlGenericError(xmlGenericErrorContext,
2743 "realloc of %d byte failed\n", size);
2744 ctxt->instate = state;
2745 return;
2746 }
2747 }
2748 COPY_BUF(ql,buf,len,q);
2749 q = r;
2750 ql = rl;
2751 r = cur;
2752 rl = l;
2753
2754 count++;
2755 if (count > 50) {
2756 GROW;
2757 count = 0;
2758 }
2759 NEXTL(l);
2760 cur = CUR_CHAR(l);
2761 if (cur == 0) {
2762 SHRINK;
2763 GROW;
2764 cur = CUR_CHAR(l);
2765 }
2766 }
2767 buf[len] = 0;
2768 if (!IS_CHAR(cur)) {
2769 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2771 ctxt->sax->error(ctxt->userData,
2772 "Comment not terminated \n<!--%.50s\n", buf);
2773 ctxt->wellFormed = 0;
2774 ctxt->disableSAX = 1;
2775 xmlFree(buf);
2776 } else {
2777 if (input != ctxt->input) {
2778 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2780 ctxt->sax->error(ctxt->userData,
2781"Comment doesn't start and stop in the same entity\n");
2782 ctxt->wellFormed = 0;
2783 ctxt->disableSAX = 1;
2784 }
2785 NEXT;
2786 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2787 (!ctxt->disableSAX))
2788 ctxt->sax->comment(ctxt->userData, buf);
2789 xmlFree(buf);
2790 }
2791 ctxt->instate = state;
2792}
2793
2794/**
2795 * xmlParsePITarget:
2796 * @ctxt: an XML parser context
2797 *
2798 * parse the name of a PI
2799 *
2800 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2801 *
2802 * Returns the PITarget name or NULL
2803 */
2804
2805xmlChar *
2806xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2807 xmlChar *name;
2808
2809 name = xmlParseName(ctxt);
2810 if ((name != NULL) &&
2811 ((name[0] == 'x') || (name[0] == 'X')) &&
2812 ((name[1] == 'm') || (name[1] == 'M')) &&
2813 ((name[2] == 'l') || (name[2] == 'L'))) {
2814 int i;
2815 if ((name[0] == 'x') && (name[1] == 'm') &&
2816 (name[2] == 'l') && (name[3] == 0)) {
2817 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2819 ctxt->sax->error(ctxt->userData,
2820 "XML declaration allowed only at the start of the document\n");
2821 ctxt->wellFormed = 0;
2822 ctxt->disableSAX = 1;
2823 return(name);
2824 } else if (name[3] == 0) {
2825 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2827 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2828 ctxt->wellFormed = 0;
2829 ctxt->disableSAX = 1;
2830 return(name);
2831 }
2832 for (i = 0;;i++) {
2833 if (xmlW3CPIs[i] == NULL) break;
2834 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2835 return(name);
2836 }
2837 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2838 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2839 ctxt->sax->warning(ctxt->userData,
2840 "xmlParsePItarget: invalid name prefix 'xml'\n");
2841 }
2842 }
2843 return(name);
2844}
2845
2846/**
2847 * xmlParsePI:
2848 * @ctxt: an XML parser context
2849 *
2850 * parse an XML Processing Instruction.
2851 *
2852 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2853 *
2854 * The processing is transfered to SAX once parsed.
2855 */
2856
2857void
2858xmlParsePI(xmlParserCtxtPtr ctxt) {
2859 xmlChar *buf = NULL;
2860 int len = 0;
2861 int size = XML_PARSER_BUFFER_SIZE;
2862 int cur, l;
2863 xmlChar *target;
2864 xmlParserInputState state;
2865 int count = 0;
2866
2867 if ((RAW == '<') && (NXT(1) == '?')) {
2868 xmlParserInputPtr input = ctxt->input;
2869 state = ctxt->instate;
2870 ctxt->instate = XML_PARSER_PI;
2871 /*
2872 * this is a Processing Instruction.
2873 */
2874 SKIP(2);
2875 SHRINK;
2876
2877 /*
2878 * Parse the target name and check for special support like
2879 * namespace.
2880 */
2881 target = xmlParsePITarget(ctxt);
2882 if (target != NULL) {
2883 if ((RAW == '?') && (NXT(1) == '>')) {
2884 if (input != ctxt->input) {
2885 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2887 ctxt->sax->error(ctxt->userData,
2888 "PI declaration doesn't start and stop in the same entity\n");
2889 ctxt->wellFormed = 0;
2890 ctxt->disableSAX = 1;
2891 }
2892 SKIP(2);
2893
2894 /*
2895 * SAX: PI detected.
2896 */
2897 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2898 (ctxt->sax->processingInstruction != NULL))
2899 ctxt->sax->processingInstruction(ctxt->userData,
2900 target, NULL);
2901 ctxt->instate = state;
2902 xmlFree(target);
2903 return;
2904 }
2905 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2906 if (buf == NULL) {
2907 xmlGenericError(xmlGenericErrorContext,
2908 "malloc of %d byte failed\n", size);
2909 ctxt->instate = state;
2910 return;
2911 }
2912 cur = CUR;
2913 if (!IS_BLANK(cur)) {
2914 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2916 ctxt->sax->error(ctxt->userData,
2917 "xmlParsePI: PI %s space expected\n", target);
2918 ctxt->wellFormed = 0;
2919 ctxt->disableSAX = 1;
2920 }
2921 SKIP_BLANKS;
2922 cur = CUR_CHAR(l);
2923 while (IS_CHAR(cur) && /* checked */
2924 ((cur != '?') || (NXT(1) != '>'))) {
2925 if (len + 5 >= size) {
2926 size *= 2;
2927 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2928 if (buf == NULL) {
2929 xmlGenericError(xmlGenericErrorContext,
2930 "realloc of %d byte failed\n", size);
2931 ctxt->instate = state;
2932 return;
2933 }
2934 }
2935 count++;
2936 if (count > 50) {
2937 GROW;
2938 count = 0;
2939 }
2940 COPY_BUF(l,buf,len,cur);
2941 NEXTL(l);
2942 cur = CUR_CHAR(l);
2943 if (cur == 0) {
2944 SHRINK;
2945 GROW;
2946 cur = CUR_CHAR(l);
2947 }
2948 }
2949 buf[len] = 0;
2950 if (cur != '?') {
2951 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2953 ctxt->sax->error(ctxt->userData,
2954 "xmlParsePI: PI %s never end ...\n", target);
2955 ctxt->wellFormed = 0;
2956 ctxt->disableSAX = 1;
2957 } else {
2958 if (input != ctxt->input) {
2959 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2961 ctxt->sax->error(ctxt->userData,
2962 "PI declaration doesn't start and stop in the same entity\n");
2963 ctxt->wellFormed = 0;
2964 ctxt->disableSAX = 1;
2965 }
2966 SKIP(2);
2967
2968 /*
2969 * SAX: PI detected.
2970 */
2971 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2972 (ctxt->sax->processingInstruction != NULL))
2973 ctxt->sax->processingInstruction(ctxt->userData,
2974 target, buf);
2975 }
2976 xmlFree(buf);
2977 xmlFree(target);
2978 } else {
2979 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2981 ctxt->sax->error(ctxt->userData,
2982 "xmlParsePI : no target name\n");
2983 ctxt->wellFormed = 0;
2984 ctxt->disableSAX = 1;
2985 }
2986 ctxt->instate = state;
2987 }
2988}
2989
2990/**
2991 * xmlParseNotationDecl:
2992 * @ctxt: an XML parser context
2993 *
2994 * parse a notation declaration
2995 *
2996 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2997 *
2998 * Hence there is actually 3 choices:
2999 * 'PUBLIC' S PubidLiteral
3000 * 'PUBLIC' S PubidLiteral S SystemLiteral
3001 * and 'SYSTEM' S SystemLiteral
3002 *
3003 * See the NOTE on xmlParseExternalID().
3004 */
3005
3006void
3007xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3008 xmlChar *name;
3009 xmlChar *Pubid;
3010 xmlChar *Systemid;
3011
3012 if ((RAW == '<') && (NXT(1) == '!') &&
3013 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3014 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3015 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3016 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3017 xmlParserInputPtr input = ctxt->input;
3018 SHRINK;
3019 SKIP(10);
3020 if (!IS_BLANK(CUR)) {
3021 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3023 ctxt->sax->error(ctxt->userData,
3024 "Space required after '<!NOTATION'\n");
3025 ctxt->wellFormed = 0;
3026 ctxt->disableSAX = 1;
3027 return;
3028 }
3029 SKIP_BLANKS;
3030
Daniel Veillard29631a82001-03-05 09:49:20 +00003031 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003032 if (name == NULL) {
3033 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3035 ctxt->sax->error(ctxt->userData,
3036 "NOTATION: Name expected here\n");
3037 ctxt->wellFormed = 0;
3038 ctxt->disableSAX = 1;
3039 return;
3040 }
3041 if (!IS_BLANK(CUR)) {
3042 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3044 ctxt->sax->error(ctxt->userData,
3045 "Space required after the NOTATION name'\n");
3046 ctxt->wellFormed = 0;
3047 ctxt->disableSAX = 1;
3048 return;
3049 }
3050 SKIP_BLANKS;
3051
3052 /*
3053 * Parse the IDs.
3054 */
3055 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3056 SKIP_BLANKS;
3057
3058 if (RAW == '>') {
3059 if (input != ctxt->input) {
3060 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3062 ctxt->sax->error(ctxt->userData,
3063"Notation declaration doesn't start and stop in the same entity\n");
3064 ctxt->wellFormed = 0;
3065 ctxt->disableSAX = 1;
3066 }
3067 NEXT;
3068 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3069 (ctxt->sax->notationDecl != NULL))
3070 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3071 } else {
3072 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3074 ctxt->sax->error(ctxt->userData,
3075 "'>' required to close NOTATION declaration\n");
3076 ctxt->wellFormed = 0;
3077 ctxt->disableSAX = 1;
3078 }
3079 xmlFree(name);
3080 if (Systemid != NULL) xmlFree(Systemid);
3081 if (Pubid != NULL) xmlFree(Pubid);
3082 }
3083}
3084
3085/**
3086 * xmlParseEntityDecl:
3087 * @ctxt: an XML parser context
3088 *
3089 * parse <!ENTITY declarations
3090 *
3091 * [70] EntityDecl ::= GEDecl | PEDecl
3092 *
3093 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3094 *
3095 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3096 *
3097 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3098 *
3099 * [74] PEDef ::= EntityValue | ExternalID
3100 *
3101 * [76] NDataDecl ::= S 'NDATA' S Name
3102 *
3103 * [ VC: Notation Declared ]
3104 * The Name must match the declared name of a notation.
3105 */
3106
3107void
3108xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3109 xmlChar *name = NULL;
3110 xmlChar *value = NULL;
3111 xmlChar *URI = NULL, *literal = NULL;
3112 xmlChar *ndata = NULL;
3113 int isParameter = 0;
3114 xmlChar *orig = NULL;
3115
3116 GROW;
3117 if ((RAW == '<') && (NXT(1) == '!') &&
3118 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3119 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3120 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3121 xmlParserInputPtr input = ctxt->input;
3122 ctxt->instate = XML_PARSER_ENTITY_DECL;
3123 SHRINK;
3124 SKIP(8);
3125 if (!IS_BLANK(CUR)) {
3126 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3128 ctxt->sax->error(ctxt->userData,
3129 "Space required after '<!ENTITY'\n");
3130 ctxt->wellFormed = 0;
3131 ctxt->disableSAX = 1;
3132 }
3133 SKIP_BLANKS;
3134
3135 if (RAW == '%') {
3136 NEXT;
3137 if (!IS_BLANK(CUR)) {
3138 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3140 ctxt->sax->error(ctxt->userData,
3141 "Space required after '%'\n");
3142 ctxt->wellFormed = 0;
3143 ctxt->disableSAX = 1;
3144 }
3145 SKIP_BLANKS;
3146 isParameter = 1;
3147 }
3148
Daniel Veillard29631a82001-03-05 09:49:20 +00003149 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003150 if (name == NULL) {
3151 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3153 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3154 ctxt->wellFormed = 0;
3155 ctxt->disableSAX = 1;
3156 return;
3157 }
3158 if (!IS_BLANK(CUR)) {
3159 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3161 ctxt->sax->error(ctxt->userData,
3162 "Space required after the entity name\n");
3163 ctxt->wellFormed = 0;
3164 ctxt->disableSAX = 1;
3165 }
3166 SKIP_BLANKS;
3167
3168 /*
3169 * handle the various case of definitions...
3170 */
3171 if (isParameter) {
3172 if ((RAW == '"') || (RAW == '\'')) {
3173 value = xmlParseEntityValue(ctxt, &orig);
3174 if (value) {
3175 if ((ctxt->sax != NULL) &&
3176 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3177 ctxt->sax->entityDecl(ctxt->userData, name,
3178 XML_INTERNAL_PARAMETER_ENTITY,
3179 NULL, NULL, value);
3180 }
3181 } else {
3182 URI = xmlParseExternalID(ctxt, &literal, 1);
3183 if ((URI == NULL) && (literal == NULL)) {
3184 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3186 ctxt->sax->error(ctxt->userData,
3187 "Entity value required\n");
3188 ctxt->wellFormed = 0;
3189 ctxt->disableSAX = 1;
3190 }
3191 if (URI) {
3192 xmlURIPtr uri;
3193
3194 uri = xmlParseURI((const char *) URI);
3195 if (uri == NULL) {
3196 ctxt->errNo = XML_ERR_INVALID_URI;
3197 if ((ctxt->sax != NULL) &&
3198 (!ctxt->disableSAX) &&
3199 (ctxt->sax->error != NULL))
3200 ctxt->sax->error(ctxt->userData,
3201 "Invalid URI: %s\n", URI);
3202 ctxt->wellFormed = 0;
3203 } else {
3204 if (uri->fragment != NULL) {
3205 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3206 if ((ctxt->sax != NULL) &&
3207 (!ctxt->disableSAX) &&
3208 (ctxt->sax->error != NULL))
3209 ctxt->sax->error(ctxt->userData,
3210 "Fragment not allowed: %s\n", URI);
3211 ctxt->wellFormed = 0;
3212 } else {
3213 if ((ctxt->sax != NULL) &&
3214 (!ctxt->disableSAX) &&
3215 (ctxt->sax->entityDecl != NULL))
3216 ctxt->sax->entityDecl(ctxt->userData, name,
3217 XML_EXTERNAL_PARAMETER_ENTITY,
3218 literal, URI, NULL);
3219 }
3220 xmlFreeURI(uri);
3221 }
3222 }
3223 }
3224 } else {
3225 if ((RAW == '"') || (RAW == '\'')) {
3226 value = xmlParseEntityValue(ctxt, &orig);
3227 if ((ctxt->sax != NULL) &&
3228 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3229 ctxt->sax->entityDecl(ctxt->userData, name,
3230 XML_INTERNAL_GENERAL_ENTITY,
3231 NULL, NULL, value);
3232 } else {
3233 URI = xmlParseExternalID(ctxt, &literal, 1);
3234 if ((URI == NULL) && (literal == NULL)) {
3235 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3237 ctxt->sax->error(ctxt->userData,
3238 "Entity value required\n");
3239 ctxt->wellFormed = 0;
3240 ctxt->disableSAX = 1;
3241 }
3242 if (URI) {
3243 xmlURIPtr uri;
3244
3245 uri = xmlParseURI((const char *)URI);
3246 if (uri == NULL) {
3247 ctxt->errNo = XML_ERR_INVALID_URI;
3248 if ((ctxt->sax != NULL) &&
3249 (!ctxt->disableSAX) &&
3250 (ctxt->sax->error != NULL))
3251 ctxt->sax->error(ctxt->userData,
3252 "Invalid URI: %s\n", URI);
3253 ctxt->wellFormed = 0;
3254 } else {
3255 if (uri->fragment != NULL) {
3256 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3257 if ((ctxt->sax != NULL) &&
3258 (!ctxt->disableSAX) &&
3259 (ctxt->sax->error != NULL))
3260 ctxt->sax->error(ctxt->userData,
3261 "Fragment not allowed: %s\n", URI);
3262 ctxt->wellFormed = 0;
3263 }
3264 xmlFreeURI(uri);
3265 }
3266 }
3267 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3268 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3270 ctxt->sax->error(ctxt->userData,
3271 "Space required before 'NDATA'\n");
3272 ctxt->wellFormed = 0;
3273 ctxt->disableSAX = 1;
3274 }
3275 SKIP_BLANKS;
3276 if ((RAW == 'N') && (NXT(1) == 'D') &&
3277 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3278 (NXT(4) == 'A')) {
3279 SKIP(5);
3280 if (!IS_BLANK(CUR)) {
3281 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3283 ctxt->sax->error(ctxt->userData,
3284 "Space required after 'NDATA'\n");
3285 ctxt->wellFormed = 0;
3286 ctxt->disableSAX = 1;
3287 }
3288 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003289 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3291 (ctxt->sax->unparsedEntityDecl != NULL))
3292 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3293 literal, URI, ndata);
3294 } else {
3295 if ((ctxt->sax != NULL) &&
3296 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3297 ctxt->sax->entityDecl(ctxt->userData, name,
3298 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3299 literal, URI, NULL);
3300 }
3301 }
3302 }
3303 SKIP_BLANKS;
3304 if (RAW != '>') {
3305 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3307 ctxt->sax->error(ctxt->userData,
3308 "xmlParseEntityDecl: entity %s not terminated\n", name);
3309 ctxt->wellFormed = 0;
3310 ctxt->disableSAX = 1;
3311 } else {
3312 if (input != ctxt->input) {
3313 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3315 ctxt->sax->error(ctxt->userData,
3316"Entity declaration doesn't start and stop in the same entity\n");
3317 ctxt->wellFormed = 0;
3318 ctxt->disableSAX = 1;
3319 }
3320 NEXT;
3321 }
3322 if (orig != NULL) {
3323 /*
3324 * Ugly mechanism to save the raw entity value.
3325 */
3326 xmlEntityPtr cur = NULL;
3327
3328 if (isParameter) {
3329 if ((ctxt->sax != NULL) &&
3330 (ctxt->sax->getParameterEntity != NULL))
3331 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3332 } else {
3333 if ((ctxt->sax != NULL) &&
3334 (ctxt->sax->getEntity != NULL))
3335 cur = ctxt->sax->getEntity(ctxt->userData, name);
3336 }
3337 if (cur != NULL) {
3338 if (cur->orig != NULL)
3339 xmlFree(orig);
3340 else
3341 cur->orig = orig;
3342 } else
3343 xmlFree(orig);
3344 }
3345 if (name != NULL) xmlFree(name);
3346 if (value != NULL) xmlFree(value);
3347 if (URI != NULL) xmlFree(URI);
3348 if (literal != NULL) xmlFree(literal);
3349 if (ndata != NULL) xmlFree(ndata);
3350 }
3351}
3352
3353/**
3354 * xmlParseDefaultDecl:
3355 * @ctxt: an XML parser context
3356 * @value: Receive a possible fixed default value for the attribute
3357 *
3358 * Parse an attribute default declaration
3359 *
3360 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3361 *
3362 * [ VC: Required Attribute ]
3363 * if the default declaration is the keyword #REQUIRED, then the
3364 * attribute must be specified for all elements of the type in the
3365 * attribute-list declaration.
3366 *
3367 * [ VC: Attribute Default Legal ]
3368 * The declared default value must meet the lexical constraints of
3369 * the declared attribute type c.f. xmlValidateAttributeDecl()
3370 *
3371 * [ VC: Fixed Attribute Default ]
3372 * if an attribute has a default value declared with the #FIXED
3373 * keyword, instances of that attribute must match the default value.
3374 *
3375 * [ WFC: No < in Attribute Values ]
3376 * handled in xmlParseAttValue()
3377 *
3378 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3379 * or XML_ATTRIBUTE_FIXED.
3380 */
3381
3382int
3383xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3384 int val;
3385 xmlChar *ret;
3386
3387 *value = NULL;
3388 if ((RAW == '#') && (NXT(1) == 'R') &&
3389 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3390 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3391 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3392 (NXT(8) == 'D')) {
3393 SKIP(9);
3394 return(XML_ATTRIBUTE_REQUIRED);
3395 }
3396 if ((RAW == '#') && (NXT(1) == 'I') &&
3397 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3398 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3399 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3400 SKIP(8);
3401 return(XML_ATTRIBUTE_IMPLIED);
3402 }
3403 val = XML_ATTRIBUTE_NONE;
3404 if ((RAW == '#') && (NXT(1) == 'F') &&
3405 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3406 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3407 SKIP(6);
3408 val = XML_ATTRIBUTE_FIXED;
3409 if (!IS_BLANK(CUR)) {
3410 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3412 ctxt->sax->error(ctxt->userData,
3413 "Space required after '#FIXED'\n");
3414 ctxt->wellFormed = 0;
3415 ctxt->disableSAX = 1;
3416 }
3417 SKIP_BLANKS;
3418 }
3419 ret = xmlParseAttValue(ctxt);
3420 ctxt->instate = XML_PARSER_DTD;
3421 if (ret == NULL) {
3422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3423 ctxt->sax->error(ctxt->userData,
3424 "Attribute default value declaration error\n");
3425 ctxt->wellFormed = 0;
3426 ctxt->disableSAX = 1;
3427 } else
3428 *value = ret;
3429 return(val);
3430}
3431
3432/**
3433 * xmlParseNotationType:
3434 * @ctxt: an XML parser context
3435 *
3436 * parse an Notation attribute type.
3437 *
3438 * Note: the leading 'NOTATION' S part has already being parsed...
3439 *
3440 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3441 *
3442 * [ VC: Notation Attributes ]
3443 * Values of this type must match one of the notation names included
3444 * in the declaration; all notation names in the declaration must be declared.
3445 *
3446 * Returns: the notation attribute tree built while parsing
3447 */
3448
3449xmlEnumerationPtr
3450xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3451 xmlChar *name;
3452 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3453
3454 if (RAW != '(') {
3455 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3457 ctxt->sax->error(ctxt->userData,
3458 "'(' required to start 'NOTATION'\n");
3459 ctxt->wellFormed = 0;
3460 ctxt->disableSAX = 1;
3461 return(NULL);
3462 }
3463 SHRINK;
3464 do {
3465 NEXT;
3466 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003467 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003468 if (name == NULL) {
3469 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3471 ctxt->sax->error(ctxt->userData,
3472 "Name expected in NOTATION declaration\n");
3473 ctxt->wellFormed = 0;
3474 ctxt->disableSAX = 1;
3475 return(ret);
3476 }
3477 cur = xmlCreateEnumeration(name);
3478 xmlFree(name);
3479 if (cur == NULL) return(ret);
3480 if (last == NULL) ret = last = cur;
3481 else {
3482 last->next = cur;
3483 last = cur;
3484 }
3485 SKIP_BLANKS;
3486 } while (RAW == '|');
3487 if (RAW != ')') {
3488 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3490 ctxt->sax->error(ctxt->userData,
3491 "')' required to finish NOTATION declaration\n");
3492 ctxt->wellFormed = 0;
3493 ctxt->disableSAX = 1;
3494 if ((last != NULL) && (last != ret))
3495 xmlFreeEnumeration(last);
3496 return(ret);
3497 }
3498 NEXT;
3499 return(ret);
3500}
3501
3502/**
3503 * xmlParseEnumerationType:
3504 * @ctxt: an XML parser context
3505 *
3506 * parse an Enumeration attribute type.
3507 *
3508 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3509 *
3510 * [ VC: Enumeration ]
3511 * Values of this type must match one of the Nmtoken tokens in
3512 * the declaration
3513 *
3514 * Returns: the enumeration attribute tree built while parsing
3515 */
3516
3517xmlEnumerationPtr
3518xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3519 xmlChar *name;
3520 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3521
3522 if (RAW != '(') {
3523 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3525 ctxt->sax->error(ctxt->userData,
3526 "'(' required to start ATTLIST enumeration\n");
3527 ctxt->wellFormed = 0;
3528 ctxt->disableSAX = 1;
3529 return(NULL);
3530 }
3531 SHRINK;
3532 do {
3533 NEXT;
3534 SKIP_BLANKS;
3535 name = xmlParseNmtoken(ctxt);
3536 if (name == NULL) {
3537 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3539 ctxt->sax->error(ctxt->userData,
3540 "NmToken expected in ATTLIST enumeration\n");
3541 ctxt->wellFormed = 0;
3542 ctxt->disableSAX = 1;
3543 return(ret);
3544 }
3545 cur = xmlCreateEnumeration(name);
3546 xmlFree(name);
3547 if (cur == NULL) return(ret);
3548 if (last == NULL) ret = last = cur;
3549 else {
3550 last->next = cur;
3551 last = cur;
3552 }
3553 SKIP_BLANKS;
3554 } while (RAW == '|');
3555 if (RAW != ')') {
3556 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3558 ctxt->sax->error(ctxt->userData,
3559 "')' required to finish ATTLIST enumeration\n");
3560 ctxt->wellFormed = 0;
3561 ctxt->disableSAX = 1;
3562 return(ret);
3563 }
3564 NEXT;
3565 return(ret);
3566}
3567
3568/**
3569 * xmlParseEnumeratedType:
3570 * @ctxt: an XML parser context
3571 * @tree: the enumeration tree built while parsing
3572 *
3573 * parse an Enumerated attribute type.
3574 *
3575 * [57] EnumeratedType ::= NotationType | Enumeration
3576 *
3577 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3578 *
3579 *
3580 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3581 */
3582
3583int
3584xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3585 if ((RAW == 'N') && (NXT(1) == 'O') &&
3586 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3587 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3588 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3589 SKIP(8);
3590 if (!IS_BLANK(CUR)) {
3591 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3593 ctxt->sax->error(ctxt->userData,
3594 "Space required after 'NOTATION'\n");
3595 ctxt->wellFormed = 0;
3596 ctxt->disableSAX = 1;
3597 return(0);
3598 }
3599 SKIP_BLANKS;
3600 *tree = xmlParseNotationType(ctxt);
3601 if (*tree == NULL) return(0);
3602 return(XML_ATTRIBUTE_NOTATION);
3603 }
3604 *tree = xmlParseEnumerationType(ctxt);
3605 if (*tree == NULL) return(0);
3606 return(XML_ATTRIBUTE_ENUMERATION);
3607}
3608
3609/**
3610 * xmlParseAttributeType:
3611 * @ctxt: an XML parser context
3612 * @tree: the enumeration tree built while parsing
3613 *
3614 * parse the Attribute list def for an element
3615 *
3616 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3617 *
3618 * [55] StringType ::= 'CDATA'
3619 *
3620 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3621 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3622 *
3623 * Validity constraints for attribute values syntax are checked in
3624 * xmlValidateAttributeValue()
3625 *
3626 * [ VC: ID ]
3627 * Values of type ID must match the Name production. A name must not
3628 * appear more than once in an XML document as a value of this type;
3629 * i.e., ID values must uniquely identify the elements which bear them.
3630 *
3631 * [ VC: One ID per Element Type ]
3632 * No element type may have more than one ID attribute specified.
3633 *
3634 * [ VC: ID Attribute Default ]
3635 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3636 *
3637 * [ VC: IDREF ]
3638 * Values of type IDREF must match the Name production, and values
3639 * of type IDREFS must match Names; each IDREF Name must match the value
3640 * of an ID attribute on some element in the XML document; i.e. IDREF
3641 * values must match the value of some ID attribute.
3642 *
3643 * [ VC: Entity Name ]
3644 * Values of type ENTITY must match the Name production, values
3645 * of type ENTITIES must match Names; each Entity Name must match the
3646 * name of an unparsed entity declared in the DTD.
3647 *
3648 * [ VC: Name Token ]
3649 * Values of type NMTOKEN must match the Nmtoken production; values
3650 * of type NMTOKENS must match Nmtokens.
3651 *
3652 * Returns the attribute type
3653 */
3654int
3655xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3656 SHRINK;
3657 if ((RAW == 'C') && (NXT(1) == 'D') &&
3658 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3659 (NXT(4) == 'A')) {
3660 SKIP(5);
3661 return(XML_ATTRIBUTE_CDATA);
3662 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3663 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3664 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3665 SKIP(6);
3666 return(XML_ATTRIBUTE_IDREFS);
3667 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3668 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3669 (NXT(4) == 'F')) {
3670 SKIP(5);
3671 return(XML_ATTRIBUTE_IDREF);
3672 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3673 SKIP(2);
3674 return(XML_ATTRIBUTE_ID);
3675 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3676 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3677 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3678 SKIP(6);
3679 return(XML_ATTRIBUTE_ENTITY);
3680 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3681 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3682 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3683 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3684 SKIP(8);
3685 return(XML_ATTRIBUTE_ENTITIES);
3686 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3687 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3688 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3689 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3690 SKIP(8);
3691 return(XML_ATTRIBUTE_NMTOKENS);
3692 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3693 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3694 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3695 (NXT(6) == 'N')) {
3696 SKIP(7);
3697 return(XML_ATTRIBUTE_NMTOKEN);
3698 }
3699 return(xmlParseEnumeratedType(ctxt, tree));
3700}
3701
3702/**
3703 * xmlParseAttributeListDecl:
3704 * @ctxt: an XML parser context
3705 *
3706 * : parse the Attribute list def for an element
3707 *
3708 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3709 *
3710 * [53] AttDef ::= S Name S AttType S DefaultDecl
3711 *
3712 */
3713void
3714xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3715 xmlChar *elemName;
3716 xmlChar *attrName;
3717 xmlEnumerationPtr tree;
3718
3719 if ((RAW == '<') && (NXT(1) == '!') &&
3720 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3721 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3722 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3723 (NXT(8) == 'T')) {
3724 xmlParserInputPtr input = ctxt->input;
3725
3726 SKIP(9);
3727 if (!IS_BLANK(CUR)) {
3728 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3730 ctxt->sax->error(ctxt->userData,
3731 "Space required after '<!ATTLIST'\n");
3732 ctxt->wellFormed = 0;
3733 ctxt->disableSAX = 1;
3734 }
3735 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003736 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003737 if (elemName == NULL) {
3738 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3740 ctxt->sax->error(ctxt->userData,
3741 "ATTLIST: no name for Element\n");
3742 ctxt->wellFormed = 0;
3743 ctxt->disableSAX = 1;
3744 return;
3745 }
3746 SKIP_BLANKS;
3747 GROW;
3748 while (RAW != '>') {
3749 const xmlChar *check = CUR_PTR;
3750 int type;
3751 int def;
3752 xmlChar *defaultValue = NULL;
3753
3754 GROW;
3755 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003756 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003757 if (attrName == NULL) {
3758 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3759 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3760 ctxt->sax->error(ctxt->userData,
3761 "ATTLIST: no name for Attribute\n");
3762 ctxt->wellFormed = 0;
3763 ctxt->disableSAX = 1;
3764 break;
3765 }
3766 GROW;
3767 if (!IS_BLANK(CUR)) {
3768 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3770 ctxt->sax->error(ctxt->userData,
3771 "Space required after the attribute name\n");
3772 ctxt->wellFormed = 0;
3773 ctxt->disableSAX = 1;
3774 if (attrName != NULL)
3775 xmlFree(attrName);
3776 if (defaultValue != NULL)
3777 xmlFree(defaultValue);
3778 break;
3779 }
3780 SKIP_BLANKS;
3781
3782 type = xmlParseAttributeType(ctxt, &tree);
3783 if (type <= 0) {
3784 if (attrName != NULL)
3785 xmlFree(attrName);
3786 if (defaultValue != NULL)
3787 xmlFree(defaultValue);
3788 break;
3789 }
3790
3791 GROW;
3792 if (!IS_BLANK(CUR)) {
3793 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3795 ctxt->sax->error(ctxt->userData,
3796 "Space required after the attribute type\n");
3797 ctxt->wellFormed = 0;
3798 ctxt->disableSAX = 1;
3799 if (attrName != NULL)
3800 xmlFree(attrName);
3801 if (defaultValue != NULL)
3802 xmlFree(defaultValue);
3803 if (tree != NULL)
3804 xmlFreeEnumeration(tree);
3805 break;
3806 }
3807 SKIP_BLANKS;
3808
3809 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3810 if (def <= 0) {
3811 if (attrName != NULL)
3812 xmlFree(attrName);
3813 if (defaultValue != NULL)
3814 xmlFree(defaultValue);
3815 if (tree != NULL)
3816 xmlFreeEnumeration(tree);
3817 break;
3818 }
3819
3820 GROW;
3821 if (RAW != '>') {
3822 if (!IS_BLANK(CUR)) {
3823 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3825 ctxt->sax->error(ctxt->userData,
3826 "Space required after the attribute default value\n");
3827 ctxt->wellFormed = 0;
3828 ctxt->disableSAX = 1;
3829 if (attrName != NULL)
3830 xmlFree(attrName);
3831 if (defaultValue != NULL)
3832 xmlFree(defaultValue);
3833 if (tree != NULL)
3834 xmlFreeEnumeration(tree);
3835 break;
3836 }
3837 SKIP_BLANKS;
3838 }
3839 if (check == CUR_PTR) {
3840 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3842 ctxt->sax->error(ctxt->userData,
3843 "xmlParseAttributeListDecl: detected internal error\n");
3844 if (attrName != NULL)
3845 xmlFree(attrName);
3846 if (defaultValue != NULL)
3847 xmlFree(defaultValue);
3848 if (tree != NULL)
3849 xmlFreeEnumeration(tree);
3850 break;
3851 }
3852 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3853 (ctxt->sax->attributeDecl != NULL))
3854 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3855 type, def, defaultValue, tree);
3856 if (attrName != NULL)
3857 xmlFree(attrName);
3858 if (defaultValue != NULL)
3859 xmlFree(defaultValue);
3860 GROW;
3861 }
3862 if (RAW == '>') {
3863 if (input != ctxt->input) {
3864 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3866 ctxt->sax->error(ctxt->userData,
3867"Attribute list declaration doesn't start and stop in the same entity\n");
3868 ctxt->wellFormed = 0;
3869 ctxt->disableSAX = 1;
3870 }
3871 NEXT;
3872 }
3873
3874 xmlFree(elemName);
3875 }
3876}
3877
3878/**
3879 * xmlParseElementMixedContentDecl:
3880 * @ctxt: an XML parser context
3881 *
3882 * parse the declaration for a Mixed Element content
3883 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3884 *
3885 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3886 * '(' S? '#PCDATA' S? ')'
3887 *
3888 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3889 *
3890 * [ VC: No Duplicate Types ]
3891 * The same name must not appear more than once in a single
3892 * mixed-content declaration.
3893 *
3894 * returns: the list of the xmlElementContentPtr describing the element choices
3895 */
3896xmlElementContentPtr
3897xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3898 xmlElementContentPtr ret = NULL, cur = NULL, n;
3899 xmlChar *elem = NULL;
3900
3901 GROW;
3902 if ((RAW == '#') && (NXT(1) == 'P') &&
3903 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3904 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3905 (NXT(6) == 'A')) {
3906 SKIP(7);
3907 SKIP_BLANKS;
3908 SHRINK;
3909 if (RAW == ')') {
3910 ctxt->entity = ctxt->input;
3911 NEXT;
3912 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3913 if (RAW == '*') {
3914 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3915 NEXT;
3916 }
3917 return(ret);
3918 }
3919 if ((RAW == '(') || (RAW == '|')) {
3920 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3921 if (ret == NULL) return(NULL);
3922 }
3923 while (RAW == '|') {
3924 NEXT;
3925 if (elem == NULL) {
3926 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3927 if (ret == NULL) return(NULL);
3928 ret->c1 = cur;
3929 cur = ret;
3930 } else {
3931 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3932 if (n == NULL) return(NULL);
3933 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3934 cur->c2 = n;
3935 cur = n;
3936 xmlFree(elem);
3937 }
3938 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003939 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003940 if (elem == NULL) {
3941 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt->userData,
3944 "xmlParseElementMixedContentDecl : Name expected\n");
3945 ctxt->wellFormed = 0;
3946 ctxt->disableSAX = 1;
3947 xmlFreeElementContent(cur);
3948 return(NULL);
3949 }
3950 SKIP_BLANKS;
3951 GROW;
3952 }
3953 if ((RAW == ')') && (NXT(1) == '*')) {
3954 if (elem != NULL) {
3955 cur->c2 = xmlNewElementContent(elem,
3956 XML_ELEMENT_CONTENT_ELEMENT);
3957 xmlFree(elem);
3958 }
3959 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3960 ctxt->entity = ctxt->input;
3961 SKIP(2);
3962 } else {
3963 if (elem != NULL) xmlFree(elem);
3964 xmlFreeElementContent(ret);
3965 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3967 ctxt->sax->error(ctxt->userData,
3968 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3969 ctxt->wellFormed = 0;
3970 ctxt->disableSAX = 1;
3971 return(NULL);
3972 }
3973
3974 } else {
3975 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3977 ctxt->sax->error(ctxt->userData,
3978 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3979 ctxt->wellFormed = 0;
3980 ctxt->disableSAX = 1;
3981 }
3982 return(ret);
3983}
3984
3985/**
3986 * xmlParseElementChildrenContentDecl:
3987 * @ctxt: an XML parser context
3988 *
3989 * parse the declaration for a Mixed Element content
3990 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3991 *
3992 *
3993 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3994 *
3995 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3996 *
3997 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3998 *
3999 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4000 *
4001 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4002 * TODO Parameter-entity replacement text must be properly nested
4003 * with parenthetized groups. That is to say, if either of the
4004 * opening or closing parentheses in a choice, seq, or Mixed
4005 * construct is contained in the replacement text for a parameter
4006 * entity, both must be contained in the same replacement text. For
4007 * interoperability, if a parameter-entity reference appears in a
4008 * choice, seq, or Mixed construct, its replacement text should not
4009 * be empty, and neither the first nor last non-blank character of
4010 * the replacement text should be a connector (| or ,).
4011 *
4012 * returns: the tree of xmlElementContentPtr describing the element
4013 * hierarchy.
4014 */
4015xmlElementContentPtr
4016#ifdef VMS
4017xmlParseElementChildrenContentD
4018#else
4019xmlParseElementChildrenContentDecl
4020#endif
4021(xmlParserCtxtPtr ctxt) {
4022 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4023 xmlChar *elem;
4024 xmlChar type = 0;
4025
4026 SKIP_BLANKS;
4027 GROW;
4028 if (RAW == '(') {
4029 /* Recurse on first child */
4030 NEXT;
4031 SKIP_BLANKS;
4032 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4033 SKIP_BLANKS;
4034 GROW;
4035 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004036 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004037 if (elem == NULL) {
4038 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4040 ctxt->sax->error(ctxt->userData,
4041 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4042 ctxt->wellFormed = 0;
4043 ctxt->disableSAX = 1;
4044 return(NULL);
4045 }
4046 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4047 GROW;
4048 if (RAW == '?') {
4049 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4050 NEXT;
4051 } else if (RAW == '*') {
4052 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4053 NEXT;
4054 } else if (RAW == '+') {
4055 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4056 NEXT;
4057 } else {
4058 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4059 }
4060 xmlFree(elem);
4061 GROW;
4062 }
4063 SKIP_BLANKS;
4064 SHRINK;
4065 while (RAW != ')') {
4066 /*
4067 * Each loop we parse one separator and one element.
4068 */
4069 if (RAW == ',') {
4070 if (type == 0) type = CUR;
4071
4072 /*
4073 * Detect "Name | Name , Name" error
4074 */
4075 else if (type != CUR) {
4076 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4078 ctxt->sax->error(ctxt->userData,
4079 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4080 type);
4081 ctxt->wellFormed = 0;
4082 ctxt->disableSAX = 1;
4083 if ((op != NULL) && (op != ret))
4084 xmlFreeElementContent(op);
4085 if ((last != NULL) && (last != ret) &&
4086 (last != ret->c1) && (last != ret->c2))
4087 xmlFreeElementContent(last);
4088 if (ret != NULL)
4089 xmlFreeElementContent(ret);
4090 return(NULL);
4091 }
4092 NEXT;
4093
4094 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4095 if (op == NULL) {
4096 xmlFreeElementContent(ret);
4097 return(NULL);
4098 }
4099 if (last == NULL) {
4100 op->c1 = ret;
4101 ret = cur = op;
4102 } else {
4103 cur->c2 = op;
4104 op->c1 = last;
4105 cur =op;
4106 last = NULL;
4107 }
4108 } else if (RAW == '|') {
4109 if (type == 0) type = CUR;
4110
4111 /*
4112 * Detect "Name , Name | Name" error
4113 */
4114 else if (type != CUR) {
4115 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4117 ctxt->sax->error(ctxt->userData,
4118 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4119 type);
4120 ctxt->wellFormed = 0;
4121 ctxt->disableSAX = 1;
4122 if ((op != NULL) && (op != ret) && (op != last))
4123 xmlFreeElementContent(op);
4124 if ((last != NULL) && (last != ret) &&
4125 (last != ret->c1) && (last != ret->c2))
4126 xmlFreeElementContent(last);
4127 if (ret != NULL)
4128 xmlFreeElementContent(ret);
4129 return(NULL);
4130 }
4131 NEXT;
4132
4133 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4134 if (op == NULL) {
4135 if ((op != NULL) && (op != ret))
4136 xmlFreeElementContent(op);
4137 if ((last != NULL) && (last != ret) &&
4138 (last != ret->c1) && (last != ret->c2))
4139 xmlFreeElementContent(last);
4140 if (ret != NULL)
4141 xmlFreeElementContent(ret);
4142 return(NULL);
4143 }
4144 if (last == NULL) {
4145 op->c1 = ret;
4146 ret = cur = op;
4147 } else {
4148 cur->c2 = op;
4149 op->c1 = last;
4150 cur =op;
4151 last = NULL;
4152 }
4153 } else {
4154 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4155 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4156 ctxt->sax->error(ctxt->userData,
4157 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4158 ctxt->wellFormed = 0;
4159 ctxt->disableSAX = 1;
4160 if ((op != NULL) && (op != ret))
4161 xmlFreeElementContent(op);
4162 if ((last != NULL) && (last != ret) &&
4163 (last != ret->c1) && (last != ret->c2))
4164 xmlFreeElementContent(last);
4165 if (ret != NULL)
4166 xmlFreeElementContent(ret);
4167 return(NULL);
4168 }
4169 GROW;
4170 SKIP_BLANKS;
4171 GROW;
4172 if (RAW == '(') {
4173 /* Recurse on second child */
4174 NEXT;
4175 SKIP_BLANKS;
4176 last = xmlParseElementChildrenContentDecl(ctxt);
4177 SKIP_BLANKS;
4178 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004179 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004180 if (elem == NULL) {
4181 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4183 ctxt->sax->error(ctxt->userData,
4184 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4185 ctxt->wellFormed = 0;
4186 ctxt->disableSAX = 1;
4187 if ((op != NULL) && (op != ret))
4188 xmlFreeElementContent(op);
4189 if ((last != NULL) && (last != ret) &&
4190 (last != ret->c1) && (last != ret->c2))
4191 xmlFreeElementContent(last);
4192 if (ret != NULL)
4193 xmlFreeElementContent(ret);
4194 return(NULL);
4195 }
4196 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4197 xmlFree(elem);
4198 if (RAW == '?') {
4199 last->ocur = XML_ELEMENT_CONTENT_OPT;
4200 NEXT;
4201 } else if (RAW == '*') {
4202 last->ocur = XML_ELEMENT_CONTENT_MULT;
4203 NEXT;
4204 } else if (RAW == '+') {
4205 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4206 NEXT;
4207 } else {
4208 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4209 }
4210 }
4211 SKIP_BLANKS;
4212 GROW;
4213 }
4214 if ((cur != NULL) && (last != NULL)) {
4215 cur->c2 = last;
4216 }
4217 ctxt->entity = ctxt->input;
4218 NEXT;
4219 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004220 if (ret != NULL)
4221 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004222 NEXT;
4223 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004224 if (ret != NULL)
4225 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004226 NEXT;
4227 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004228 if (ret != NULL)
4229 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004230 NEXT;
4231 }
4232 return(ret);
4233}
4234
4235/**
4236 * xmlParseElementContentDecl:
4237 * @ctxt: an XML parser context
4238 * @name: the name of the element being defined.
4239 * @result: the Element Content pointer will be stored here if any
4240 *
4241 * parse the declaration for an Element content either Mixed or Children,
4242 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4243 *
4244 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4245 *
4246 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4247 */
4248
4249int
4250xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4251 xmlElementContentPtr *result) {
4252
4253 xmlElementContentPtr tree = NULL;
4254 xmlParserInputPtr input = ctxt->input;
4255 int res;
4256
4257 *result = NULL;
4258
4259 if (RAW != '(') {
4260 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4262 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004263 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004264 ctxt->wellFormed = 0;
4265 ctxt->disableSAX = 1;
4266 return(-1);
4267 }
4268 NEXT;
4269 GROW;
4270 SKIP_BLANKS;
4271 if ((RAW == '#') && (NXT(1) == 'P') &&
4272 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4273 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4274 (NXT(6) == 'A')) {
4275 tree = xmlParseElementMixedContentDecl(ctxt);
4276 res = XML_ELEMENT_TYPE_MIXED;
4277 } else {
4278 tree = xmlParseElementChildrenContentDecl(ctxt);
4279 res = XML_ELEMENT_TYPE_ELEMENT;
4280 }
4281 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4282 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4284 ctxt->sax->error(ctxt->userData,
4285"Element content declaration doesn't start and stop in the same entity\n");
4286 ctxt->wellFormed = 0;
4287 ctxt->disableSAX = 1;
4288 }
4289 SKIP_BLANKS;
4290 *result = tree;
4291 return(res);
4292}
4293
4294/**
4295 * xmlParseElementDecl:
4296 * @ctxt: an XML parser context
4297 *
4298 * parse an Element declaration.
4299 *
4300 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4301 *
4302 * [ VC: Unique Element Type Declaration ]
4303 * No element type may be declared more than once
4304 *
4305 * Returns the type of the element, or -1 in case of error
4306 */
4307int
4308xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4309 xmlChar *name;
4310 int ret = -1;
4311 xmlElementContentPtr content = NULL;
4312
4313 GROW;
4314 if ((RAW == '<') && (NXT(1) == '!') &&
4315 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4316 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4317 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4318 (NXT(8) == 'T')) {
4319 xmlParserInputPtr input = ctxt->input;
4320
4321 SKIP(9);
4322 if (!IS_BLANK(CUR)) {
4323 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4325 ctxt->sax->error(ctxt->userData,
4326 "Space required after 'ELEMENT'\n");
4327 ctxt->wellFormed = 0;
4328 ctxt->disableSAX = 1;
4329 }
4330 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004331 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 if (name == NULL) {
4333 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4335 ctxt->sax->error(ctxt->userData,
4336 "xmlParseElementDecl: no name for Element\n");
4337 ctxt->wellFormed = 0;
4338 ctxt->disableSAX = 1;
4339 return(-1);
4340 }
4341 while ((RAW == 0) && (ctxt->inputNr > 1))
4342 xmlPopInput(ctxt);
4343 if (!IS_BLANK(CUR)) {
4344 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4346 ctxt->sax->error(ctxt->userData,
4347 "Space required after the element name\n");
4348 ctxt->wellFormed = 0;
4349 ctxt->disableSAX = 1;
4350 }
4351 SKIP_BLANKS;
4352 if ((RAW == 'E') && (NXT(1) == 'M') &&
4353 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4354 (NXT(4) == 'Y')) {
4355 SKIP(5);
4356 /*
4357 * Element must always be empty.
4358 */
4359 ret = XML_ELEMENT_TYPE_EMPTY;
4360 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4361 (NXT(2) == 'Y')) {
4362 SKIP(3);
4363 /*
4364 * Element is a generic container.
4365 */
4366 ret = XML_ELEMENT_TYPE_ANY;
4367 } else if (RAW == '(') {
4368 ret = xmlParseElementContentDecl(ctxt, name, &content);
4369 } else {
4370 /*
4371 * [ WFC: PEs in Internal Subset ] error handling.
4372 */
4373 if ((RAW == '%') && (ctxt->external == 0) &&
4374 (ctxt->inputNr == 1)) {
4375 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4377 ctxt->sax->error(ctxt->userData,
4378 "PEReference: forbidden within markup decl in internal subset\n");
4379 } else {
4380 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4382 ctxt->sax->error(ctxt->userData,
4383 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4384 }
4385 ctxt->wellFormed = 0;
4386 ctxt->disableSAX = 1;
4387 if (name != NULL) xmlFree(name);
4388 return(-1);
4389 }
4390
4391 SKIP_BLANKS;
4392 /*
4393 * Pop-up of finished entities.
4394 */
4395 while ((RAW == 0) && (ctxt->inputNr > 1))
4396 xmlPopInput(ctxt);
4397 SKIP_BLANKS;
4398
4399 if (RAW != '>') {
4400 ctxt->errNo = XML_ERR_GT_REQUIRED;
4401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4402 ctxt->sax->error(ctxt->userData,
4403 "xmlParseElementDecl: expected '>' at the end\n");
4404 ctxt->wellFormed = 0;
4405 ctxt->disableSAX = 1;
4406 } else {
4407 if (input != ctxt->input) {
4408 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4410 ctxt->sax->error(ctxt->userData,
4411"Element declaration doesn't start and stop in the same entity\n");
4412 ctxt->wellFormed = 0;
4413 ctxt->disableSAX = 1;
4414 }
4415
4416 NEXT;
4417 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4418 (ctxt->sax->elementDecl != NULL))
4419 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4420 content);
4421 }
4422 if (content != NULL) {
4423 xmlFreeElementContent(content);
4424 }
4425 if (name != NULL) {
4426 xmlFree(name);
4427 }
4428 }
4429 return(ret);
4430}
4431
4432/**
4433 * xmlParseMarkupDecl:
4434 * @ctxt: an XML parser context
4435 *
4436 * parse Markup declarations
4437 *
4438 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4439 * NotationDecl | PI | Comment
4440 *
4441 * [ VC: Proper Declaration/PE Nesting ]
4442 * Parameter-entity replacement text must be properly nested with
4443 * markup declarations. That is to say, if either the first character
4444 * or the last character of a markup declaration (markupdecl above) is
4445 * contained in the replacement text for a parameter-entity reference,
4446 * both must be contained in the same replacement text.
4447 *
4448 * [ WFC: PEs in Internal Subset ]
4449 * In the internal DTD subset, parameter-entity references can occur
4450 * only where markup declarations can occur, not within markup declarations.
4451 * (This does not apply to references that occur in external parameter
4452 * entities or to the external subset.)
4453 */
4454void
4455xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4456 GROW;
4457 xmlParseElementDecl(ctxt);
4458 xmlParseAttributeListDecl(ctxt);
4459 xmlParseEntityDecl(ctxt);
4460 xmlParseNotationDecl(ctxt);
4461 xmlParsePI(ctxt);
4462 xmlParseComment(ctxt);
4463 /*
4464 * This is only for internal subset. On external entities,
4465 * the replacement is done before parsing stage
4466 */
4467 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4468 xmlParsePEReference(ctxt);
4469 ctxt->instate = XML_PARSER_DTD;
4470}
4471
4472/**
4473 * xmlParseTextDecl:
4474 * @ctxt: an XML parser context
4475 *
4476 * parse an XML declaration header for external entities
4477 *
4478 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4479 *
4480 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4481 */
4482
4483void
4484xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4485 xmlChar *version;
4486
4487 /*
4488 * We know that '<?xml' is here.
4489 */
4490 if ((RAW == '<') && (NXT(1) == '?') &&
4491 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4492 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4493 SKIP(5);
4494 } else {
4495 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4497 ctxt->sax->error(ctxt->userData,
4498 "Text declaration '<?xml' required\n");
4499 ctxt->wellFormed = 0;
4500 ctxt->disableSAX = 1;
4501
4502 return;
4503 }
4504
4505 if (!IS_BLANK(CUR)) {
4506 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4508 ctxt->sax->error(ctxt->userData,
4509 "Space needed after '<?xml'\n");
4510 ctxt->wellFormed = 0;
4511 ctxt->disableSAX = 1;
4512 }
4513 SKIP_BLANKS;
4514
4515 /*
4516 * We may have the VersionInfo here.
4517 */
4518 version = xmlParseVersionInfo(ctxt);
4519 if (version == NULL)
4520 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4521 ctxt->input->version = version;
4522
4523 /*
4524 * We must have the encoding declaration
4525 */
4526 if (!IS_BLANK(CUR)) {
4527 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4529 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4530 ctxt->wellFormed = 0;
4531 ctxt->disableSAX = 1;
4532 }
4533 xmlParseEncodingDecl(ctxt);
4534 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4535 /*
4536 * The XML REC instructs us to stop parsing right here
4537 */
4538 return;
4539 }
4540
4541 SKIP_BLANKS;
4542 if ((RAW == '?') && (NXT(1) == '>')) {
4543 SKIP(2);
4544 } else if (RAW == '>') {
4545 /* Deprecated old WD ... */
4546 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4548 ctxt->sax->error(ctxt->userData,
4549 "XML declaration must end-up with '?>'\n");
4550 ctxt->wellFormed = 0;
4551 ctxt->disableSAX = 1;
4552 NEXT;
4553 } else {
4554 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4556 ctxt->sax->error(ctxt->userData,
4557 "parsing XML declaration: '?>' expected\n");
4558 ctxt->wellFormed = 0;
4559 ctxt->disableSAX = 1;
4560 MOVETO_ENDTAG(CUR_PTR);
4561 NEXT;
4562 }
4563}
4564
4565/*
4566 * xmlParseConditionalSections
4567 * @ctxt: an XML parser context
4568 *
4569 * [61] conditionalSect ::= includeSect | ignoreSect
4570 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4571 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4572 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4573 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4574 */
4575
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004576static void
Owen Taylor3473f882001-02-23 17:55:21 +00004577xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4578 SKIP(3);
4579 SKIP_BLANKS;
4580 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4581 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4582 (NXT(6) == 'E')) {
4583 SKIP(7);
4584 SKIP_BLANKS;
4585 if (RAW != '[') {
4586 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4588 ctxt->sax->error(ctxt->userData,
4589 "XML conditional section '[' expected\n");
4590 ctxt->wellFormed = 0;
4591 ctxt->disableSAX = 1;
4592 } else {
4593 NEXT;
4594 }
4595 if (xmlParserDebugEntities) {
4596 if ((ctxt->input != NULL) && (ctxt->input->filename))
4597 xmlGenericError(xmlGenericErrorContext,
4598 "%s(%d): ", ctxt->input->filename,
4599 ctxt->input->line);
4600 xmlGenericError(xmlGenericErrorContext,
4601 "Entering INCLUDE Conditional Section\n");
4602 }
4603
4604 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4605 (NXT(2) != '>'))) {
4606 const xmlChar *check = CUR_PTR;
4607 int cons = ctxt->input->consumed;
4608 int tok = ctxt->token;
4609
4610 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4611 xmlParseConditionalSections(ctxt);
4612 } else if (IS_BLANK(CUR)) {
4613 NEXT;
4614 } else if (RAW == '%') {
4615 xmlParsePEReference(ctxt);
4616 } else
4617 xmlParseMarkupDecl(ctxt);
4618
4619 /*
4620 * Pop-up of finished entities.
4621 */
4622 while ((RAW == 0) && (ctxt->inputNr > 1))
4623 xmlPopInput(ctxt);
4624
4625 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4626 (tok == ctxt->token)) {
4627 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4629 ctxt->sax->error(ctxt->userData,
4630 "Content error in the external subset\n");
4631 ctxt->wellFormed = 0;
4632 ctxt->disableSAX = 1;
4633 break;
4634 }
4635 }
4636 if (xmlParserDebugEntities) {
4637 if ((ctxt->input != NULL) && (ctxt->input->filename))
4638 xmlGenericError(xmlGenericErrorContext,
4639 "%s(%d): ", ctxt->input->filename,
4640 ctxt->input->line);
4641 xmlGenericError(xmlGenericErrorContext,
4642 "Leaving INCLUDE Conditional Section\n");
4643 }
4644
4645 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4646 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4647 int state;
4648 int instate;
4649 int depth = 0;
4650
4651 SKIP(6);
4652 SKIP_BLANKS;
4653 if (RAW != '[') {
4654 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4656 ctxt->sax->error(ctxt->userData,
4657 "XML conditional section '[' expected\n");
4658 ctxt->wellFormed = 0;
4659 ctxt->disableSAX = 1;
4660 } else {
4661 NEXT;
4662 }
4663 if (xmlParserDebugEntities) {
4664 if ((ctxt->input != NULL) && (ctxt->input->filename))
4665 xmlGenericError(xmlGenericErrorContext,
4666 "%s(%d): ", ctxt->input->filename,
4667 ctxt->input->line);
4668 xmlGenericError(xmlGenericErrorContext,
4669 "Entering IGNORE Conditional Section\n");
4670 }
4671
4672 /*
4673 * Parse up to the end of the conditionnal section
4674 * But disable SAX event generating DTD building in the meantime
4675 */
4676 state = ctxt->disableSAX;
4677 instate = ctxt->instate;
4678 ctxt->disableSAX = 1;
4679 ctxt->instate = XML_PARSER_IGNORE;
4680
4681 while (depth >= 0) {
4682 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4683 depth++;
4684 SKIP(3);
4685 continue;
4686 }
4687 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4688 if (--depth >= 0) SKIP(3);
4689 continue;
4690 }
4691 NEXT;
4692 continue;
4693 }
4694
4695 ctxt->disableSAX = state;
4696 ctxt->instate = instate;
4697
4698 if (xmlParserDebugEntities) {
4699 if ((ctxt->input != NULL) && (ctxt->input->filename))
4700 xmlGenericError(xmlGenericErrorContext,
4701 "%s(%d): ", ctxt->input->filename,
4702 ctxt->input->line);
4703 xmlGenericError(xmlGenericErrorContext,
4704 "Leaving IGNORE Conditional Section\n");
4705 }
4706
4707 } else {
4708 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4710 ctxt->sax->error(ctxt->userData,
4711 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4712 ctxt->wellFormed = 0;
4713 ctxt->disableSAX = 1;
4714 }
4715
4716 if (RAW == 0)
4717 SHRINK;
4718
4719 if (RAW == 0) {
4720 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4722 ctxt->sax->error(ctxt->userData,
4723 "XML conditional section not closed\n");
4724 ctxt->wellFormed = 0;
4725 ctxt->disableSAX = 1;
4726 } else {
4727 SKIP(3);
4728 }
4729}
4730
4731/**
4732 * xmlParseExternalSubset:
4733 * @ctxt: an XML parser context
4734 * @ExternalID: the external identifier
4735 * @SystemID: the system identifier (or URL)
4736 *
4737 * parse Markup declarations from an external subset
4738 *
4739 * [30] extSubset ::= textDecl? extSubsetDecl
4740 *
4741 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4742 */
4743void
4744xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4745 const xmlChar *SystemID) {
4746 GROW;
4747 if ((RAW == '<') && (NXT(1) == '?') &&
4748 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4749 (NXT(4) == 'l')) {
4750 xmlParseTextDecl(ctxt);
4751 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4752 /*
4753 * The XML REC instructs us to stop parsing right here
4754 */
4755 ctxt->instate = XML_PARSER_EOF;
4756 return;
4757 }
4758 }
4759 if (ctxt->myDoc == NULL) {
4760 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4761 }
4762 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4763 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4764
4765 ctxt->instate = XML_PARSER_DTD;
4766 ctxt->external = 1;
4767 while (((RAW == '<') && (NXT(1) == '?')) ||
4768 ((RAW == '<') && (NXT(1) == '!')) ||
4769 IS_BLANK(CUR)) {
4770 const xmlChar *check = CUR_PTR;
4771 int cons = ctxt->input->consumed;
4772 int tok = ctxt->token;
4773
4774 GROW;
4775 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4776 xmlParseConditionalSections(ctxt);
4777 } else if (IS_BLANK(CUR)) {
4778 NEXT;
4779 } else if (RAW == '%') {
4780 xmlParsePEReference(ctxt);
4781 } else
4782 xmlParseMarkupDecl(ctxt);
4783
4784 /*
4785 * Pop-up of finished entities.
4786 */
4787 while ((RAW == 0) && (ctxt->inputNr > 1))
4788 xmlPopInput(ctxt);
4789
4790 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4791 (tok == ctxt->token)) {
4792 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4794 ctxt->sax->error(ctxt->userData,
4795 "Content error in the external subset\n");
4796 ctxt->wellFormed = 0;
4797 ctxt->disableSAX = 1;
4798 break;
4799 }
4800 }
4801
4802 if (RAW != 0) {
4803 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4805 ctxt->sax->error(ctxt->userData,
4806 "Extra content at the end of the document\n");
4807 ctxt->wellFormed = 0;
4808 ctxt->disableSAX = 1;
4809 }
4810
4811}
4812
4813/**
4814 * xmlParseReference:
4815 * @ctxt: an XML parser context
4816 *
4817 * parse and handle entity references in content, depending on the SAX
4818 * interface, this may end-up in a call to character() if this is a
4819 * CharRef, a predefined entity, if there is no reference() callback.
4820 * or if the parser was asked to switch to that mode.
4821 *
4822 * [67] Reference ::= EntityRef | CharRef
4823 */
4824void
4825xmlParseReference(xmlParserCtxtPtr ctxt) {
4826 xmlEntityPtr ent;
4827 xmlChar *val;
4828 if (RAW != '&') return;
4829
4830 if (NXT(1) == '#') {
4831 int i = 0;
4832 xmlChar out[10];
4833 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004834 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004835
4836 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4837 /*
4838 * So we are using non-UTF-8 buffers
4839 * Check that the char fit on 8bits, if not
4840 * generate a CharRef.
4841 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004842 if (value <= 0xFF) {
4843 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004844 out[1] = 0;
4845 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4846 (!ctxt->disableSAX))
4847 ctxt->sax->characters(ctxt->userData, out, 1);
4848 } else {
4849 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004850 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004851 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004852 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004853 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4854 (!ctxt->disableSAX))
4855 ctxt->sax->reference(ctxt->userData, out);
4856 }
4857 } else {
4858 /*
4859 * Just encode the value in UTF-8
4860 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004861 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004862 out[i] = 0;
4863 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4864 (!ctxt->disableSAX))
4865 ctxt->sax->characters(ctxt->userData, out, i);
4866 }
4867 } else {
4868 ent = xmlParseEntityRef(ctxt);
4869 if (ent == NULL) return;
4870 if ((ent->name != NULL) &&
4871 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4872 xmlNodePtr list = NULL;
4873 int ret;
4874
4875
4876 /*
4877 * The first reference to the entity trigger a parsing phase
4878 * where the ent->children is filled with the result from
4879 * the parsing.
4880 */
4881 if (ent->children == NULL) {
4882 xmlChar *value;
4883 value = ent->content;
4884
4885 /*
4886 * Check that this entity is well formed
4887 */
4888 if ((value != NULL) &&
4889 (value[1] == 0) && (value[0] == '<') &&
4890 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4891 /*
4892 * DONE: get definite answer on this !!!
4893 * Lots of entity decls are used to declare a single
4894 * char
4895 * <!ENTITY lt "<">
4896 * Which seems to be valid since
4897 * 2.4: The ampersand character (&) and the left angle
4898 * bracket (<) may appear in their literal form only
4899 * when used ... They are also legal within the literal
4900 * entity value of an internal entity declaration;i
4901 * see "4.3.2 Well-Formed Parsed Entities".
4902 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4903 * Looking at the OASIS test suite and James Clark
4904 * tests, this is broken. However the XML REC uses
4905 * it. Is the XML REC not well-formed ????
4906 * This is a hack to avoid this problem
4907 *
4908 * ANSWER: since lt gt amp .. are already defined,
4909 * this is a redefinition and hence the fact that the
4910 * contentis not well balanced is not a Wf error, this
4911 * is lousy but acceptable.
4912 */
4913 list = xmlNewDocText(ctxt->myDoc, value);
4914 if (list != NULL) {
4915 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4916 (ent->children == NULL)) {
4917 ent->children = list;
4918 ent->last = list;
4919 list->parent = (xmlNodePtr) ent;
4920 } else {
4921 xmlFreeNodeList(list);
4922 }
4923 } else if (list != NULL) {
4924 xmlFreeNodeList(list);
4925 }
4926 } else {
4927 /*
4928 * 4.3.2: An internal general parsed entity is well-formed
4929 * if its replacement text matches the production labeled
4930 * content.
4931 */
4932 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4933 ctxt->depth++;
4934 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4935 ctxt->sax, NULL, ctxt->depth,
4936 value, &list);
4937 ctxt->depth--;
4938 } else if (ent->etype ==
4939 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4940 ctxt->depth++;
4941 ret = xmlParseExternalEntity(ctxt->myDoc,
4942 ctxt->sax, NULL, ctxt->depth,
4943 ent->URI, ent->ExternalID, &list);
4944 ctxt->depth--;
4945 } else {
4946 ret = -1;
4947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4948 ctxt->sax->error(ctxt->userData,
4949 "Internal: invalid entity type\n");
4950 }
4951 if (ret == XML_ERR_ENTITY_LOOP) {
4952 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4954 ctxt->sax->error(ctxt->userData,
4955 "Detected entity reference loop\n");
4956 ctxt->wellFormed = 0;
4957 ctxt->disableSAX = 1;
4958 } else if ((ret == 0) && (list != NULL)) {
4959 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4960 (ent->children == NULL)) {
4961 ent->children = list;
4962 while (list != NULL) {
4963 list->parent = (xmlNodePtr) ent;
4964 if (list->next == NULL)
4965 ent->last = list;
4966 list = list->next;
4967 }
4968 } else {
4969 xmlFreeNodeList(list);
4970 }
4971 } else if (ret > 0) {
4972 ctxt->errNo = ret;
4973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4974 ctxt->sax->error(ctxt->userData,
4975 "Entity value required\n");
4976 ctxt->wellFormed = 0;
4977 ctxt->disableSAX = 1;
4978 } else if (list != NULL) {
4979 xmlFreeNodeList(list);
4980 }
4981 }
4982 }
4983 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4984 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4985 /*
4986 * Create a node.
4987 */
4988 ctxt->sax->reference(ctxt->userData, ent->name);
4989 return;
4990 } else if (ctxt->replaceEntities) {
4991 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4992 /*
4993 * Seems we are generating the DOM content, do
4994 * a simple tree copy
4995 */
4996 xmlNodePtr new;
4997 new = xmlCopyNodeList(ent->children);
4998
4999 xmlAddChildList(ctxt->node, new);
5000 /*
5001 * This is to avoid a nasty side effect, see
5002 * characters() in SAX.c
5003 */
5004 ctxt->nodemem = 0;
5005 ctxt->nodelen = 0;
5006 return;
5007 } else {
5008 /*
5009 * Probably running in SAX mode
5010 */
5011 xmlParserInputPtr input;
5012
5013 input = xmlNewEntityInputStream(ctxt, ent);
5014 xmlPushInput(ctxt, input);
5015 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5016 (RAW == '<') && (NXT(1) == '?') &&
5017 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5018 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5019 xmlParseTextDecl(ctxt);
5020 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5021 /*
5022 * The XML REC instructs us to stop parsing right here
5023 */
5024 ctxt->instate = XML_PARSER_EOF;
5025 return;
5026 }
5027 if (input->standalone == 1) {
5028 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5030 ctxt->sax->error(ctxt->userData,
5031 "external parsed entities cannot be standalone\n");
5032 ctxt->wellFormed = 0;
5033 ctxt->disableSAX = 1;
5034 }
5035 }
5036 return;
5037 }
5038 }
5039 } else {
5040 val = ent->content;
5041 if (val == NULL) return;
5042 /*
5043 * inline the entity.
5044 */
5045 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5046 (!ctxt->disableSAX))
5047 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5048 }
5049 }
5050}
5051
5052/**
5053 * xmlParseEntityRef:
5054 * @ctxt: an XML parser context
5055 *
5056 * parse ENTITY references declarations
5057 *
5058 * [68] EntityRef ::= '&' Name ';'
5059 *
5060 * [ WFC: Entity Declared ]
5061 * In a document without any DTD, a document with only an internal DTD
5062 * subset which contains no parameter entity references, or a document
5063 * with "standalone='yes'", the Name given in the entity reference
5064 * must match that in an entity declaration, except that well-formed
5065 * documents need not declare any of the following entities: amp, lt,
5066 * gt, apos, quot. The declaration of a parameter entity must precede
5067 * any reference to it. Similarly, the declaration of a general entity
5068 * must precede any reference to it which appears in a default value in an
5069 * attribute-list declaration. Note that if entities are declared in the
5070 * external subset or in external parameter entities, a non-validating
5071 * processor is not obligated to read and process their declarations;
5072 * for such documents, the rule that an entity must be declared is a
5073 * well-formedness constraint only if standalone='yes'.
5074 *
5075 * [ WFC: Parsed Entity ]
5076 * An entity reference must not contain the name of an unparsed entity
5077 *
5078 * Returns the xmlEntityPtr if found, or NULL otherwise.
5079 */
5080xmlEntityPtr
5081xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5082 xmlChar *name;
5083 xmlEntityPtr ent = NULL;
5084
5085 GROW;
5086
5087 if (RAW == '&') {
5088 NEXT;
5089 name = xmlParseName(ctxt);
5090 if (name == NULL) {
5091 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5093 ctxt->sax->error(ctxt->userData,
5094 "xmlParseEntityRef: no name\n");
5095 ctxt->wellFormed = 0;
5096 ctxt->disableSAX = 1;
5097 } else {
5098 if (RAW == ';') {
5099 NEXT;
5100 /*
5101 * Ask first SAX for entity resolution, otherwise try the
5102 * predefined set.
5103 */
5104 if (ctxt->sax != NULL) {
5105 if (ctxt->sax->getEntity != NULL)
5106 ent = ctxt->sax->getEntity(ctxt->userData, name);
5107 if (ent == NULL)
5108 ent = xmlGetPredefinedEntity(name);
5109 }
5110 /*
5111 * [ WFC: Entity Declared ]
5112 * In a document without any DTD, a document with only an
5113 * internal DTD subset which contains no parameter entity
5114 * references, or a document with "standalone='yes'", the
5115 * Name given in the entity reference must match that in an
5116 * entity declaration, except that well-formed documents
5117 * need not declare any of the following entities: amp, lt,
5118 * gt, apos, quot.
5119 * The declaration of a parameter entity must precede any
5120 * reference to it.
5121 * Similarly, the declaration of a general entity must
5122 * precede any reference to it which appears in a default
5123 * value in an attribute-list declaration. Note that if
5124 * entities are declared in the external subset or in
5125 * external parameter entities, a non-validating processor
5126 * is not obligated to read and process their declarations;
5127 * for such documents, the rule that an entity must be
5128 * declared is a well-formedness constraint only if
5129 * standalone='yes'.
5130 */
5131 if (ent == NULL) {
5132 if ((ctxt->standalone == 1) ||
5133 ((ctxt->hasExternalSubset == 0) &&
5134 (ctxt->hasPErefs == 0))) {
5135 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5137 ctxt->sax->error(ctxt->userData,
5138 "Entity '%s' not defined\n", name);
5139 ctxt->wellFormed = 0;
5140 ctxt->disableSAX = 1;
5141 } else {
5142 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5143 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5144 ctxt->sax->warning(ctxt->userData,
5145 "Entity '%s' not defined\n", name);
5146 }
5147 }
5148
5149 /*
5150 * [ WFC: Parsed Entity ]
5151 * An entity reference must not contain the name of an
5152 * unparsed entity
5153 */
5154 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5155 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5157 ctxt->sax->error(ctxt->userData,
5158 "Entity reference to unparsed entity %s\n", name);
5159 ctxt->wellFormed = 0;
5160 ctxt->disableSAX = 1;
5161 }
5162
5163 /*
5164 * [ WFC: No External Entity References ]
5165 * Attribute values cannot contain direct or indirect
5166 * entity references to external entities.
5167 */
5168 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5169 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5170 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5172 ctxt->sax->error(ctxt->userData,
5173 "Attribute references external entity '%s'\n", name);
5174 ctxt->wellFormed = 0;
5175 ctxt->disableSAX = 1;
5176 }
5177 /*
5178 * [ WFC: No < in Attribute Values ]
5179 * The replacement text of any entity referred to directly or
5180 * indirectly in an attribute value (other than "&lt;") must
5181 * not contain a <.
5182 */
5183 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5184 (ent != NULL) &&
5185 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5186 (ent->content != NULL) &&
5187 (xmlStrchr(ent->content, '<'))) {
5188 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5190 ctxt->sax->error(ctxt->userData,
5191 "'<' in entity '%s' is not allowed in attributes values\n", name);
5192 ctxt->wellFormed = 0;
5193 ctxt->disableSAX = 1;
5194 }
5195
5196 /*
5197 * Internal check, no parameter entities here ...
5198 */
5199 else {
5200 switch (ent->etype) {
5201 case XML_INTERNAL_PARAMETER_ENTITY:
5202 case XML_EXTERNAL_PARAMETER_ENTITY:
5203 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5205 ctxt->sax->error(ctxt->userData,
5206 "Attempt to reference the parameter entity '%s'\n", name);
5207 ctxt->wellFormed = 0;
5208 ctxt->disableSAX = 1;
5209 break;
5210 default:
5211 break;
5212 }
5213 }
5214
5215 /*
5216 * [ WFC: No Recursion ]
5217 * A parsed entity must not contain a recursive reference
5218 * to itself, either directly or indirectly.
5219 * Done somewhere else
5220 */
5221
5222 } else {
5223 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5225 ctxt->sax->error(ctxt->userData,
5226 "xmlParseEntityRef: expecting ';'\n");
5227 ctxt->wellFormed = 0;
5228 ctxt->disableSAX = 1;
5229 }
5230 xmlFree(name);
5231 }
5232 }
5233 return(ent);
5234}
5235
5236/**
5237 * xmlParseStringEntityRef:
5238 * @ctxt: an XML parser context
5239 * @str: a pointer to an index in the string
5240 *
5241 * parse ENTITY references declarations, but this version parses it from
5242 * a string value.
5243 *
5244 * [68] EntityRef ::= '&' Name ';'
5245 *
5246 * [ WFC: Entity Declared ]
5247 * In a document without any DTD, a document with only an internal DTD
5248 * subset which contains no parameter entity references, or a document
5249 * with "standalone='yes'", the Name given in the entity reference
5250 * must match that in an entity declaration, except that well-formed
5251 * documents need not declare any of the following entities: amp, lt,
5252 * gt, apos, quot. The declaration of a parameter entity must precede
5253 * any reference to it. Similarly, the declaration of a general entity
5254 * must precede any reference to it which appears in a default value in an
5255 * attribute-list declaration. Note that if entities are declared in the
5256 * external subset or in external parameter entities, a non-validating
5257 * processor is not obligated to read and process their declarations;
5258 * for such documents, the rule that an entity must be declared is a
5259 * well-formedness constraint only if standalone='yes'.
5260 *
5261 * [ WFC: Parsed Entity ]
5262 * An entity reference must not contain the name of an unparsed entity
5263 *
5264 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5265 * is updated to the current location in the string.
5266 */
5267xmlEntityPtr
5268xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5269 xmlChar *name;
5270 const xmlChar *ptr;
5271 xmlChar cur;
5272 xmlEntityPtr ent = NULL;
5273
5274 if ((str == NULL) || (*str == NULL))
5275 return(NULL);
5276 ptr = *str;
5277 cur = *ptr;
5278 if (cur == '&') {
5279 ptr++;
5280 cur = *ptr;
5281 name = xmlParseStringName(ctxt, &ptr);
5282 if (name == NULL) {
5283 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5285 ctxt->sax->error(ctxt->userData,
5286 "xmlParseEntityRef: no name\n");
5287 ctxt->wellFormed = 0;
5288 ctxt->disableSAX = 1;
5289 } else {
5290 if (*ptr == ';') {
5291 ptr++;
5292 /*
5293 * Ask first SAX for entity resolution, otherwise try the
5294 * predefined set.
5295 */
5296 if (ctxt->sax != NULL) {
5297 if (ctxt->sax->getEntity != NULL)
5298 ent = ctxt->sax->getEntity(ctxt->userData, name);
5299 if (ent == NULL)
5300 ent = xmlGetPredefinedEntity(name);
5301 }
5302 /*
5303 * [ WFC: Entity Declared ]
5304 * In a document without any DTD, a document with only an
5305 * internal DTD subset which contains no parameter entity
5306 * references, or a document with "standalone='yes'", the
5307 * Name given in the entity reference must match that in an
5308 * entity declaration, except that well-formed documents
5309 * need not declare any of the following entities: amp, lt,
5310 * gt, apos, quot.
5311 * The declaration of a parameter entity must precede any
5312 * reference to it.
5313 * Similarly, the declaration of a general entity must
5314 * precede any reference to it which appears in a default
5315 * value in an attribute-list declaration. Note that if
5316 * entities are declared in the external subset or in
5317 * external parameter entities, a non-validating processor
5318 * is not obligated to read and process their declarations;
5319 * for such documents, the rule that an entity must be
5320 * declared is a well-formedness constraint only if
5321 * standalone='yes'.
5322 */
5323 if (ent == NULL) {
5324 if ((ctxt->standalone == 1) ||
5325 ((ctxt->hasExternalSubset == 0) &&
5326 (ctxt->hasPErefs == 0))) {
5327 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5329 ctxt->sax->error(ctxt->userData,
5330 "Entity '%s' not defined\n", name);
5331 ctxt->wellFormed = 0;
5332 ctxt->disableSAX = 1;
5333 } else {
5334 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5335 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5336 ctxt->sax->warning(ctxt->userData,
5337 "Entity '%s' not defined\n", name);
5338 }
5339 }
5340
5341 /*
5342 * [ WFC: Parsed Entity ]
5343 * An entity reference must not contain the name of an
5344 * unparsed entity
5345 */
5346 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5347 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5349 ctxt->sax->error(ctxt->userData,
5350 "Entity reference to unparsed entity %s\n", name);
5351 ctxt->wellFormed = 0;
5352 ctxt->disableSAX = 1;
5353 }
5354
5355 /*
5356 * [ WFC: No External Entity References ]
5357 * Attribute values cannot contain direct or indirect
5358 * entity references to external entities.
5359 */
5360 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5361 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5362 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5364 ctxt->sax->error(ctxt->userData,
5365 "Attribute references external entity '%s'\n", name);
5366 ctxt->wellFormed = 0;
5367 ctxt->disableSAX = 1;
5368 }
5369 /*
5370 * [ WFC: No < in Attribute Values ]
5371 * The replacement text of any entity referred to directly or
5372 * indirectly in an attribute value (other than "&lt;") must
5373 * not contain a <.
5374 */
5375 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5376 (ent != NULL) &&
5377 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5378 (ent->content != NULL) &&
5379 (xmlStrchr(ent->content, '<'))) {
5380 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5382 ctxt->sax->error(ctxt->userData,
5383 "'<' in entity '%s' is not allowed in attributes values\n", name);
5384 ctxt->wellFormed = 0;
5385 ctxt->disableSAX = 1;
5386 }
5387
5388 /*
5389 * Internal check, no parameter entities here ...
5390 */
5391 else {
5392 switch (ent->etype) {
5393 case XML_INTERNAL_PARAMETER_ENTITY:
5394 case XML_EXTERNAL_PARAMETER_ENTITY:
5395 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5397 ctxt->sax->error(ctxt->userData,
5398 "Attempt to reference the parameter entity '%s'\n", name);
5399 ctxt->wellFormed = 0;
5400 ctxt->disableSAX = 1;
5401 break;
5402 default:
5403 break;
5404 }
5405 }
5406
5407 /*
5408 * [ WFC: No Recursion ]
5409 * A parsed entity must not contain a recursive reference
5410 * to itself, either directly or indirectly.
5411 * Done somewhwere else
5412 */
5413
5414 } else {
5415 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5417 ctxt->sax->error(ctxt->userData,
5418 "xmlParseEntityRef: expecting ';'\n");
5419 ctxt->wellFormed = 0;
5420 ctxt->disableSAX = 1;
5421 }
5422 xmlFree(name);
5423 }
5424 }
5425 *str = ptr;
5426 return(ent);
5427}
5428
5429/**
5430 * xmlParsePEReference:
5431 * @ctxt: an XML parser context
5432 *
5433 * parse PEReference declarations
5434 * The entity content is handled directly by pushing it's content as
5435 * a new input stream.
5436 *
5437 * [69] PEReference ::= '%' Name ';'
5438 *
5439 * [ WFC: No Recursion ]
5440 * A parsed entity must not contain a recursive
5441 * reference to itself, either directly or indirectly.
5442 *
5443 * [ WFC: Entity Declared ]
5444 * In a document without any DTD, a document with only an internal DTD
5445 * subset which contains no parameter entity references, or a document
5446 * with "standalone='yes'", ... ... The declaration of a parameter
5447 * entity must precede any reference to it...
5448 *
5449 * [ VC: Entity Declared ]
5450 * In a document with an external subset or external parameter entities
5451 * with "standalone='no'", ... ... The declaration of a parameter entity
5452 * must precede any reference to it...
5453 *
5454 * [ WFC: In DTD ]
5455 * Parameter-entity references may only appear in the DTD.
5456 * NOTE: misleading but this is handled.
5457 */
5458void
5459xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5460 xmlChar *name;
5461 xmlEntityPtr entity = NULL;
5462 xmlParserInputPtr input;
5463
5464 if (RAW == '%') {
5465 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005466 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005467 if (name == NULL) {
5468 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5470 ctxt->sax->error(ctxt->userData,
5471 "xmlParsePEReference: no name\n");
5472 ctxt->wellFormed = 0;
5473 ctxt->disableSAX = 1;
5474 } else {
5475 if (RAW == ';') {
5476 NEXT;
5477 if ((ctxt->sax != NULL) &&
5478 (ctxt->sax->getParameterEntity != NULL))
5479 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5480 name);
5481 if (entity == NULL) {
5482 /*
5483 * [ WFC: Entity Declared ]
5484 * In a document without any DTD, a document with only an
5485 * internal DTD subset which contains no parameter entity
5486 * references, or a document with "standalone='yes'", ...
5487 * ... The declaration of a parameter entity must precede
5488 * any reference to it...
5489 */
5490 if ((ctxt->standalone == 1) ||
5491 ((ctxt->hasExternalSubset == 0) &&
5492 (ctxt->hasPErefs == 0))) {
5493 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5494 if ((!ctxt->disableSAX) &&
5495 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5496 ctxt->sax->error(ctxt->userData,
5497 "PEReference: %%%s; not found\n", name);
5498 ctxt->wellFormed = 0;
5499 ctxt->disableSAX = 1;
5500 } else {
5501 /*
5502 * [ VC: Entity Declared ]
5503 * In a document with an external subset or external
5504 * parameter entities with "standalone='no'", ...
5505 * ... The declaration of a parameter entity must precede
5506 * any reference to it...
5507 */
5508 if ((!ctxt->disableSAX) &&
5509 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5510 ctxt->sax->warning(ctxt->userData,
5511 "PEReference: %%%s; not found\n", name);
5512 ctxt->valid = 0;
5513 }
5514 } else {
5515 /*
5516 * Internal checking in case the entity quest barfed
5517 */
5518 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5519 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5520 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5521 ctxt->sax->warning(ctxt->userData,
5522 "Internal: %%%s; is not a parameter entity\n", name);
5523 } else {
5524 /*
5525 * TODO !!!
5526 * handle the extra spaces added before and after
5527 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5528 */
5529 input = xmlNewEntityInputStream(ctxt, entity);
5530 xmlPushInput(ctxt, input);
5531 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5532 (RAW == '<') && (NXT(1) == '?') &&
5533 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5534 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5535 xmlParseTextDecl(ctxt);
5536 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5537 /*
5538 * The XML REC instructs us to stop parsing
5539 * right here
5540 */
5541 ctxt->instate = XML_PARSER_EOF;
5542 xmlFree(name);
5543 return;
5544 }
5545 }
5546 if (ctxt->token == 0)
5547 ctxt->token = ' ';
5548 }
5549 }
5550 ctxt->hasPErefs = 1;
5551 } else {
5552 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5554 ctxt->sax->error(ctxt->userData,
5555 "xmlParsePEReference: expecting ';'\n");
5556 ctxt->wellFormed = 0;
5557 ctxt->disableSAX = 1;
5558 }
5559 xmlFree(name);
5560 }
5561 }
5562}
5563
5564/**
5565 * xmlParseStringPEReference:
5566 * @ctxt: an XML parser context
5567 * @str: a pointer to an index in the string
5568 *
5569 * parse PEReference declarations
5570 *
5571 * [69] PEReference ::= '%' Name ';'
5572 *
5573 * [ WFC: No Recursion ]
5574 * A parsed entity must not contain a recursive
5575 * reference to itself, either directly or indirectly.
5576 *
5577 * [ WFC: Entity Declared ]
5578 * In a document without any DTD, a document with only an internal DTD
5579 * subset which contains no parameter entity references, or a document
5580 * with "standalone='yes'", ... ... The declaration of a parameter
5581 * entity must precede any reference to it...
5582 *
5583 * [ VC: Entity Declared ]
5584 * In a document with an external subset or external parameter entities
5585 * with "standalone='no'", ... ... The declaration of a parameter entity
5586 * must precede any reference to it...
5587 *
5588 * [ WFC: In DTD ]
5589 * Parameter-entity references may only appear in the DTD.
5590 * NOTE: misleading but this is handled.
5591 *
5592 * Returns the string of the entity content.
5593 * str is updated to the current value of the index
5594 */
5595xmlEntityPtr
5596xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5597 const xmlChar *ptr;
5598 xmlChar cur;
5599 xmlChar *name;
5600 xmlEntityPtr entity = NULL;
5601
5602 if ((str == NULL) || (*str == NULL)) return(NULL);
5603 ptr = *str;
5604 cur = *ptr;
5605 if (cur == '%') {
5606 ptr++;
5607 cur = *ptr;
5608 name = xmlParseStringName(ctxt, &ptr);
5609 if (name == NULL) {
5610 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5612 ctxt->sax->error(ctxt->userData,
5613 "xmlParseStringPEReference: no name\n");
5614 ctxt->wellFormed = 0;
5615 ctxt->disableSAX = 1;
5616 } else {
5617 cur = *ptr;
5618 if (cur == ';') {
5619 ptr++;
5620 cur = *ptr;
5621 if ((ctxt->sax != NULL) &&
5622 (ctxt->sax->getParameterEntity != NULL))
5623 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5624 name);
5625 if (entity == NULL) {
5626 /*
5627 * [ WFC: Entity Declared ]
5628 * In a document without any DTD, a document with only an
5629 * internal DTD subset which contains no parameter entity
5630 * references, or a document with "standalone='yes'", ...
5631 * ... The declaration of a parameter entity must precede
5632 * any reference to it...
5633 */
5634 if ((ctxt->standalone == 1) ||
5635 ((ctxt->hasExternalSubset == 0) &&
5636 (ctxt->hasPErefs == 0))) {
5637 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5639 ctxt->sax->error(ctxt->userData,
5640 "PEReference: %%%s; not found\n", name);
5641 ctxt->wellFormed = 0;
5642 ctxt->disableSAX = 1;
5643 } else {
5644 /*
5645 * [ VC: Entity Declared ]
5646 * In a document with an external subset or external
5647 * parameter entities with "standalone='no'", ...
5648 * ... The declaration of a parameter entity must
5649 * precede any reference to it...
5650 */
5651 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5652 ctxt->sax->warning(ctxt->userData,
5653 "PEReference: %%%s; not found\n", name);
5654 ctxt->valid = 0;
5655 }
5656 } else {
5657 /*
5658 * Internal checking in case the entity quest barfed
5659 */
5660 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5661 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5662 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5663 ctxt->sax->warning(ctxt->userData,
5664 "Internal: %%%s; is not a parameter entity\n", name);
5665 }
5666 }
5667 ctxt->hasPErefs = 1;
5668 } else {
5669 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5671 ctxt->sax->error(ctxt->userData,
5672 "xmlParseStringPEReference: expecting ';'\n");
5673 ctxt->wellFormed = 0;
5674 ctxt->disableSAX = 1;
5675 }
5676 xmlFree(name);
5677 }
5678 }
5679 *str = ptr;
5680 return(entity);
5681}
5682
5683/**
5684 * xmlParseDocTypeDecl:
5685 * @ctxt: an XML parser context
5686 *
5687 * parse a DOCTYPE declaration
5688 *
5689 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5690 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5691 *
5692 * [ VC: Root Element Type ]
5693 * The Name in the document type declaration must match the element
5694 * type of the root element.
5695 */
5696
5697void
5698xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5699 xmlChar *name = NULL;
5700 xmlChar *ExternalID = NULL;
5701 xmlChar *URI = NULL;
5702
5703 /*
5704 * We know that '<!DOCTYPE' has been detected.
5705 */
5706 SKIP(9);
5707
5708 SKIP_BLANKS;
5709
5710 /*
5711 * Parse the DOCTYPE name.
5712 */
5713 name = xmlParseName(ctxt);
5714 if (name == NULL) {
5715 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5717 ctxt->sax->error(ctxt->userData,
5718 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5719 ctxt->wellFormed = 0;
5720 ctxt->disableSAX = 1;
5721 }
5722 ctxt->intSubName = name;
5723
5724 SKIP_BLANKS;
5725
5726 /*
5727 * Check for SystemID and ExternalID
5728 */
5729 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5730
5731 if ((URI != NULL) || (ExternalID != NULL)) {
5732 ctxt->hasExternalSubset = 1;
5733 }
5734 ctxt->extSubURI = URI;
5735 ctxt->extSubSystem = ExternalID;
5736
5737 SKIP_BLANKS;
5738
5739 /*
5740 * Create and update the internal subset.
5741 */
5742 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5743 (!ctxt->disableSAX))
5744 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5745
5746 /*
5747 * Is there any internal subset declarations ?
5748 * they are handled separately in xmlParseInternalSubset()
5749 */
5750 if (RAW == '[')
5751 return;
5752
5753 /*
5754 * We should be at the end of the DOCTYPE declaration.
5755 */
5756 if (RAW != '>') {
5757 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5759 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5760 ctxt->wellFormed = 0;
5761 ctxt->disableSAX = 1;
5762 }
5763 NEXT;
5764}
5765
5766/**
5767 * xmlParseInternalsubset:
5768 * @ctxt: an XML parser context
5769 *
5770 * parse the internal subset declaration
5771 *
5772 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5773 */
5774
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005775static void
Owen Taylor3473f882001-02-23 17:55:21 +00005776xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5777 /*
5778 * Is there any DTD definition ?
5779 */
5780 if (RAW == '[') {
5781 ctxt->instate = XML_PARSER_DTD;
5782 NEXT;
5783 /*
5784 * Parse the succession of Markup declarations and
5785 * PEReferences.
5786 * Subsequence (markupdecl | PEReference | S)*
5787 */
5788 while (RAW != ']') {
5789 const xmlChar *check = CUR_PTR;
5790 int cons = ctxt->input->consumed;
5791
5792 SKIP_BLANKS;
5793 xmlParseMarkupDecl(ctxt);
5794 xmlParsePEReference(ctxt);
5795
5796 /*
5797 * Pop-up of finished entities.
5798 */
5799 while ((RAW == 0) && (ctxt->inputNr > 1))
5800 xmlPopInput(ctxt);
5801
5802 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5803 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5805 ctxt->sax->error(ctxt->userData,
5806 "xmlParseInternalSubset: error detected in Markup declaration\n");
5807 ctxt->wellFormed = 0;
5808 ctxt->disableSAX = 1;
5809 break;
5810 }
5811 }
5812 if (RAW == ']') {
5813 NEXT;
5814 SKIP_BLANKS;
5815 }
5816 }
5817
5818 /*
5819 * We should be at the end of the DOCTYPE declaration.
5820 */
5821 if (RAW != '>') {
5822 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5824 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5825 ctxt->wellFormed = 0;
5826 ctxt->disableSAX = 1;
5827 }
5828 NEXT;
5829}
5830
5831/**
5832 * xmlParseAttribute:
5833 * @ctxt: an XML parser context
5834 * @value: a xmlChar ** used to store the value of the attribute
5835 *
5836 * parse an attribute
5837 *
5838 * [41] Attribute ::= Name Eq AttValue
5839 *
5840 * [ WFC: No External Entity References ]
5841 * Attribute values cannot contain direct or indirect entity references
5842 * to external entities.
5843 *
5844 * [ WFC: No < in Attribute Values ]
5845 * The replacement text of any entity referred to directly or indirectly in
5846 * an attribute value (other than "&lt;") must not contain a <.
5847 *
5848 * [ VC: Attribute Value Type ]
5849 * The attribute must have been declared; the value must be of the type
5850 * declared for it.
5851 *
5852 * [25] Eq ::= S? '=' S?
5853 *
5854 * With namespace:
5855 *
5856 * [NS 11] Attribute ::= QName Eq AttValue
5857 *
5858 * Also the case QName == xmlns:??? is handled independently as a namespace
5859 * definition.
5860 *
5861 * Returns the attribute name, and the value in *value.
5862 */
5863
5864xmlChar *
5865xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5866 xmlChar *name, *val;
5867
5868 *value = NULL;
5869 name = xmlParseName(ctxt);
5870 if (name == NULL) {
5871 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5873 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5874 ctxt->wellFormed = 0;
5875 ctxt->disableSAX = 1;
5876 return(NULL);
5877 }
5878
5879 /*
5880 * read the value
5881 */
5882 SKIP_BLANKS;
5883 if (RAW == '=') {
5884 NEXT;
5885 SKIP_BLANKS;
5886 val = xmlParseAttValue(ctxt);
5887 ctxt->instate = XML_PARSER_CONTENT;
5888 } else {
5889 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5891 ctxt->sax->error(ctxt->userData,
5892 "Specification mandate value for attribute %s\n", name);
5893 ctxt->wellFormed = 0;
5894 ctxt->disableSAX = 1;
5895 xmlFree(name);
5896 return(NULL);
5897 }
5898
5899 /*
5900 * Check that xml:lang conforms to the specification
5901 * No more registered as an error, just generate a warning now
5902 * since this was deprecated in XML second edition
5903 */
5904 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5905 if (!xmlCheckLanguageID(val)) {
5906 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5907 ctxt->sax->warning(ctxt->userData,
5908 "Malformed value for xml:lang : %s\n", val);
5909 }
5910 }
5911
5912 /*
5913 * Check that xml:space conforms to the specification
5914 */
5915 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5916 if (xmlStrEqual(val, BAD_CAST "default"))
5917 *(ctxt->space) = 0;
5918 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5919 *(ctxt->space) = 1;
5920 else {
5921 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5923 ctxt->sax->error(ctxt->userData,
5924"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5925 val);
5926 ctxt->wellFormed = 0;
5927 ctxt->disableSAX = 1;
5928 }
5929 }
5930
5931 *value = val;
5932 return(name);
5933}
5934
5935/**
5936 * xmlParseStartTag:
5937 * @ctxt: an XML parser context
5938 *
5939 * parse a start of tag either for rule element or
5940 * EmptyElement. In both case we don't parse the tag closing chars.
5941 *
5942 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5943 *
5944 * [ WFC: Unique Att Spec ]
5945 * No attribute name may appear more than once in the same start-tag or
5946 * empty-element tag.
5947 *
5948 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5949 *
5950 * [ WFC: Unique Att Spec ]
5951 * No attribute name may appear more than once in the same start-tag or
5952 * empty-element tag.
5953 *
5954 * With namespace:
5955 *
5956 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5957 *
5958 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5959 *
5960 * Returns the element name parsed
5961 */
5962
5963xmlChar *
5964xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5965 xmlChar *name;
5966 xmlChar *attname;
5967 xmlChar *attvalue;
5968 const xmlChar **atts = NULL;
5969 int nbatts = 0;
5970 int maxatts = 0;
5971 int i;
5972
5973 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00005974 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00005975
5976 name = xmlParseName(ctxt);
5977 if (name == NULL) {
5978 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5980 ctxt->sax->error(ctxt->userData,
5981 "xmlParseStartTag: invalid element name\n");
5982 ctxt->wellFormed = 0;
5983 ctxt->disableSAX = 1;
5984 return(NULL);
5985 }
5986
5987 /*
5988 * Now parse the attributes, it ends up with the ending
5989 *
5990 * (S Attribute)* S?
5991 */
5992 SKIP_BLANKS;
5993 GROW;
5994
Daniel Veillard21a0f912001-02-25 19:54:14 +00005995 while ((RAW != '>') &&
5996 ((RAW != '/') || (NXT(1) != '>')) &&
5997 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005998 const xmlChar *q = CUR_PTR;
5999 int cons = ctxt->input->consumed;
6000
6001 attname = xmlParseAttribute(ctxt, &attvalue);
6002 if ((attname != NULL) && (attvalue != NULL)) {
6003 /*
6004 * [ WFC: Unique Att Spec ]
6005 * No attribute name may appear more than once in the same
6006 * start-tag or empty-element tag.
6007 */
6008 for (i = 0; i < nbatts;i += 2) {
6009 if (xmlStrEqual(atts[i], attname)) {
6010 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6012 ctxt->sax->error(ctxt->userData,
6013 "Attribute %s redefined\n",
6014 attname);
6015 ctxt->wellFormed = 0;
6016 ctxt->disableSAX = 1;
6017 xmlFree(attname);
6018 xmlFree(attvalue);
6019 goto failed;
6020 }
6021 }
6022
6023 /*
6024 * Add the pair to atts
6025 */
6026 if (atts == NULL) {
6027 maxatts = 10;
6028 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6029 if (atts == NULL) {
6030 xmlGenericError(xmlGenericErrorContext,
6031 "malloc of %ld byte failed\n",
6032 maxatts * (long)sizeof(xmlChar *));
6033 return(NULL);
6034 }
6035 } else if (nbatts + 4 > maxatts) {
6036 maxatts *= 2;
6037 atts = (const xmlChar **) xmlRealloc((void *) atts,
6038 maxatts * sizeof(xmlChar *));
6039 if (atts == NULL) {
6040 xmlGenericError(xmlGenericErrorContext,
6041 "realloc of %ld byte failed\n",
6042 maxatts * (long)sizeof(xmlChar *));
6043 return(NULL);
6044 }
6045 }
6046 atts[nbatts++] = attname;
6047 atts[nbatts++] = attvalue;
6048 atts[nbatts] = NULL;
6049 atts[nbatts + 1] = NULL;
6050 } else {
6051 if (attname != NULL)
6052 xmlFree(attname);
6053 if (attvalue != NULL)
6054 xmlFree(attvalue);
6055 }
6056
6057failed:
6058
6059 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6060 break;
6061 if (!IS_BLANK(RAW)) {
6062 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6064 ctxt->sax->error(ctxt->userData,
6065 "attributes construct error\n");
6066 ctxt->wellFormed = 0;
6067 ctxt->disableSAX = 1;
6068 }
6069 SKIP_BLANKS;
6070 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6071 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6073 ctxt->sax->error(ctxt->userData,
6074 "xmlParseStartTag: problem parsing attributes\n");
6075 ctxt->wellFormed = 0;
6076 ctxt->disableSAX = 1;
6077 break;
6078 }
6079 GROW;
6080 }
6081
6082 /*
6083 * SAX: Start of Element !
6084 */
6085 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6086 (!ctxt->disableSAX))
6087 ctxt->sax->startElement(ctxt->userData, name, atts);
6088
6089 if (atts != NULL) {
6090 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6091 xmlFree((void *) atts);
6092 }
6093 return(name);
6094}
6095
6096/**
6097 * xmlParseEndTag:
6098 * @ctxt: an XML parser context
6099 *
6100 * parse an end of tag
6101 *
6102 * [42] ETag ::= '</' Name S? '>'
6103 *
6104 * With namespace
6105 *
6106 * [NS 9] ETag ::= '</' QName S? '>'
6107 */
6108
6109void
6110xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6111 xmlChar *name;
6112 xmlChar *oldname;
6113
6114 GROW;
6115 if ((RAW != '<') || (NXT(1) != '/')) {
6116 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6118 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6119 ctxt->wellFormed = 0;
6120 ctxt->disableSAX = 1;
6121 return;
6122 }
6123 SKIP(2);
6124
6125 name = xmlParseName(ctxt);
6126
6127 /*
6128 * We should definitely be at the ending "S? '>'" part
6129 */
6130 GROW;
6131 SKIP_BLANKS;
6132 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6133 ctxt->errNo = XML_ERR_GT_REQUIRED;
6134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6135 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6136 ctxt->wellFormed = 0;
6137 ctxt->disableSAX = 1;
6138 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006139 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006140
6141 /*
6142 * [ WFC: Element Type Match ]
6143 * The Name in an element's end-tag must match the element type in the
6144 * start-tag.
6145 *
6146 */
6147 if ((name == NULL) || (ctxt->name == NULL) ||
6148 (!xmlStrEqual(name, ctxt->name))) {
6149 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6150 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6151 if ((name != NULL) && (ctxt->name != NULL)) {
6152 ctxt->sax->error(ctxt->userData,
6153 "Opening and ending tag mismatch: %s and %s\n",
6154 ctxt->name, name);
6155 } else if (ctxt->name != NULL) {
6156 ctxt->sax->error(ctxt->userData,
6157 "Ending tag eror for: %s\n", ctxt->name);
6158 } else {
6159 ctxt->sax->error(ctxt->userData,
6160 "Ending tag error: internal error ???\n");
6161 }
6162
6163 }
6164 ctxt->wellFormed = 0;
6165 ctxt->disableSAX = 1;
6166 }
6167
6168 /*
6169 * SAX: End of Tag
6170 */
6171 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6172 (!ctxt->disableSAX))
6173 ctxt->sax->endElement(ctxt->userData, name);
6174
6175 if (name != NULL)
6176 xmlFree(name);
6177 oldname = namePop(ctxt);
6178 spacePop(ctxt);
6179 if (oldname != NULL) {
6180#ifdef DEBUG_STACK
6181 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6182#endif
6183 xmlFree(oldname);
6184 }
6185 return;
6186}
6187
6188/**
6189 * xmlParseCDSect:
6190 * @ctxt: an XML parser context
6191 *
6192 * Parse escaped pure raw content.
6193 *
6194 * [18] CDSect ::= CDStart CData CDEnd
6195 *
6196 * [19] CDStart ::= '<![CDATA['
6197 *
6198 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6199 *
6200 * [21] CDEnd ::= ']]>'
6201 */
6202void
6203xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6204 xmlChar *buf = NULL;
6205 int len = 0;
6206 int size = XML_PARSER_BUFFER_SIZE;
6207 int r, rl;
6208 int s, sl;
6209 int cur, l;
6210 int count = 0;
6211
6212 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6213 (NXT(2) == '[') && (NXT(3) == 'C') &&
6214 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6215 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6216 (NXT(8) == '[')) {
6217 SKIP(9);
6218 } else
6219 return;
6220
6221 ctxt->instate = XML_PARSER_CDATA_SECTION;
6222 r = CUR_CHAR(rl);
6223 if (!IS_CHAR(r)) {
6224 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6226 ctxt->sax->error(ctxt->userData,
6227 "CData section not finished\n");
6228 ctxt->wellFormed = 0;
6229 ctxt->disableSAX = 1;
6230 ctxt->instate = XML_PARSER_CONTENT;
6231 return;
6232 }
6233 NEXTL(rl);
6234 s = CUR_CHAR(sl);
6235 if (!IS_CHAR(s)) {
6236 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6238 ctxt->sax->error(ctxt->userData,
6239 "CData section not finished\n");
6240 ctxt->wellFormed = 0;
6241 ctxt->disableSAX = 1;
6242 ctxt->instate = XML_PARSER_CONTENT;
6243 return;
6244 }
6245 NEXTL(sl);
6246 cur = CUR_CHAR(l);
6247 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6248 if (buf == NULL) {
6249 xmlGenericError(xmlGenericErrorContext,
6250 "malloc of %d byte failed\n", size);
6251 return;
6252 }
6253 while (IS_CHAR(cur) &&
6254 ((r != ']') || (s != ']') || (cur != '>'))) {
6255 if (len + 5 >= size) {
6256 size *= 2;
6257 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6258 if (buf == NULL) {
6259 xmlGenericError(xmlGenericErrorContext,
6260 "realloc of %d byte failed\n", size);
6261 return;
6262 }
6263 }
6264 COPY_BUF(rl,buf,len,r);
6265 r = s;
6266 rl = sl;
6267 s = cur;
6268 sl = l;
6269 count++;
6270 if (count > 50) {
6271 GROW;
6272 count = 0;
6273 }
6274 NEXTL(l);
6275 cur = CUR_CHAR(l);
6276 }
6277 buf[len] = 0;
6278 ctxt->instate = XML_PARSER_CONTENT;
6279 if (cur != '>') {
6280 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6282 ctxt->sax->error(ctxt->userData,
6283 "CData section not finished\n%.50s\n", buf);
6284 ctxt->wellFormed = 0;
6285 ctxt->disableSAX = 1;
6286 xmlFree(buf);
6287 return;
6288 }
6289 NEXTL(l);
6290
6291 /*
6292 * Ok the buffer is to be consumed as cdata.
6293 */
6294 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6295 if (ctxt->sax->cdataBlock != NULL)
6296 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6297 }
6298 xmlFree(buf);
6299}
6300
6301/**
6302 * xmlParseContent:
6303 * @ctxt: an XML parser context
6304 *
6305 * Parse a content:
6306 *
6307 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6308 */
6309
6310void
6311xmlParseContent(xmlParserCtxtPtr ctxt) {
6312 GROW;
6313 while (((RAW != 0) || (ctxt->token != 0)) &&
6314 ((RAW != '<') || (NXT(1) != '/'))) {
6315 const xmlChar *test = CUR_PTR;
6316 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006317 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006318 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006319
6320 /*
6321 * Handle possible processed charrefs.
6322 */
6323 if (ctxt->token != 0) {
6324 xmlParseCharData(ctxt, 0);
6325 }
6326 /*
6327 * First case : a Processing Instruction.
6328 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006329 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006330 xmlParsePI(ctxt);
6331 }
6332
6333 /*
6334 * Second case : a CDSection
6335 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006336 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006337 (NXT(2) == '[') && (NXT(3) == 'C') &&
6338 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6339 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6340 (NXT(8) == '[')) {
6341 xmlParseCDSect(ctxt);
6342 }
6343
6344 /*
6345 * Third case : a comment
6346 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006347 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006348 (NXT(2) == '-') && (NXT(3) == '-')) {
6349 xmlParseComment(ctxt);
6350 ctxt->instate = XML_PARSER_CONTENT;
6351 }
6352
6353 /*
6354 * Fourth case : a sub-element.
6355 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006356 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006357 xmlParseElement(ctxt);
6358 }
6359
6360 /*
6361 * Fifth case : a reference. If if has not been resolved,
6362 * parsing returns it's Name, create the node
6363 */
6364
Daniel Veillard21a0f912001-02-25 19:54:14 +00006365 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006366 xmlParseReference(ctxt);
6367 }
6368
6369 /*
6370 * Last case, text. Note that References are handled directly.
6371 */
6372 else {
6373 xmlParseCharData(ctxt, 0);
6374 }
6375
6376 GROW;
6377 /*
6378 * Pop-up of finished entities.
6379 */
6380 while ((RAW == 0) && (ctxt->inputNr > 1))
6381 xmlPopInput(ctxt);
6382 SHRINK;
6383
6384 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6385 (tok == ctxt->token)) {
6386 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6388 ctxt->sax->error(ctxt->userData,
6389 "detected an error in element content\n");
6390 ctxt->wellFormed = 0;
6391 ctxt->disableSAX = 1;
6392 ctxt->instate = XML_PARSER_EOF;
6393 break;
6394 }
6395 }
6396}
6397
6398/**
6399 * xmlParseElement:
6400 * @ctxt: an XML parser context
6401 *
6402 * parse an XML element, this is highly recursive
6403 *
6404 * [39] element ::= EmptyElemTag | STag content ETag
6405 *
6406 * [ WFC: Element Type Match ]
6407 * The Name in an element's end-tag must match the element type in the
6408 * start-tag.
6409 *
6410 * [ VC: Element Valid ]
6411 * An element is valid if there is a declaration matching elementdecl
6412 * where the Name matches the element type and one of the following holds:
6413 * - The declaration matches EMPTY and the element has no content.
6414 * - The declaration matches children and the sequence of child elements
6415 * belongs to the language generated by the regular expression in the
6416 * content model, with optional white space (characters matching the
6417 * nonterminal S) between each pair of child elements.
6418 * - The declaration matches Mixed and the content consists of character
6419 * data and child elements whose types match names in the content model.
6420 * - The declaration matches ANY, and the types of any child elements have
6421 * been declared.
6422 */
6423
6424void
6425xmlParseElement(xmlParserCtxtPtr ctxt) {
6426 const xmlChar *openTag = CUR_PTR;
6427 xmlChar *name;
6428 xmlChar *oldname;
6429 xmlParserNodeInfo node_info;
6430 xmlNodePtr ret;
6431
6432 /* Capture start position */
6433 if (ctxt->record_info) {
6434 node_info.begin_pos = ctxt->input->consumed +
6435 (CUR_PTR - ctxt->input->base);
6436 node_info.begin_line = ctxt->input->line;
6437 }
6438
6439 if (ctxt->spaceNr == 0)
6440 spacePush(ctxt, -1);
6441 else
6442 spacePush(ctxt, *ctxt->space);
6443
6444 name = xmlParseStartTag(ctxt);
6445 if (name == NULL) {
6446 spacePop(ctxt);
6447 return;
6448 }
6449 namePush(ctxt, name);
6450 ret = ctxt->node;
6451
6452 /*
6453 * [ VC: Root Element Type ]
6454 * The Name in the document type declaration must match the element
6455 * type of the root element.
6456 */
6457 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6458 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6459 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6460
6461 /*
6462 * Check for an Empty Element.
6463 */
6464 if ((RAW == '/') && (NXT(1) == '>')) {
6465 SKIP(2);
6466 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6467 (!ctxt->disableSAX))
6468 ctxt->sax->endElement(ctxt->userData, name);
6469 oldname = namePop(ctxt);
6470 spacePop(ctxt);
6471 if (oldname != NULL) {
6472#ifdef DEBUG_STACK
6473 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6474#endif
6475 xmlFree(oldname);
6476 }
6477 if ( ret != NULL && ctxt->record_info ) {
6478 node_info.end_pos = ctxt->input->consumed +
6479 (CUR_PTR - ctxt->input->base);
6480 node_info.end_line = ctxt->input->line;
6481 node_info.node = ret;
6482 xmlParserAddNodeInfo(ctxt, &node_info);
6483 }
6484 return;
6485 }
6486 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006487 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006488 } else {
6489 ctxt->errNo = XML_ERR_GT_REQUIRED;
6490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6491 ctxt->sax->error(ctxt->userData,
6492 "Couldn't find end of Start Tag\n%.30s\n",
6493 openTag);
6494 ctxt->wellFormed = 0;
6495 ctxt->disableSAX = 1;
6496
6497 /*
6498 * end of parsing of this node.
6499 */
6500 nodePop(ctxt);
6501 oldname = namePop(ctxt);
6502 spacePop(ctxt);
6503 if (oldname != NULL) {
6504#ifdef DEBUG_STACK
6505 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6506#endif
6507 xmlFree(oldname);
6508 }
6509
6510 /*
6511 * Capture end position and add node
6512 */
6513 if ( ret != NULL && ctxt->record_info ) {
6514 node_info.end_pos = ctxt->input->consumed +
6515 (CUR_PTR - ctxt->input->base);
6516 node_info.end_line = ctxt->input->line;
6517 node_info.node = ret;
6518 xmlParserAddNodeInfo(ctxt, &node_info);
6519 }
6520 return;
6521 }
6522
6523 /*
6524 * Parse the content of the element:
6525 */
6526 xmlParseContent(ctxt);
6527 if (!IS_CHAR(RAW)) {
6528 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6530 ctxt->sax->error(ctxt->userData,
6531 "Premature end of data in tag %.30s\n", openTag);
6532 ctxt->wellFormed = 0;
6533 ctxt->disableSAX = 1;
6534
6535 /*
6536 * end of parsing of this node.
6537 */
6538 nodePop(ctxt);
6539 oldname = namePop(ctxt);
6540 spacePop(ctxt);
6541 if (oldname != NULL) {
6542#ifdef DEBUG_STACK
6543 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6544#endif
6545 xmlFree(oldname);
6546 }
6547 return;
6548 }
6549
6550 /*
6551 * parse the end of tag: '</' should be here.
6552 */
6553 xmlParseEndTag(ctxt);
6554
6555 /*
6556 * Capture end position and add node
6557 */
6558 if ( ret != NULL && ctxt->record_info ) {
6559 node_info.end_pos = ctxt->input->consumed +
6560 (CUR_PTR - ctxt->input->base);
6561 node_info.end_line = ctxt->input->line;
6562 node_info.node = ret;
6563 xmlParserAddNodeInfo(ctxt, &node_info);
6564 }
6565}
6566
6567/**
6568 * xmlParseVersionNum:
6569 * @ctxt: an XML parser context
6570 *
6571 * parse the XML version value.
6572 *
6573 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6574 *
6575 * Returns the string giving the XML version number, or NULL
6576 */
6577xmlChar *
6578xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6579 xmlChar *buf = NULL;
6580 int len = 0;
6581 int size = 10;
6582 xmlChar cur;
6583
6584 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6585 if (buf == NULL) {
6586 xmlGenericError(xmlGenericErrorContext,
6587 "malloc of %d byte failed\n", size);
6588 return(NULL);
6589 }
6590 cur = CUR;
6591 while (((cur >= 'a') && (cur <= 'z')) ||
6592 ((cur >= 'A') && (cur <= 'Z')) ||
6593 ((cur >= '0') && (cur <= '9')) ||
6594 (cur == '_') || (cur == '.') ||
6595 (cur == ':') || (cur == '-')) {
6596 if (len + 1 >= size) {
6597 size *= 2;
6598 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6599 if (buf == NULL) {
6600 xmlGenericError(xmlGenericErrorContext,
6601 "realloc of %d byte failed\n", size);
6602 return(NULL);
6603 }
6604 }
6605 buf[len++] = cur;
6606 NEXT;
6607 cur=CUR;
6608 }
6609 buf[len] = 0;
6610 return(buf);
6611}
6612
6613/**
6614 * xmlParseVersionInfo:
6615 * @ctxt: an XML parser context
6616 *
6617 * parse the XML version.
6618 *
6619 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6620 *
6621 * [25] Eq ::= S? '=' S?
6622 *
6623 * Returns the version string, e.g. "1.0"
6624 */
6625
6626xmlChar *
6627xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6628 xmlChar *version = NULL;
6629 const xmlChar *q;
6630
6631 if ((RAW == 'v') && (NXT(1) == 'e') &&
6632 (NXT(2) == 'r') && (NXT(3) == 's') &&
6633 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6634 (NXT(6) == 'n')) {
6635 SKIP(7);
6636 SKIP_BLANKS;
6637 if (RAW != '=') {
6638 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6640 ctxt->sax->error(ctxt->userData,
6641 "xmlParseVersionInfo : expected '='\n");
6642 ctxt->wellFormed = 0;
6643 ctxt->disableSAX = 1;
6644 return(NULL);
6645 }
6646 NEXT;
6647 SKIP_BLANKS;
6648 if (RAW == '"') {
6649 NEXT;
6650 q = CUR_PTR;
6651 version = xmlParseVersionNum(ctxt);
6652 if (RAW != '"') {
6653 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6655 ctxt->sax->error(ctxt->userData,
6656 "String not closed\n%.50s\n", q);
6657 ctxt->wellFormed = 0;
6658 ctxt->disableSAX = 1;
6659 } else
6660 NEXT;
6661 } else if (RAW == '\''){
6662 NEXT;
6663 q = CUR_PTR;
6664 version = xmlParseVersionNum(ctxt);
6665 if (RAW != '\'') {
6666 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6668 ctxt->sax->error(ctxt->userData,
6669 "String not closed\n%.50s\n", q);
6670 ctxt->wellFormed = 0;
6671 ctxt->disableSAX = 1;
6672 } else
6673 NEXT;
6674 } else {
6675 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6677 ctxt->sax->error(ctxt->userData,
6678 "xmlParseVersionInfo : expected ' or \"\n");
6679 ctxt->wellFormed = 0;
6680 ctxt->disableSAX = 1;
6681 }
6682 }
6683 return(version);
6684}
6685
6686/**
6687 * xmlParseEncName:
6688 * @ctxt: an XML parser context
6689 *
6690 * parse the XML encoding name
6691 *
6692 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6693 *
6694 * Returns the encoding name value or NULL
6695 */
6696xmlChar *
6697xmlParseEncName(xmlParserCtxtPtr ctxt) {
6698 xmlChar *buf = NULL;
6699 int len = 0;
6700 int size = 10;
6701 xmlChar cur;
6702
6703 cur = CUR;
6704 if (((cur >= 'a') && (cur <= 'z')) ||
6705 ((cur >= 'A') && (cur <= 'Z'))) {
6706 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6707 if (buf == NULL) {
6708 xmlGenericError(xmlGenericErrorContext,
6709 "malloc of %d byte failed\n", size);
6710 return(NULL);
6711 }
6712
6713 buf[len++] = cur;
6714 NEXT;
6715 cur = CUR;
6716 while (((cur >= 'a') && (cur <= 'z')) ||
6717 ((cur >= 'A') && (cur <= 'Z')) ||
6718 ((cur >= '0') && (cur <= '9')) ||
6719 (cur == '.') || (cur == '_') ||
6720 (cur == '-')) {
6721 if (len + 1 >= size) {
6722 size *= 2;
6723 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6724 if (buf == NULL) {
6725 xmlGenericError(xmlGenericErrorContext,
6726 "realloc of %d byte failed\n", size);
6727 return(NULL);
6728 }
6729 }
6730 buf[len++] = cur;
6731 NEXT;
6732 cur = CUR;
6733 if (cur == 0) {
6734 SHRINK;
6735 GROW;
6736 cur = CUR;
6737 }
6738 }
6739 buf[len] = 0;
6740 } else {
6741 ctxt->errNo = XML_ERR_ENCODING_NAME;
6742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6743 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6744 ctxt->wellFormed = 0;
6745 ctxt->disableSAX = 1;
6746 }
6747 return(buf);
6748}
6749
6750/**
6751 * xmlParseEncodingDecl:
6752 * @ctxt: an XML parser context
6753 *
6754 * parse the XML encoding declaration
6755 *
6756 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6757 *
6758 * this setups the conversion filters.
6759 *
6760 * Returns the encoding value or NULL
6761 */
6762
6763xmlChar *
6764xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6765 xmlChar *encoding = NULL;
6766 const xmlChar *q;
6767
6768 SKIP_BLANKS;
6769 if ((RAW == 'e') && (NXT(1) == 'n') &&
6770 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6771 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6772 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6773 SKIP(8);
6774 SKIP_BLANKS;
6775 if (RAW != '=') {
6776 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6778 ctxt->sax->error(ctxt->userData,
6779 "xmlParseEncodingDecl : expected '='\n");
6780 ctxt->wellFormed = 0;
6781 ctxt->disableSAX = 1;
6782 return(NULL);
6783 }
6784 NEXT;
6785 SKIP_BLANKS;
6786 if (RAW == '"') {
6787 NEXT;
6788 q = CUR_PTR;
6789 encoding = xmlParseEncName(ctxt);
6790 if (RAW != '"') {
6791 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6793 ctxt->sax->error(ctxt->userData,
6794 "String not closed\n%.50s\n", q);
6795 ctxt->wellFormed = 0;
6796 ctxt->disableSAX = 1;
6797 } else
6798 NEXT;
6799 } else if (RAW == '\''){
6800 NEXT;
6801 q = CUR_PTR;
6802 encoding = xmlParseEncName(ctxt);
6803 if (RAW != '\'') {
6804 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6806 ctxt->sax->error(ctxt->userData,
6807 "String not closed\n%.50s\n", q);
6808 ctxt->wellFormed = 0;
6809 ctxt->disableSAX = 1;
6810 } else
6811 NEXT;
6812 } else if (RAW == '"'){
6813 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6815 ctxt->sax->error(ctxt->userData,
6816 "xmlParseEncodingDecl : expected ' or \"\n");
6817 ctxt->wellFormed = 0;
6818 ctxt->disableSAX = 1;
6819 }
6820 if (encoding != NULL) {
6821 xmlCharEncoding enc;
6822 xmlCharEncodingHandlerPtr handler;
6823
6824 if (ctxt->input->encoding != NULL)
6825 xmlFree((xmlChar *) ctxt->input->encoding);
6826 ctxt->input->encoding = encoding;
6827
6828 enc = xmlParseCharEncoding((const char *) encoding);
6829 /*
6830 * registered set of known encodings
6831 */
6832 if (enc != XML_CHAR_ENCODING_ERROR) {
6833 xmlSwitchEncoding(ctxt, enc);
6834 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6835 xmlFree(encoding);
6836 return(NULL);
6837 }
6838 } else {
6839 /*
6840 * fallback for unknown encodings
6841 */
6842 handler = xmlFindCharEncodingHandler((const char *) encoding);
6843 if (handler != NULL) {
6844 xmlSwitchToEncoding(ctxt, handler);
6845 } else {
6846 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6848 ctxt->sax->error(ctxt->userData,
6849 "Unsupported encoding %s\n", encoding);
6850 return(NULL);
6851 }
6852 }
6853 }
6854 }
6855 return(encoding);
6856}
6857
6858/**
6859 * xmlParseSDDecl:
6860 * @ctxt: an XML parser context
6861 *
6862 * parse the XML standalone declaration
6863 *
6864 * [32] SDDecl ::= S 'standalone' Eq
6865 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6866 *
6867 * [ VC: Standalone Document Declaration ]
6868 * TODO The standalone document declaration must have the value "no"
6869 * if any external markup declarations contain declarations of:
6870 * - attributes with default values, if elements to which these
6871 * attributes apply appear in the document without specifications
6872 * of values for these attributes, or
6873 * - entities (other than amp, lt, gt, apos, quot), if references
6874 * to those entities appear in the document, or
6875 * - attributes with values subject to normalization, where the
6876 * attribute appears in the document with a value which will change
6877 * as a result of normalization, or
6878 * - element types with element content, if white space occurs directly
6879 * within any instance of those types.
6880 *
6881 * Returns 1 if standalone, 0 otherwise
6882 */
6883
6884int
6885xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6886 int standalone = -1;
6887
6888 SKIP_BLANKS;
6889 if ((RAW == 's') && (NXT(1) == 't') &&
6890 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6891 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6892 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6893 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6894 SKIP(10);
6895 SKIP_BLANKS;
6896 if (RAW != '=') {
6897 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6899 ctxt->sax->error(ctxt->userData,
6900 "XML standalone declaration : expected '='\n");
6901 ctxt->wellFormed = 0;
6902 ctxt->disableSAX = 1;
6903 return(standalone);
6904 }
6905 NEXT;
6906 SKIP_BLANKS;
6907 if (RAW == '\''){
6908 NEXT;
6909 if ((RAW == 'n') && (NXT(1) == 'o')) {
6910 standalone = 0;
6911 SKIP(2);
6912 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6913 (NXT(2) == 's')) {
6914 standalone = 1;
6915 SKIP(3);
6916 } else {
6917 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6919 ctxt->sax->error(ctxt->userData,
6920 "standalone accepts only 'yes' or 'no'\n");
6921 ctxt->wellFormed = 0;
6922 ctxt->disableSAX = 1;
6923 }
6924 if (RAW != '\'') {
6925 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6927 ctxt->sax->error(ctxt->userData, "String not closed\n");
6928 ctxt->wellFormed = 0;
6929 ctxt->disableSAX = 1;
6930 } else
6931 NEXT;
6932 } else if (RAW == '"'){
6933 NEXT;
6934 if ((RAW == 'n') && (NXT(1) == 'o')) {
6935 standalone = 0;
6936 SKIP(2);
6937 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6938 (NXT(2) == 's')) {
6939 standalone = 1;
6940 SKIP(3);
6941 } else {
6942 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6944 ctxt->sax->error(ctxt->userData,
6945 "standalone accepts only 'yes' or 'no'\n");
6946 ctxt->wellFormed = 0;
6947 ctxt->disableSAX = 1;
6948 }
6949 if (RAW != '"') {
6950 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6952 ctxt->sax->error(ctxt->userData, "String not closed\n");
6953 ctxt->wellFormed = 0;
6954 ctxt->disableSAX = 1;
6955 } else
6956 NEXT;
6957 } else {
6958 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6960 ctxt->sax->error(ctxt->userData,
6961 "Standalone value not found\n");
6962 ctxt->wellFormed = 0;
6963 ctxt->disableSAX = 1;
6964 }
6965 }
6966 return(standalone);
6967}
6968
6969/**
6970 * xmlParseXMLDecl:
6971 * @ctxt: an XML parser context
6972 *
6973 * parse an XML declaration header
6974 *
6975 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6976 */
6977
6978void
6979xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
6980 xmlChar *version;
6981
6982 /*
6983 * We know that '<?xml' is here.
6984 */
6985 SKIP(5);
6986
6987 if (!IS_BLANK(RAW)) {
6988 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6990 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
6991 ctxt->wellFormed = 0;
6992 ctxt->disableSAX = 1;
6993 }
6994 SKIP_BLANKS;
6995
6996 /*
6997 * We should have the VersionInfo here.
6998 */
6999 version = xmlParseVersionInfo(ctxt);
7000 if (version == NULL)
7001 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7002 ctxt->version = xmlStrdup(version);
7003 xmlFree(version);
7004
7005 /*
7006 * We may have the encoding declaration
7007 */
7008 if (!IS_BLANK(RAW)) {
7009 if ((RAW == '?') && (NXT(1) == '>')) {
7010 SKIP(2);
7011 return;
7012 }
7013 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7015 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7016 ctxt->wellFormed = 0;
7017 ctxt->disableSAX = 1;
7018 }
7019 xmlParseEncodingDecl(ctxt);
7020 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7021 /*
7022 * The XML REC instructs us to stop parsing right here
7023 */
7024 return;
7025 }
7026
7027 /*
7028 * We may have the standalone status.
7029 */
7030 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7031 if ((RAW == '?') && (NXT(1) == '>')) {
7032 SKIP(2);
7033 return;
7034 }
7035 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7037 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7038 ctxt->wellFormed = 0;
7039 ctxt->disableSAX = 1;
7040 }
7041 SKIP_BLANKS;
7042 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7043
7044 SKIP_BLANKS;
7045 if ((RAW == '?') && (NXT(1) == '>')) {
7046 SKIP(2);
7047 } else if (RAW == '>') {
7048 /* Deprecated old WD ... */
7049 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7051 ctxt->sax->error(ctxt->userData,
7052 "XML declaration must end-up with '?>'\n");
7053 ctxt->wellFormed = 0;
7054 ctxt->disableSAX = 1;
7055 NEXT;
7056 } else {
7057 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7059 ctxt->sax->error(ctxt->userData,
7060 "parsing XML declaration: '?>' expected\n");
7061 ctxt->wellFormed = 0;
7062 ctxt->disableSAX = 1;
7063 MOVETO_ENDTAG(CUR_PTR);
7064 NEXT;
7065 }
7066}
7067
7068/**
7069 * xmlParseMisc:
7070 * @ctxt: an XML parser context
7071 *
7072 * parse an XML Misc* optionnal field.
7073 *
7074 * [27] Misc ::= Comment | PI | S
7075 */
7076
7077void
7078xmlParseMisc(xmlParserCtxtPtr ctxt) {
7079 while (((RAW == '<') && (NXT(1) == '?')) ||
7080 ((RAW == '<') && (NXT(1) == '!') &&
7081 (NXT(2) == '-') && (NXT(3) == '-')) ||
7082 IS_BLANK(CUR)) {
7083 if ((RAW == '<') && (NXT(1) == '?')) {
7084 xmlParsePI(ctxt);
7085 } else if (IS_BLANK(CUR)) {
7086 NEXT;
7087 } else
7088 xmlParseComment(ctxt);
7089 }
7090}
7091
7092/**
7093 * xmlParseDocument:
7094 * @ctxt: an XML parser context
7095 *
7096 * parse an XML document (and build a tree if using the standard SAX
7097 * interface).
7098 *
7099 * [1] document ::= prolog element Misc*
7100 *
7101 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7102 *
7103 * Returns 0, -1 in case of error. the parser context is augmented
7104 * as a result of the parsing.
7105 */
7106
7107int
7108xmlParseDocument(xmlParserCtxtPtr ctxt) {
7109 xmlChar start[4];
7110 xmlCharEncoding enc;
7111
7112 xmlInitParser();
7113
7114 GROW;
7115
7116 /*
7117 * SAX: beginning of the document processing.
7118 */
7119 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7120 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7121
7122 /*
7123 * Get the 4 first bytes and decode the charset
7124 * if enc != XML_CHAR_ENCODING_NONE
7125 * plug some encoding conversion routines.
7126 */
7127 start[0] = RAW;
7128 start[1] = NXT(1);
7129 start[2] = NXT(2);
7130 start[3] = NXT(3);
7131 enc = xmlDetectCharEncoding(start, 4);
7132 if (enc != XML_CHAR_ENCODING_NONE) {
7133 xmlSwitchEncoding(ctxt, enc);
7134 }
7135
7136
7137 if (CUR == 0) {
7138 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7140 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7141 ctxt->wellFormed = 0;
7142 ctxt->disableSAX = 1;
7143 }
7144
7145 /*
7146 * Check for the XMLDecl in the Prolog.
7147 */
7148 GROW;
7149 if ((RAW == '<') && (NXT(1) == '?') &&
7150 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7151 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7152
7153 /*
7154 * Note that we will switch encoding on the fly.
7155 */
7156 xmlParseXMLDecl(ctxt);
7157 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7158 /*
7159 * The XML REC instructs us to stop parsing right here
7160 */
7161 return(-1);
7162 }
7163 ctxt->standalone = ctxt->input->standalone;
7164 SKIP_BLANKS;
7165 } else {
7166 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7167 }
7168 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7169 ctxt->sax->startDocument(ctxt->userData);
7170
7171 /*
7172 * The Misc part of the Prolog
7173 */
7174 GROW;
7175 xmlParseMisc(ctxt);
7176
7177 /*
7178 * Then possibly doc type declaration(s) and more Misc
7179 * (doctypedecl Misc*)?
7180 */
7181 GROW;
7182 if ((RAW == '<') && (NXT(1) == '!') &&
7183 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7184 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7185 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7186 (NXT(8) == 'E')) {
7187
7188 ctxt->inSubset = 1;
7189 xmlParseDocTypeDecl(ctxt);
7190 if (RAW == '[') {
7191 ctxt->instate = XML_PARSER_DTD;
7192 xmlParseInternalSubset(ctxt);
7193 }
7194
7195 /*
7196 * Create and update the external subset.
7197 */
7198 ctxt->inSubset = 2;
7199 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7200 (!ctxt->disableSAX))
7201 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7202 ctxt->extSubSystem, ctxt->extSubURI);
7203 ctxt->inSubset = 0;
7204
7205
7206 ctxt->instate = XML_PARSER_PROLOG;
7207 xmlParseMisc(ctxt);
7208 }
7209
7210 /*
7211 * Time to start parsing the tree itself
7212 */
7213 GROW;
7214 if (RAW != '<') {
7215 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7217 ctxt->sax->error(ctxt->userData,
7218 "Start tag expected, '<' not found\n");
7219 ctxt->wellFormed = 0;
7220 ctxt->disableSAX = 1;
7221 ctxt->instate = XML_PARSER_EOF;
7222 } else {
7223 ctxt->instate = XML_PARSER_CONTENT;
7224 xmlParseElement(ctxt);
7225 ctxt->instate = XML_PARSER_EPILOG;
7226
7227
7228 /*
7229 * The Misc part at the end
7230 */
7231 xmlParseMisc(ctxt);
7232
7233 if (RAW != 0) {
7234 ctxt->errNo = XML_ERR_DOCUMENT_END;
7235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7236 ctxt->sax->error(ctxt->userData,
7237 "Extra content at the end of the document\n");
7238 ctxt->wellFormed = 0;
7239 ctxt->disableSAX = 1;
7240 }
7241 ctxt->instate = XML_PARSER_EOF;
7242 }
7243
7244 /*
7245 * SAX: end of the document processing.
7246 */
7247 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7248 (!ctxt->disableSAX))
7249 ctxt->sax->endDocument(ctxt->userData);
7250
7251 if (! ctxt->wellFormed) return(-1);
7252 return(0);
7253}
7254
7255/**
7256 * xmlParseExtParsedEnt:
7257 * @ctxt: an XML parser context
7258 *
7259 * parse a genreral parsed entity
7260 * An external general parsed entity is well-formed if it matches the
7261 * production labeled extParsedEnt.
7262 *
7263 * [78] extParsedEnt ::= TextDecl? content
7264 *
7265 * Returns 0, -1 in case of error. the parser context is augmented
7266 * as a result of the parsing.
7267 */
7268
7269int
7270xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7271 xmlChar start[4];
7272 xmlCharEncoding enc;
7273
7274 xmlDefaultSAXHandlerInit();
7275
7276 GROW;
7277
7278 /*
7279 * SAX: beginning of the document processing.
7280 */
7281 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7282 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7283
7284 /*
7285 * Get the 4 first bytes and decode the charset
7286 * if enc != XML_CHAR_ENCODING_NONE
7287 * plug some encoding conversion routines.
7288 */
7289 start[0] = RAW;
7290 start[1] = NXT(1);
7291 start[2] = NXT(2);
7292 start[3] = NXT(3);
7293 enc = xmlDetectCharEncoding(start, 4);
7294 if (enc != XML_CHAR_ENCODING_NONE) {
7295 xmlSwitchEncoding(ctxt, enc);
7296 }
7297
7298
7299 if (CUR == 0) {
7300 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7302 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7303 ctxt->wellFormed = 0;
7304 ctxt->disableSAX = 1;
7305 }
7306
7307 /*
7308 * Check for the XMLDecl in the Prolog.
7309 */
7310 GROW;
7311 if ((RAW == '<') && (NXT(1) == '?') &&
7312 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7313 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7314
7315 /*
7316 * Note that we will switch encoding on the fly.
7317 */
7318 xmlParseXMLDecl(ctxt);
7319 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7320 /*
7321 * The XML REC instructs us to stop parsing right here
7322 */
7323 return(-1);
7324 }
7325 SKIP_BLANKS;
7326 } else {
7327 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7328 }
7329 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7330 ctxt->sax->startDocument(ctxt->userData);
7331
7332 /*
7333 * Doing validity checking on chunk doesn't make sense
7334 */
7335 ctxt->instate = XML_PARSER_CONTENT;
7336 ctxt->validate = 0;
7337 ctxt->loadsubset = 0;
7338 ctxt->depth = 0;
7339
7340 xmlParseContent(ctxt);
7341
7342 if ((RAW == '<') && (NXT(1) == '/')) {
7343 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7345 ctxt->sax->error(ctxt->userData,
7346 "chunk is not well balanced\n");
7347 ctxt->wellFormed = 0;
7348 ctxt->disableSAX = 1;
7349 } else if (RAW != 0) {
7350 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7352 ctxt->sax->error(ctxt->userData,
7353 "extra content at the end of well balanced chunk\n");
7354 ctxt->wellFormed = 0;
7355 ctxt->disableSAX = 1;
7356 }
7357
7358 /*
7359 * SAX: end of the document processing.
7360 */
7361 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7362 (!ctxt->disableSAX))
7363 ctxt->sax->endDocument(ctxt->userData);
7364
7365 if (! ctxt->wellFormed) return(-1);
7366 return(0);
7367}
7368
7369/************************************************************************
7370 * *
7371 * Progressive parsing interfaces *
7372 * *
7373 ************************************************************************/
7374
7375/**
7376 * xmlParseLookupSequence:
7377 * @ctxt: an XML parser context
7378 * @first: the first char to lookup
7379 * @next: the next char to lookup or zero
7380 * @third: the next char to lookup or zero
7381 *
7382 * Try to find if a sequence (first, next, third) or just (first next) or
7383 * (first) is available in the input stream.
7384 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7385 * to avoid rescanning sequences of bytes, it DOES change the state of the
7386 * parser, do not use liberally.
7387 *
7388 * Returns the index to the current parsing point if the full sequence
7389 * is available, -1 otherwise.
7390 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007391static int
Owen Taylor3473f882001-02-23 17:55:21 +00007392xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7393 xmlChar next, xmlChar third) {
7394 int base, len;
7395 xmlParserInputPtr in;
7396 const xmlChar *buf;
7397
7398 in = ctxt->input;
7399 if (in == NULL) return(-1);
7400 base = in->cur - in->base;
7401 if (base < 0) return(-1);
7402 if (ctxt->checkIndex > base)
7403 base = ctxt->checkIndex;
7404 if (in->buf == NULL) {
7405 buf = in->base;
7406 len = in->length;
7407 } else {
7408 buf = in->buf->buffer->content;
7409 len = in->buf->buffer->use;
7410 }
7411 /* take into account the sequence length */
7412 if (third) len -= 2;
7413 else if (next) len --;
7414 for (;base < len;base++) {
7415 if (buf[base] == first) {
7416 if (third != 0) {
7417 if ((buf[base + 1] != next) ||
7418 (buf[base + 2] != third)) continue;
7419 } else if (next != 0) {
7420 if (buf[base + 1] != next) continue;
7421 }
7422 ctxt->checkIndex = 0;
7423#ifdef DEBUG_PUSH
7424 if (next == 0)
7425 xmlGenericError(xmlGenericErrorContext,
7426 "PP: lookup '%c' found at %d\n",
7427 first, base);
7428 else if (third == 0)
7429 xmlGenericError(xmlGenericErrorContext,
7430 "PP: lookup '%c%c' found at %d\n",
7431 first, next, base);
7432 else
7433 xmlGenericError(xmlGenericErrorContext,
7434 "PP: lookup '%c%c%c' found at %d\n",
7435 first, next, third, base);
7436#endif
7437 return(base - (in->cur - in->base));
7438 }
7439 }
7440 ctxt->checkIndex = base;
7441#ifdef DEBUG_PUSH
7442 if (next == 0)
7443 xmlGenericError(xmlGenericErrorContext,
7444 "PP: lookup '%c' failed\n", first);
7445 else if (third == 0)
7446 xmlGenericError(xmlGenericErrorContext,
7447 "PP: lookup '%c%c' failed\n", first, next);
7448 else
7449 xmlGenericError(xmlGenericErrorContext,
7450 "PP: lookup '%c%c%c' failed\n", first, next, third);
7451#endif
7452 return(-1);
7453}
7454
/**
 * xmlParseTryOrFinish:
 * @ctxt:  an XML parser context
 * @terminate:  last chunk indicator
 *
 * Try to progress on parsing.
 *
 * This is the state machine of the push (progressive) parser. It loops on
 * ctxt->instate and, for each state, checks how many bytes are available
 * past the current input position before handing over to the matching
 * xmlParseXxx routine. Unless @terminate is set, most constructs (PI,
 * comment, DOCTYPE, start/end tag, reference, char data) are only parsed
 * once xmlParseLookupSequence() has found their closing delimiter in the
 * data already pushed; otherwise the function jumps to "done" and waits
 * for more input.
 *
 * NOTE(review): as written, ret is only ever modified in one place (it is
 * incremented by 5 when an XML declaration is recognized), so the returned
 * value is zero on almost every path, including paths where content was
 * parsed successfully.
 *
 * Returns zero if no parsing was possible
 */
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    int ret = 0;
    int avail;          /* number of bytes available after input->cur */
    xmlChar cur, next;  /* first two bytes at the current position */

    /*
     * Debug trace of the state on entry.
     * NOTE(review): XML_PARSER_SYSTEM_LITERAL is handled in the main
     * switch below but has no trace here.
     */
#ifdef DEBUG_PUSH
    switch (ctxt->instate) {
        case XML_PARSER_EOF:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EOF\n"); break;
        case XML_PARSER_START:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START\n"); break;
        case XML_PARSER_MISC:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try MISC\n");break;
        case XML_PARSER_COMMENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try COMMENT\n");break;
        case XML_PARSER_PROLOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PROLOG\n");break;
        case XML_PARSER_START_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START_TAG\n");break;
        case XML_PARSER_CONTENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CONTENT\n");break;
        case XML_PARSER_CDATA_SECTION:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CDATA_SECTION\n");break;
        case XML_PARSER_END_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try END_TAG\n");break;
        case XML_PARSER_ENTITY_DECL:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_DECL\n");break;
        case XML_PARSER_ENTITY_VALUE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_VALUE\n");break;
        case XML_PARSER_ATTRIBUTE_VALUE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ATTRIBUTE_VALUE\n");break;
        case XML_PARSER_DTD:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try DTD\n");break;
        case XML_PARSER_EPILOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EPILOG\n");break;
        case XML_PARSER_PI:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PI\n");break;
        case XML_PARSER_IGNORE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try IGNORE\n");break;
    }
#endif

    while (1) {
        /*
         * Pop-up of finished entities.
         * NOTE(review): RAW is used here before the ctxt->input NULL
         * test just below - confirm RAW cannot dereference a NULL input.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);

        if (ctxt->input ==NULL) break;
        /*
         * Compute how many bytes remain after the current position,
         * from the fixed input length when there is no I/O buffer and
         * from the buffer fill level otherwise.
         */
        if (ctxt->input->buf == NULL)
            avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
        else
            avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
        if (avail < 1)
            goto done;
        switch (ctxt->instate) {
            case XML_PARSER_EOF:
                /*
                 * Document parsing is done !
                 */
                goto done;
            case XML_PARSER_START:
                /*
                 * Very first chars read from the document flow.
                 */
                if (avail < 2)
                    goto done;

                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if (cur == 0) {
                    /* a leading zero byte is reported as an empty document */
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData, "Document is empty\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                if ((cur == '<') && (next == '?')) {
                    /* PI or XML decl */
                    if (avail < 5) return(ret);
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        return(ret);
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    /*
                     * NOTE(review): cur[5] is read while only avail >= 5
                     * (indexes 0..4) has been checked above; this appears
                     * to rely on the byte after the data being readable.
                     */
                    if ((ctxt->input->cur[2] == 'x') &&
                        (ctxt->input->cur[3] == 'm') &&
                        (ctxt->input->cur[4] == 'l') &&
                        (IS_BLANK(ctxt->input->cur[5]))) {
                        ret += 5;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: Parsing XML Decl\n");
#endif
                        xmlParseXMLDecl(ctxt);
                        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
                            /*
                             * The XML REC instructs us to stop parsing right
                             * here
                             */
                            ctxt->instate = XML_PARSER_EOF;
                            return(0);
                        }
                        ctxt->standalone = ctxt->input->standalone;
                        if ((ctxt->encoding == NULL) &&
                            (ctxt->input->encoding != NULL))
                            ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->startDocument(ctxt->userData);
                        ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering MISC\n");
#endif
                    } else {
                        /*
                         * "<?" not followed by "xml" and a blank: no XML
                         * declaration, use the default version; the PI
                         * itself is left for the MISC state.
                         */
                        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->startDocument(ctxt->userData);
                        ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering MISC\n");
#endif
                    }
                } else {
                    /* no XML declaration at all: use the default version */
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->startDocument(ctxt->userData);
                    ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering MISC\n");
#endif
                }
                break;
            case XML_PARSER_MISC:
                /*
                 * Before the DOCTYPE: PIs, comments, the DOCTYPE
                 * declaration, or the start of the root element.
                 */
                SKIP_BLANKS;
                /* blanks were consumed, recompute the available size */
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_MISC;
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
                    (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
                    (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
                    (ctxt->input->cur[8] == 'E')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing internal subset\n");
#endif
                    ctxt->inSubset = 1;
                    xmlParseDocTypeDecl(ctxt);
                    if (RAW == '[') {
                        /* an internal subset follows, handled in DTD state */
                        ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering DTD\n");
#endif
                    } else {
                        /*
                         * Create and update the external subset.
                         */
                        ctxt->inSubset = 2;
                        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                            (ctxt->sax->externalSubset != NULL))
                            ctxt->sax->externalSubset(ctxt->userData,
                                    ctxt->intSubName, ctxt->extSubSystem,
                                    ctxt->extSubURI);
                        ctxt->inSubset = 0;
                        ctxt->instate = XML_PARSER_PROLOG;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering PROLOG\n");
#endif
                    }
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 9)) {
                    /* "<!" seen but too few bytes to tell what it is yet */
                    goto done;
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_IGNORE:
                /*
                 * Not expected in push mode: report and fall back to DTD.
                 * NOTE(review): unlike the other internal error messages
                 * this one has no trailing newline.
                 */
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == IGNORE");
                ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_PROLOG:
                /*
                 * After the DOCTYPE and before the root element: only
                 * PIs and comments are handled, anything else is taken
                 * as the start of the root element.
                 */
                SKIP_BLANKS;
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_PROLOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    goto done;
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_EPILOG:
                /*
                 * After the root element was closed: only PIs and
                 * comments are accepted, anything else is an error.
                 */
                SKIP_BLANKS;
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                    ctxt->instate = XML_PARSER_EPILOG;
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_EPILOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    goto done;
                } else {
                    ctxt->errNo = XML_ERR_DOCUMENT_END;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                            "Extra content at the end of the document\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    /*
                     * NOTE(review): disableSAX was set to 1 just above,
                     * so this endDocument call can never fire here.
                     */
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                break;
            case XML_PARSER_START_TAG: {
                xmlChar *name, *oldname;

                if ((avail < 2) && (ctxt->inputNr == 1))
                    goto done;
                cur = ctxt->input->cur[0];
                if (cur != '<') {
                    ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                                "Start tag expect, '<' not found\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    /*
                     * NOTE(review): same as in EPILOG, disableSAX == 1
                     * at this point so endDocument is never called here.
                     */
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                /* wait until the whole start tag is available */
                if ((!terminate) &&
                    (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                    goto done;
                /* -1 on the outermost element, else repeat the stack top */
                if (ctxt->spaceNr == 0)
                    spacePush(ctxt, -1);
                else
                    spacePush(ctxt, *ctxt->space);
                name = xmlParseStartTag(ctxt);
                if (name == NULL) {
                    spacePop(ctxt);
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                /* the name stack gets its own copy, name is freed below */
                namePush(ctxt, xmlStrdup(name));

                /*
                 * [ VC: Root Element Type ]
                 * The Name in the document type declaration must match
                 * the element type of the root element.
                 */
                if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
                    ctxt->node && (ctxt->node == ctxt->myDoc->children))
                    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);

                /*
                 * Check for an Empty Element.
                 */
                if ((RAW == '/') && (NXT(1) == '>')) {
                    SKIP(2);
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
                        ctxt->sax->endElement(ctxt->userData, name);
                    xmlFree(name);
                    oldname = namePop(ctxt);
                    spacePop(ctxt);
                    if (oldname != NULL) {
#ifdef DEBUG_STACK
                        xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
                        xmlFree(oldname);
                    }
                    /* an empty name stack means the root was just closed */
                    if (ctxt->name == NULL) {
                        ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering EPILOG\n");
#endif
                    } else {
                        ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering CONTENT\n");
#endif
                    }
                    break;
                }
                if (RAW == '>') {
                    NEXT;
                } else {
                    ctxt->errNo = XML_ERR_GT_REQUIRED;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                                         "Couldn't find end of Start Tag %s\n",
                                         name);
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;

                    /*
                     * end of parsing of this node.
                     */
                    nodePop(ctxt);
                    oldname = namePop(ctxt);
                    spacePop(ctxt);
                    if (oldname != NULL) {
#ifdef DEBUG_STACK
                        xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
                        xmlFree(oldname);
                    }
                }
                xmlFree(name);
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            }
            case XML_PARSER_CONTENT: {
                /*
                 * test, cons and tok snapshot the input position before
                 * parsing; they are compared afterwards to detect that
                 * nothing was consumed.
                 */
                const xmlChar *test;
                int cons;
                int tok;

                /*
                 * Handle preparsed entities and charRef
                 */
                if (ctxt->token != 0) {
                    xmlChar current[2] = { 0 , 0 } ;

                    current[0] = (xmlChar) ctxt->token;
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                        (ctxt->sax->characters != NULL))
                        ctxt->sax->characters(ctxt->userData, current, 1);
                    ctxt->token = 0;
                }
                if ((avail < 2) && (ctxt->inputNr == 1))
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];

                test = CUR_PTR;
                cons = ctxt->input->consumed;
                tok = ctxt->token;
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                           (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_CONTENT;
                } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
                    (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
                    (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
                    (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
                    (ctxt->input->cur[8] == '[')) {
                    /* skip the 9 bytes of "<![CDATA[" */
                    SKIP(9);
                    ctxt->instate = XML_PARSER_CDATA_SECTION;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CDATA_SECTION\n");
#endif
                    break;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 9)) {
                    /* "<!" seen but too few bytes to classify it yet */
                    goto done;
                } else if ((cur == '<') && (next == '/')) {
                    ctxt->instate = XML_PARSER_END_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering END_TAG\n");
#endif
                    break;
                } else if (cur == '<') {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                    break;
                } else if (cur == '&') {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Reference\n");
#endif
                    xmlParseReference(ctxt);
                } else {
                    /* TODO Avoid the extra copy, handle directly !!! */
                    /*
                     * Goal of the following test is:
                     *  - minimize calls to the SAX 'character' callback
                     *    when they are mergeable
                     *  - handle a problem for isBlank when we only parse
                     *    a sequence of blank chars and the next one is
                     *    not available to check against '<' presence.
                     *  - tries to homogenize the differences in SAX
                     *    callbacks between the push and pull versions
                     *    of the parser.
                     */
                    if ((ctxt->inputNr == 1) &&
                        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
                        if ((!terminate) &&
                            (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
                            goto done;
                    }
                    ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing char data\n");
#endif
                    xmlParseCharData(ctxt, 0);
                }
                /*
                 * Pop-up of finished entities.
                 */
                while ((RAW == 0) && (ctxt->inputNr > 1))
                    xmlPopInput(ctxt);
                /*
                 * Neither the consumed count, the current pointer nor
                 * the pending token changed: no progress was made,
                 * which is reported as an internal error.
                 */
                if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
                    (tok == ctxt->token)) {
                    ctxt->errNo = XML_ERR_INTERNAL_ERROR;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                             "detected an error in element content\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
                    break;
                }
                break;
            }
            case XML_PARSER_CDATA_SECTION: {
                /*
                 * The Push mode need to have the SAX callback for
                 * cdataBlock merge back contiguous callbacks.
                 */
                int base;

                base = xmlParseLookupSequence(ctxt, ']', ']', '>');
                if (base < 0) {
                    /*
                     * End marker not seen yet: if enough data piled up,
                     * flush one XML_PARSER_BIG_BUFFER_SIZE block to the
                     * cdataBlock callback, then wait for more input.
                     */
                    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
                        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                            if (ctxt->sax->cdataBlock != NULL)
                                ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
                                          XML_PARSER_BIG_BUFFER_SIZE);
                        }
                        SKIP(XML_PARSER_BIG_BUFFER_SIZE);
                        ctxt->checkIndex = 0;
                    }
                    goto done;
                } else {
                    if ((ctxt->sax != NULL) && (base > 0) &&
                        (!ctxt->disableSAX)) {
                        if (ctxt->sax->cdataBlock != NULL)
                            ctxt->sax->cdataBlock(ctxt->userData,
                                                  ctxt->input->cur, base);
                    }
                    /* skip the data plus the 3 bytes of "]]>" */
                    SKIP(base + 3);
                    ctxt->checkIndex = 0;
                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CONTENT\n");
#endif
                }
                break;
            }
            case XML_PARSER_END_TAG:
                if (avail < 2)
                    goto done;
                if ((!terminate) &&
                    (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                    goto done;
                xmlParseEndTag(ctxt);
                /* an empty name stack means the root was just closed */
                if (ctxt->name == NULL) {
                    ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EPILOG\n");
#endif
                } else {
                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CONTENT\n");
#endif
                }
                break;
            case XML_PARSER_DTD: {
                /*
                 * Sorry but progressive parsing of the internal subset
                 * is not expected to be supported. We first check that
                 * the full content of the internal subset is available and
                 * the parsing is launched only at that point.
                 * Internal subset ends up with "']' S? '>'" in an unescaped
                 * section and not in a ']]>' sequence which are conditional
                 * sections (whoever argued to keep that crap in XML deserve
                 * a place in hell !).
                 */
                int base, i;
                xmlChar *buf;
                xmlChar quote = 0;  /* current quote char, 0 outside quotes */

                base = ctxt->input->cur - ctxt->input->base;
                if (base < 0) return(0);
                /* resume the scan where the previous attempt stopped */
                if (ctxt->checkIndex > base)
                    base = ctxt->checkIndex;
                /*
                 * NOTE(review): input->buf is dereferenced without the
                 * NULL test used when computing avail above.
                 */
                buf = ctxt->input->buf->buffer->content;
                for (;(unsigned int) base < ctxt->input->buf->buffer->use;
                     base++) {
                    if (quote != 0) {
                        if (buf[base] == quote)
                            quote = 0;
                        continue;
                    }
                    if (buf[base] == '"') {
                        quote = '"';
                        continue;
                    }
                    if (buf[base] == '\'') {
                        quote = '\'';
                        continue;
                    }
                    if (buf[base] == ']') {
                        if ((unsigned int) base +1 >=
                            ctxt->input->buf->buffer->use)
                            break;
                        if (buf[base + 1] == ']') {
                            /* conditional crap, skip both ']' ! */
                            base++;
                            continue;
                        }
                        /*
                         * NOTE(review): this accepts any '>' found after
                         * the ']', not only one separated by blanks as
                         * the comment at the top of this case describes.
                         */
                        for (i = 0;
                             (unsigned int) base + i < ctxt->input->buf->buffer->use;
                             i++) {
                            if (buf[base + i] == '>')
                                goto found_end_int_subset;
                        }
                        break;
                    }
                }
                /*
                 * We didn't find the end of the Internal subset
                 */
                if (quote == 0)
                    ctxt->checkIndex = base;
                /*
                 * NOTE(review): next is not assigned anywhere in this
                 * case, so this debug test may read a stale or
                 * uninitialized value.
                 */
#ifdef DEBUG_PUSH
                if (next == 0)
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: lookup of int subset end filed\n");
#endif
                goto done;

found_end_int_subset:
                xmlParseInternalSubset(ctxt);
                ctxt->inSubset = 2;
                if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                    (ctxt->sax->externalSubset != NULL))
                    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                            ctxt->extSubSystem, ctxt->extSubURI);
                ctxt->inSubset = 0;
                ctxt->instate = XML_PARSER_PROLOG;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering PROLOG\n");
#endif
                break;
            }
            /*
             * The remaining states are not expected to be seen by the
             * push parser: each one reports an internal error and falls
             * back to another state.
             */
            case XML_PARSER_COMMENT:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == COMMENT\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_PI:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == PI\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_ENTITY_DECL:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ENTITY_DECL\n");
                ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_ENTITY_VALUE:
                /*
                 * NOTE(review): the state is set to CONTENT but the
                 * debug trace below says DTD; one of the two looks
                 * wrong.
                 */
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ENTITY_VALUE\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_ATTRIBUTE_VALUE:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ATTRIBUTE_VALUE\n");
                ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering START_TAG\n");
#endif
                break;
            case XML_PARSER_SYSTEM_LITERAL:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == SYSTEM_LITERAL\n");
                ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering START_TAG\n");
#endif
                break;
        }
    }
done:
#ifdef DEBUG_PUSH
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
#endif
    return(ret);
}
8260
8261/**
Owen Taylor3473f882001-02-23 17:55:21 +00008262 * xmlParseChunk:
8263 * @ctxt: an XML parser context
8264 * @chunk: an char array
8265 * @size: the size in byte of the chunk
8266 * @terminate: last chunk indicator
8267 *
8268 * Parse a Chunk of memory
8269 *
8270 * Returns zero if no error, the xmlParserErrors otherwise.
8271 */
8272int
8273xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8274 int terminate) {
8275 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8276 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8277 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8278 int cur = ctxt->input->cur - ctxt->input->base;
8279
8280 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8281 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8282 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008283 ctxt->input->end =
8284 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008285#ifdef DEBUG_PUSH
8286 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8287#endif
8288
8289 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8290 xmlParseTryOrFinish(ctxt, terminate);
8291 } else if (ctxt->instate != XML_PARSER_EOF) {
8292 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8293 xmlParserInputBufferPtr in = ctxt->input->buf;
8294 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8295 (in->raw != NULL)) {
8296 int nbchars;
8297
8298 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8299 if (nbchars < 0) {
8300 xmlGenericError(xmlGenericErrorContext,
8301 "xmlParseChunk: encoder error\n");
8302 return(XML_ERR_INVALID_ENCODING);
8303 }
8304 }
8305 }
8306 }
8307 xmlParseTryOrFinish(ctxt, terminate);
8308 if (terminate) {
8309 /*
8310 * Check for termination
8311 */
8312 if ((ctxt->instate != XML_PARSER_EOF) &&
8313 (ctxt->instate != XML_PARSER_EPILOG)) {
8314 ctxt->errNo = XML_ERR_DOCUMENT_END;
8315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8316 ctxt->sax->error(ctxt->userData,
8317 "Extra content at the end of the document\n");
8318 ctxt->wellFormed = 0;
8319 ctxt->disableSAX = 1;
8320 }
8321 if (ctxt->instate != XML_PARSER_EOF) {
8322 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8323 (!ctxt->disableSAX))
8324 ctxt->sax->endDocument(ctxt->userData);
8325 }
8326 ctxt->instate = XML_PARSER_EOF;
8327 }
8328 return((xmlParserErrors) ctxt->errNo);
8329}
8330
8331/************************************************************************
8332 * *
8333 * I/O front end functions to the parser *
8334 * *
8335 ************************************************************************/
8336
8337/**
8338 * xmlStopParser:
8339 * @ctxt: an XML parser context
8340 *
8341 * Blocks further parser processing
8342 */
8343void
8344xmlStopParser(xmlParserCtxtPtr ctxt) {
8345 ctxt->instate = XML_PARSER_EOF;
8346 if (ctxt->input != NULL)
8347 ctxt->input->cur = BAD_CAST"";
8348}
8349
8350/**
8351 * xmlCreatePushParserCtxt:
8352 * @sax: a SAX handler
8353 * @user_data: The user data returned on SAX callbacks
8354 * @chunk: a pointer to an array of chars
8355 * @size: number of chars in the array
8356 * @filename: an optional file name or URI
8357 *
8358 * Create a parser context for using the XML parser in push mode
8359 * To allow content encoding detection, @size should be >= 4
8360 * The value of @filename is used for fetching external entities
8361 * and error/warning reports.
8362 *
8363 * Returns the new parser context or NULL
8364 */
8365xmlParserCtxtPtr
8366xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8367 const char *chunk, int size, const char *filename) {
8368 xmlParserCtxtPtr ctxt;
8369 xmlParserInputPtr inputStream;
8370 xmlParserInputBufferPtr buf;
8371 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8372
8373 /*
8374 * plug some encoding conversion routines
8375 */
8376 if ((chunk != NULL) && (size >= 4))
8377 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8378
8379 buf = xmlAllocParserInputBuffer(enc);
8380 if (buf == NULL) return(NULL);
8381
8382 ctxt = xmlNewParserCtxt();
8383 if (ctxt == NULL) {
8384 xmlFree(buf);
8385 return(NULL);
8386 }
8387 if (sax != NULL) {
8388 if (ctxt->sax != &xmlDefaultSAXHandler)
8389 xmlFree(ctxt->sax);
8390 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8391 if (ctxt->sax == NULL) {
8392 xmlFree(buf);
8393 xmlFree(ctxt);
8394 return(NULL);
8395 }
8396 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8397 if (user_data != NULL)
8398 ctxt->userData = user_data;
8399 }
8400 if (filename == NULL) {
8401 ctxt->directory = NULL;
8402 } else {
8403 ctxt->directory = xmlParserGetDirectory(filename);
8404 }
8405
8406 inputStream = xmlNewInputStream(ctxt);
8407 if (inputStream == NULL) {
8408 xmlFreeParserCtxt(ctxt);
8409 return(NULL);
8410 }
8411
8412 if (filename == NULL)
8413 inputStream->filename = NULL;
8414 else
8415 inputStream->filename = xmlMemStrdup(filename);
8416 inputStream->buf = buf;
8417 inputStream->base = inputStream->buf->buffer->content;
8418 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008419 inputStream->end =
8420 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008421 if (enc != XML_CHAR_ENCODING_NONE) {
8422 xmlSwitchEncoding(ctxt, enc);
8423 }
8424
8425 inputPush(ctxt, inputStream);
8426
8427 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8428 (ctxt->input->buf != NULL)) {
8429 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8430#ifdef DEBUG_PUSH
8431 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8432#endif
8433 }
8434
8435 return(ctxt);
8436}
8437
8438/**
8439 * xmlCreateIOParserCtxt:
8440 * @sax: a SAX handler
8441 * @user_data: The user data returned on SAX callbacks
8442 * @ioread: an I/O read function
8443 * @ioclose: an I/O close function
8444 * @ioctx: an I/O handler
8445 * @enc: the charset encoding if known
8446 *
8447 * Create a parser context for using the XML parser with an existing
8448 * I/O stream
8449 *
8450 * Returns the new parser context or NULL
8451 */
8452xmlParserCtxtPtr
8453xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8454 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8455 void *ioctx, xmlCharEncoding enc) {
8456 xmlParserCtxtPtr ctxt;
8457 xmlParserInputPtr inputStream;
8458 xmlParserInputBufferPtr buf;
8459
8460 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8461 if (buf == NULL) return(NULL);
8462
8463 ctxt = xmlNewParserCtxt();
8464 if (ctxt == NULL) {
8465 xmlFree(buf);
8466 return(NULL);
8467 }
8468 if (sax != NULL) {
8469 if (ctxt->sax != &xmlDefaultSAXHandler)
8470 xmlFree(ctxt->sax);
8471 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8472 if (ctxt->sax == NULL) {
8473 xmlFree(buf);
8474 xmlFree(ctxt);
8475 return(NULL);
8476 }
8477 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8478 if (user_data != NULL)
8479 ctxt->userData = user_data;
8480 }
8481
8482 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8483 if (inputStream == NULL) {
8484 xmlFreeParserCtxt(ctxt);
8485 return(NULL);
8486 }
8487 inputPush(ctxt, inputStream);
8488
8489 return(ctxt);
8490}
8491
8492/************************************************************************
8493 * *
8494 * Front ends when parsing a Dtd *
8495 * *
8496 ************************************************************************/
8497
8498/**
8499 * xmlIOParseDTD:
8500 * @sax: the SAX handler block or NULL
8501 * @input: an Input Buffer
8502 * @enc: the charset encoding if known
8503 *
8504 * Load and parse a DTD
8505 *
8506 * Returns the resulting xmlDtdPtr or NULL in case of error.
8507 * @input will be freed at parsing end.
8508 */
8509
8510xmlDtdPtr
8511xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8512 xmlCharEncoding enc) {
8513 xmlDtdPtr ret = NULL;
8514 xmlParserCtxtPtr ctxt;
8515 xmlParserInputPtr pinput = NULL;
8516
8517 if (input == NULL)
8518 return(NULL);
8519
8520 ctxt = xmlNewParserCtxt();
8521 if (ctxt == NULL) {
8522 return(NULL);
8523 }
8524
8525 /*
8526 * Set-up the SAX context
8527 */
8528 if (sax != NULL) {
8529 if (ctxt->sax != NULL)
8530 xmlFree(ctxt->sax);
8531 ctxt->sax = sax;
8532 ctxt->userData = NULL;
8533 }
8534
8535 /*
8536 * generate a parser input from the I/O handler
8537 */
8538
8539 pinput = xmlNewIOInputStream(ctxt, input, enc);
8540 if (pinput == NULL) {
8541 if (sax != NULL) ctxt->sax = NULL;
8542 xmlFreeParserCtxt(ctxt);
8543 return(NULL);
8544 }
8545
8546 /*
8547 * plug some encoding conversion routines here.
8548 */
8549 xmlPushInput(ctxt, pinput);
8550
8551 pinput->filename = NULL;
8552 pinput->line = 1;
8553 pinput->col = 1;
8554 pinput->base = ctxt->input->cur;
8555 pinput->cur = ctxt->input->cur;
8556 pinput->free = NULL;
8557
8558 /*
8559 * let's parse that entity knowing it's an external subset.
8560 */
8561 ctxt->inSubset = 2;
8562 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8563 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8564 BAD_CAST "none", BAD_CAST "none");
8565 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8566
8567 if (ctxt->myDoc != NULL) {
8568 if (ctxt->wellFormed) {
8569 ret = ctxt->myDoc->extSubset;
8570 ctxt->myDoc->extSubset = NULL;
8571 } else {
8572 ret = NULL;
8573 }
8574 xmlFreeDoc(ctxt->myDoc);
8575 ctxt->myDoc = NULL;
8576 }
8577 if (sax != NULL) ctxt->sax = NULL;
8578 xmlFreeParserCtxt(ctxt);
8579
8580 return(ret);
8581}
8582
8583/**
8584 * xmlSAXParseDTD:
8585 * @sax: the SAX handler block
8586 * @ExternalID: a NAME* containing the External ID of the DTD
8587 * @SystemID: a NAME* containing the URL to the DTD
8588 *
8589 * Load and parse an external subset.
8590 *
8591 * Returns the resulting xmlDtdPtr or NULL in case of error.
8592 */
8593
8594xmlDtdPtr
8595xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8596 const xmlChar *SystemID) {
8597 xmlDtdPtr ret = NULL;
8598 xmlParserCtxtPtr ctxt;
8599 xmlParserInputPtr input = NULL;
8600 xmlCharEncoding enc;
8601
8602 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8603
8604 ctxt = xmlNewParserCtxt();
8605 if (ctxt == NULL) {
8606 return(NULL);
8607 }
8608
8609 /*
8610 * Set-up the SAX context
8611 */
8612 if (sax != NULL) {
8613 if (ctxt->sax != NULL)
8614 xmlFree(ctxt->sax);
8615 ctxt->sax = sax;
8616 ctxt->userData = NULL;
8617 }
8618
8619 /*
8620 * Ask the Entity resolver to load the damn thing
8621 */
8622
8623 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8624 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8625 if (input == NULL) {
8626 if (sax != NULL) ctxt->sax = NULL;
8627 xmlFreeParserCtxt(ctxt);
8628 return(NULL);
8629 }
8630
8631 /*
8632 * plug some encoding conversion routines here.
8633 */
8634 xmlPushInput(ctxt, input);
8635 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8636 xmlSwitchEncoding(ctxt, enc);
8637
8638 if (input->filename == NULL)
8639 input->filename = (char *) xmlStrdup(SystemID);
8640 input->line = 1;
8641 input->col = 1;
8642 input->base = ctxt->input->cur;
8643 input->cur = ctxt->input->cur;
8644 input->free = NULL;
8645
8646 /*
8647 * let's parse that entity knowing it's an external subset.
8648 */
8649 ctxt->inSubset = 2;
8650 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8651 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8652 ExternalID, SystemID);
8653 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8654
8655 if (ctxt->myDoc != NULL) {
8656 if (ctxt->wellFormed) {
8657 ret = ctxt->myDoc->extSubset;
8658 ctxt->myDoc->extSubset = NULL;
8659 } else {
8660 ret = NULL;
8661 }
8662 xmlFreeDoc(ctxt->myDoc);
8663 ctxt->myDoc = NULL;
8664 }
8665 if (sax != NULL) ctxt->sax = NULL;
8666 xmlFreeParserCtxt(ctxt);
8667
8668 return(ret);
8669}
8670
8671/**
8672 * xmlParseDTD:
8673 * @ExternalID: a NAME* containing the External ID of the DTD
8674 * @SystemID: a NAME* containing the URL to the DTD
8675 *
8676 * Load and parse an external subset.
8677 *
8678 * Returns the resulting xmlDtdPtr or NULL in case of error.
8679 */
8680
8681xmlDtdPtr
8682xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8683 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8684}
8685
8686/************************************************************************
8687 * *
8688 * Front ends when parsing an Entity *
8689 * *
8690 ************************************************************************/
8691
8692/**
Owen Taylor3473f882001-02-23 17:55:21 +00008693 * xmlParseCtxtExternalEntity:
8694 * @ctx: the existing parsing context
8695 * @URL: the URL for the entity to load
8696 * @ID: the System ID for the entity to load
8697 * @list: the return value for the set of parsed nodes
8698 *
8699 * Parse an external general entity within an existing parsing context
8700 * An external general parsed entity is well-formed if it matches the
8701 * production labeled extParsedEnt.
8702 *
8703 * [78] extParsedEnt ::= TextDecl? content
8704 *
8705 * Returns 0 if the entity is well formed, -1 in case of args problem and
8706 * the parser error code otherwise
8707 */
8708
8709int
8710xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8711 const xmlChar *ID, xmlNodePtr *list) {
8712 xmlParserCtxtPtr ctxt;
8713 xmlDocPtr newDoc;
8714 xmlSAXHandlerPtr oldsax = NULL;
8715 int ret = 0;
8716
8717 if (ctx->depth > 40) {
8718 return(XML_ERR_ENTITY_LOOP);
8719 }
8720
8721 if (list != NULL)
8722 *list = NULL;
8723 if ((URL == NULL) && (ID == NULL))
8724 return(-1);
8725 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8726 return(-1);
8727
8728
8729 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8730 if (ctxt == NULL) return(-1);
8731 ctxt->userData = ctxt;
8732 oldsax = ctxt->sax;
8733 ctxt->sax = ctx->sax;
8734 newDoc = xmlNewDoc(BAD_CAST "1.0");
8735 if (newDoc == NULL) {
8736 xmlFreeParserCtxt(ctxt);
8737 return(-1);
8738 }
8739 if (ctx->myDoc != NULL) {
8740 newDoc->intSubset = ctx->myDoc->intSubset;
8741 newDoc->extSubset = ctx->myDoc->extSubset;
8742 }
8743 if (ctx->myDoc->URL != NULL) {
8744 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8745 }
8746 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8747 if (newDoc->children == NULL) {
8748 ctxt->sax = oldsax;
8749 xmlFreeParserCtxt(ctxt);
8750 newDoc->intSubset = NULL;
8751 newDoc->extSubset = NULL;
8752 xmlFreeDoc(newDoc);
8753 return(-1);
8754 }
8755 nodePush(ctxt, newDoc->children);
8756 if (ctx->myDoc == NULL) {
8757 ctxt->myDoc = newDoc;
8758 } else {
8759 ctxt->myDoc = ctx->myDoc;
8760 newDoc->children->doc = ctx->myDoc;
8761 }
8762
8763 /*
8764 * Parse a possible text declaration first
8765 */
8766 GROW;
8767 if ((RAW == '<') && (NXT(1) == '?') &&
8768 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8769 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8770 xmlParseTextDecl(ctxt);
8771 }
8772
8773 /*
8774 * Doing validity checking on chunk doesn't make sense
8775 */
8776 ctxt->instate = XML_PARSER_CONTENT;
8777 ctxt->validate = ctx->validate;
8778 ctxt->loadsubset = ctx->loadsubset;
8779 ctxt->depth = ctx->depth + 1;
8780 ctxt->replaceEntities = ctx->replaceEntities;
8781 if (ctxt->validate) {
8782 ctxt->vctxt.error = ctx->vctxt.error;
8783 ctxt->vctxt.warning = ctx->vctxt.warning;
8784 /* Allocate the Node stack */
8785 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8786 if (ctxt->vctxt.nodeTab == NULL) {
8787 xmlGenericError(xmlGenericErrorContext,
8788 "xmlParseCtxtExternalEntity: out of memory\n");
8789 ctxt->validate = 0;
8790 ctxt->vctxt.error = NULL;
8791 ctxt->vctxt.warning = NULL;
8792 } else {
8793 ctxt->vctxt.nodeNr = 0;
8794 ctxt->vctxt.nodeMax = 4;
8795 ctxt->vctxt.node = NULL;
8796 }
8797 } else {
8798 ctxt->vctxt.error = NULL;
8799 ctxt->vctxt.warning = NULL;
8800 }
8801
8802 xmlParseContent(ctxt);
8803
8804 if ((RAW == '<') && (NXT(1) == '/')) {
8805 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8807 ctxt->sax->error(ctxt->userData,
8808 "chunk is not well balanced\n");
8809 ctxt->wellFormed = 0;
8810 ctxt->disableSAX = 1;
8811 } else if (RAW != 0) {
8812 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8814 ctxt->sax->error(ctxt->userData,
8815 "extra content at the end of well balanced chunk\n");
8816 ctxt->wellFormed = 0;
8817 ctxt->disableSAX = 1;
8818 }
8819 if (ctxt->node != newDoc->children) {
8820 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8822 ctxt->sax->error(ctxt->userData,
8823 "chunk is not well balanced\n");
8824 ctxt->wellFormed = 0;
8825 ctxt->disableSAX = 1;
8826 }
8827
8828 if (!ctxt->wellFormed) {
8829 if (ctxt->errNo == 0)
8830 ret = 1;
8831 else
8832 ret = ctxt->errNo;
8833 } else {
8834 if (list != NULL) {
8835 xmlNodePtr cur;
8836
8837 /*
8838 * Return the newly created nodeset after unlinking it from
8839 * they pseudo parent.
8840 */
8841 cur = newDoc->children->children;
8842 *list = cur;
8843 while (cur != NULL) {
8844 cur->parent = NULL;
8845 cur = cur->next;
8846 }
8847 newDoc->children->children = NULL;
8848 }
8849 ret = 0;
8850 }
8851 ctxt->sax = oldsax;
8852 xmlFreeParserCtxt(ctxt);
8853 newDoc->intSubset = NULL;
8854 newDoc->extSubset = NULL;
8855 xmlFreeDoc(newDoc);
8856
8857 return(ret);
8858}
8859
8860/**
8861 * xmlParseExternalEntity:
8862 * @doc: the document the chunk pertains to
8863 * @sax: the SAX handler bloc (possibly NULL)
8864 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8865 * @depth: Used for loop detection, use 0
8866 * @URL: the URL for the entity to load
8867 * @ID: the System ID for the entity to load
8868 * @list: the return value for the set of parsed nodes
8869 *
8870 * Parse an external general entity
8871 * An external general parsed entity is well-formed if it matches the
8872 * production labeled extParsedEnt.
8873 *
8874 * [78] extParsedEnt ::= TextDecl? content
8875 *
8876 * Returns 0 if the entity is well formed, -1 in case of args problem and
8877 * the parser error code otherwise
8878 */
8879
8880int
8881xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8882 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8883 xmlParserCtxtPtr ctxt;
8884 xmlDocPtr newDoc;
8885 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillarde470df72001-04-18 21:41:07 +00008886 int oldexternal = ctxt->external;
Owen Taylor3473f882001-02-23 17:55:21 +00008887 int ret = 0;
8888
8889 if (depth > 40) {
8890 return(XML_ERR_ENTITY_LOOP);
8891 }
8892
8893
8894
8895 if (list != NULL)
8896 *list = NULL;
8897 if ((URL == NULL) && (ID == NULL))
8898 return(-1);
8899 if (doc == NULL) /* @@ relax but check for dereferences */
8900 return(-1);
8901
8902
8903 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8904 if (ctxt == NULL) return(-1);
8905 ctxt->userData = ctxt;
8906 if (sax != NULL) {
8907 oldsax = ctxt->sax;
8908 ctxt->sax = sax;
8909 if (user_data != NULL)
8910 ctxt->userData = user_data;
8911 }
8912 newDoc = xmlNewDoc(BAD_CAST "1.0");
8913 if (newDoc == NULL) {
8914 xmlFreeParserCtxt(ctxt);
8915 return(-1);
8916 }
8917 if (doc != NULL) {
8918 newDoc->intSubset = doc->intSubset;
8919 newDoc->extSubset = doc->extSubset;
8920 }
8921 if (doc->URL != NULL) {
8922 newDoc->URL = xmlStrdup(doc->URL);
8923 }
8924 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8925 if (newDoc->children == NULL) {
8926 if (sax != NULL)
8927 ctxt->sax = oldsax;
8928 xmlFreeParserCtxt(ctxt);
8929 newDoc->intSubset = NULL;
8930 newDoc->extSubset = NULL;
8931 xmlFreeDoc(newDoc);
8932 return(-1);
8933 }
8934 nodePush(ctxt, newDoc->children);
8935 if (doc == NULL) {
8936 ctxt->myDoc = newDoc;
8937 } else {
8938 ctxt->myDoc = doc;
8939 newDoc->children->doc = doc;
8940 }
8941
8942 /*
8943 * Parse a possible text declaration first
8944 */
8945 GROW;
8946 if ((RAW == '<') && (NXT(1) == '?') &&
8947 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8948 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8949 xmlParseTextDecl(ctxt);
8950 }
8951
8952 /*
8953 * Doing validity checking on chunk doesn't make sense
8954 */
8955 ctxt->instate = XML_PARSER_CONTENT;
8956 ctxt->validate = 0;
Daniel Veillarde470df72001-04-18 21:41:07 +00008957 ctxt->external = 2;
Owen Taylor3473f882001-02-23 17:55:21 +00008958 ctxt->loadsubset = 0;
8959 ctxt->depth = depth;
8960
8961 xmlParseContent(ctxt);
8962
8963 if ((RAW == '<') && (NXT(1) == '/')) {
8964 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8966 ctxt->sax->error(ctxt->userData,
8967 "chunk is not well balanced\n");
8968 ctxt->wellFormed = 0;
8969 ctxt->disableSAX = 1;
8970 } else if (RAW != 0) {
8971 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8973 ctxt->sax->error(ctxt->userData,
8974 "extra content at the end of well balanced chunk\n");
8975 ctxt->wellFormed = 0;
8976 ctxt->disableSAX = 1;
8977 }
8978 if (ctxt->node != newDoc->children) {
8979 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8981 ctxt->sax->error(ctxt->userData,
8982 "chunk is not well balanced\n");
8983 ctxt->wellFormed = 0;
8984 ctxt->disableSAX = 1;
8985 }
8986
8987 if (!ctxt->wellFormed) {
8988 if (ctxt->errNo == 0)
8989 ret = 1;
8990 else
8991 ret = ctxt->errNo;
8992 } else {
8993 if (list != NULL) {
8994 xmlNodePtr cur;
8995
8996 /*
8997 * Return the newly created nodeset after unlinking it from
8998 * they pseudo parent.
8999 */
9000 cur = newDoc->children->children;
9001 *list = cur;
9002 while (cur != NULL) {
9003 cur->parent = NULL;
9004 cur = cur->next;
9005 }
9006 newDoc->children->children = NULL;
9007 }
9008 ret = 0;
9009 }
9010 if (sax != NULL)
9011 ctxt->sax = oldsax;
9012 xmlFreeParserCtxt(ctxt);
9013 newDoc->intSubset = NULL;
9014 newDoc->extSubset = NULL;
9015 xmlFreeDoc(newDoc);
9016
9017 return(ret);
9018}
9019
9020/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009021 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009022 * @doc: the document the chunk pertains to
9023 * @sax: the SAX handler bloc (possibly NULL)
9024 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9025 * @depth: Used for loop detection, use 0
9026 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9027 * @list: the return value for the set of parsed nodes
9028 *
9029 * Parse a well-balanced chunk of an XML document
9030 * called by the parser
9031 * The allowed sequence for the Well Balanced Chunk is the one defined by
9032 * the content production in the XML grammar:
9033 *
9034 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9035 *
9036 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9037 * the parser error code otherwise
9038 */
9039
9040int
9041xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9042 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9043 xmlParserCtxtPtr ctxt;
9044 xmlDocPtr newDoc;
9045 xmlSAXHandlerPtr oldsax = NULL;
9046 int size;
9047 int ret = 0;
9048
9049 if (depth > 40) {
9050 return(XML_ERR_ENTITY_LOOP);
9051 }
9052
9053
9054 if (list != NULL)
9055 *list = NULL;
9056 if (string == NULL)
9057 return(-1);
9058
9059 size = xmlStrlen(string);
9060
9061 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9062 if (ctxt == NULL) return(-1);
9063 ctxt->userData = ctxt;
9064 if (sax != NULL) {
9065 oldsax = ctxt->sax;
9066 ctxt->sax = sax;
9067 if (user_data != NULL)
9068 ctxt->userData = user_data;
9069 }
9070 newDoc = xmlNewDoc(BAD_CAST "1.0");
9071 if (newDoc == NULL) {
9072 xmlFreeParserCtxt(ctxt);
9073 return(-1);
9074 }
9075 if (doc != NULL) {
9076 newDoc->intSubset = doc->intSubset;
9077 newDoc->extSubset = doc->extSubset;
9078 }
9079 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9080 if (newDoc->children == NULL) {
9081 if (sax != NULL)
9082 ctxt->sax = oldsax;
9083 xmlFreeParserCtxt(ctxt);
9084 newDoc->intSubset = NULL;
9085 newDoc->extSubset = NULL;
9086 xmlFreeDoc(newDoc);
9087 return(-1);
9088 }
9089 nodePush(ctxt, newDoc->children);
9090 if (doc == NULL) {
9091 ctxt->myDoc = newDoc;
9092 } else {
9093 ctxt->myDoc = doc;
9094 newDoc->children->doc = doc;
9095 }
9096 ctxt->instate = XML_PARSER_CONTENT;
9097 ctxt->depth = depth;
9098
9099 /*
9100 * Doing validity checking on chunk doesn't make sense
9101 */
9102 ctxt->validate = 0;
9103 ctxt->loadsubset = 0;
9104
9105 xmlParseContent(ctxt);
9106
9107 if ((RAW == '<') && (NXT(1) == '/')) {
9108 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9110 ctxt->sax->error(ctxt->userData,
9111 "chunk is not well balanced\n");
9112 ctxt->wellFormed = 0;
9113 ctxt->disableSAX = 1;
9114 } else if (RAW != 0) {
9115 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9117 ctxt->sax->error(ctxt->userData,
9118 "extra content at the end of well balanced chunk\n");
9119 ctxt->wellFormed = 0;
9120 ctxt->disableSAX = 1;
9121 }
9122 if (ctxt->node != newDoc->children) {
9123 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9125 ctxt->sax->error(ctxt->userData,
9126 "chunk is not well balanced\n");
9127 ctxt->wellFormed = 0;
9128 ctxt->disableSAX = 1;
9129 }
9130
9131 if (!ctxt->wellFormed) {
9132 if (ctxt->errNo == 0)
9133 ret = 1;
9134 else
9135 ret = ctxt->errNo;
9136 } else {
9137 if (list != NULL) {
9138 xmlNodePtr cur;
9139
9140 /*
9141 * Return the newly created nodeset after unlinking it from
9142 * they pseudo parent.
9143 */
9144 cur = newDoc->children->children;
9145 *list = cur;
9146 while (cur != NULL) {
9147 cur->parent = NULL;
9148 cur = cur->next;
9149 }
9150 newDoc->children->children = NULL;
9151 }
9152 ret = 0;
9153 }
9154 if (sax != NULL)
9155 ctxt->sax = oldsax;
9156 xmlFreeParserCtxt(ctxt);
9157 newDoc->intSubset = NULL;
9158 newDoc->extSubset = NULL;
9159 xmlFreeDoc(newDoc);
9160
9161 return(ret);
9162}
9163
9164/**
9165 * xmlSAXParseEntity:
9166 * @sax: the SAX handler block
9167 * @filename: the filename
9168 *
9169 * parse an XML external entity out of context and build a tree.
9170 * It use the given SAX function block to handle the parsing callback.
9171 * If sax is NULL, fallback to the default DOM tree building routines.
9172 *
9173 * [78] extParsedEnt ::= TextDecl? content
9174 *
9175 * This correspond to a "Well Balanced" chunk
9176 *
9177 * Returns the resulting document tree
9178 */
9179
9180xmlDocPtr
9181xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9182 xmlDocPtr ret;
9183 xmlParserCtxtPtr ctxt;
9184 char *directory = NULL;
9185
9186 ctxt = xmlCreateFileParserCtxt(filename);
9187 if (ctxt == NULL) {
9188 return(NULL);
9189 }
9190 if (sax != NULL) {
9191 if (ctxt->sax != NULL)
9192 xmlFree(ctxt->sax);
9193 ctxt->sax = sax;
9194 ctxt->userData = NULL;
9195 }
9196
9197 if ((ctxt->directory == NULL) && (directory == NULL))
9198 directory = xmlParserGetDirectory(filename);
9199
9200 xmlParseExtParsedEnt(ctxt);
9201
9202 if (ctxt->wellFormed)
9203 ret = ctxt->myDoc;
9204 else {
9205 ret = NULL;
9206 xmlFreeDoc(ctxt->myDoc);
9207 ctxt->myDoc = NULL;
9208 }
9209 if (sax != NULL)
9210 ctxt->sax = NULL;
9211 xmlFreeParserCtxt(ctxt);
9212
9213 return(ret);
9214}
9215
9216/**
9217 * xmlParseEntity:
9218 * @filename: the filename
9219 *
9220 * parse an XML external entity out of context and build a tree.
9221 *
9222 * [78] extParsedEnt ::= TextDecl? content
9223 *
9224 * This correspond to a "Well Balanced" chunk
9225 *
9226 * Returns the resulting document tree
9227 */
9228
9229xmlDocPtr
9230xmlParseEntity(const char *filename) {
9231 return(xmlSAXParseEntity(NULL, filename));
9232}
9233
9234/**
9235 * xmlCreateEntityParserCtxt:
9236 * @URL: the entity URL
9237 * @ID: the entity PUBLIC ID
9238 * @base: a posible base for the target URI
9239 *
9240 * Create a parser context for an external entity
9241 * Automatic support for ZLIB/Compress compressed document is provided
9242 * by default if found at compile-time.
9243 *
9244 * Returns the new parser context or NULL
9245 */
9246xmlParserCtxtPtr
9247xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9248 const xmlChar *base) {
9249 xmlParserCtxtPtr ctxt;
9250 xmlParserInputPtr inputStream;
9251 char *directory = NULL;
9252 xmlChar *uri;
9253
9254 ctxt = xmlNewParserCtxt();
9255 if (ctxt == NULL) {
9256 return(NULL);
9257 }
9258
9259 uri = xmlBuildURI(URL, base);
9260
9261 if (uri == NULL) {
9262 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9263 if (inputStream == NULL) {
9264 xmlFreeParserCtxt(ctxt);
9265 return(NULL);
9266 }
9267
9268 inputPush(ctxt, inputStream);
9269
9270 if ((ctxt->directory == NULL) && (directory == NULL))
9271 directory = xmlParserGetDirectory((char *)URL);
9272 if ((ctxt->directory == NULL) && (directory != NULL))
9273 ctxt->directory = directory;
9274 } else {
9275 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9276 if (inputStream == NULL) {
9277 xmlFree(uri);
9278 xmlFreeParserCtxt(ctxt);
9279 return(NULL);
9280 }
9281
9282 inputPush(ctxt, inputStream);
9283
9284 if ((ctxt->directory == NULL) && (directory == NULL))
9285 directory = xmlParserGetDirectory((char *)uri);
9286 if ((ctxt->directory == NULL) && (directory != NULL))
9287 ctxt->directory = directory;
9288 xmlFree(uri);
9289 }
9290
9291 return(ctxt);
9292}
9293
9294/************************************************************************
9295 * *
9296 * Front ends when parsing from a file *
9297 * *
9298 ************************************************************************/
9299
9300/**
9301 * xmlCreateFileParserCtxt:
9302 * @filename: the filename
9303 *
9304 * Create a parser context for a file content.
9305 * Automatic support for ZLIB/Compress compressed document is provided
9306 * by default if found at compile-time.
9307 *
9308 * Returns the new parser context or NULL
9309 */
9310xmlParserCtxtPtr
9311xmlCreateFileParserCtxt(const char *filename)
9312{
9313 xmlParserCtxtPtr ctxt;
9314 xmlParserInputPtr inputStream;
9315 xmlParserInputBufferPtr buf;
9316 char *directory = NULL;
9317
9318 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9319 if (buf == NULL) {
9320 return(NULL);
9321 }
9322
9323 ctxt = xmlNewParserCtxt();
9324 if (ctxt == NULL) {
9325 if (xmlDefaultSAXHandler.error != NULL) {
9326 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9327 }
9328 return(NULL);
9329 }
9330
9331 inputStream = xmlNewInputStream(ctxt);
9332 if (inputStream == NULL) {
9333 xmlFreeParserCtxt(ctxt);
9334 return(NULL);
9335 }
9336
9337 inputStream->filename = xmlMemStrdup(filename);
9338 inputStream->buf = buf;
9339 inputStream->base = inputStream->buf->buffer->content;
9340 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009341 inputStream->end =
9342 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009343
9344 inputPush(ctxt, inputStream);
9345 if ((ctxt->directory == NULL) && (directory == NULL))
9346 directory = xmlParserGetDirectory(filename);
9347 if ((ctxt->directory == NULL) && (directory != NULL))
9348 ctxt->directory = directory;
9349
9350 return(ctxt);
9351}
9352
9353/**
9354 * xmlSAXParseFile:
9355 * @sax: the SAX handler block
9356 * @filename: the filename
9357 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9358 * documents
9359 *
9360 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9361 * compressed document is provided by default if found at compile-time.
9362 * It use the given SAX function block to handle the parsing callback.
9363 * If sax is NULL, fallback to the default DOM tree building routines.
9364 *
9365 * Returns the resulting document tree
9366 */
9367
9368xmlDocPtr
9369xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9370 int recovery) {
9371 xmlDocPtr ret;
9372 xmlParserCtxtPtr ctxt;
9373 char *directory = NULL;
9374
9375 ctxt = xmlCreateFileParserCtxt(filename);
9376 if (ctxt == NULL) {
9377 return(NULL);
9378 }
9379 if (sax != NULL) {
9380 if (ctxt->sax != NULL)
9381 xmlFree(ctxt->sax);
9382 ctxt->sax = sax;
9383 ctxt->userData = NULL;
9384 }
9385
9386 if ((ctxt->directory == NULL) && (directory == NULL))
9387 directory = xmlParserGetDirectory(filename);
9388 if ((ctxt->directory == NULL) && (directory != NULL))
9389 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9390
9391 xmlParseDocument(ctxt);
9392
9393 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9394 else {
9395 ret = NULL;
9396 xmlFreeDoc(ctxt->myDoc);
9397 ctxt->myDoc = NULL;
9398 }
9399 if (sax != NULL)
9400 ctxt->sax = NULL;
9401 xmlFreeParserCtxt(ctxt);
9402
9403 return(ret);
9404}
9405
9406/**
9407 * xmlRecoverDoc:
9408 * @cur: a pointer to an array of xmlChar
9409 *
9410 * parse an XML in-memory document and build a tree.
9411 * In the case the document is not Well Formed, a tree is built anyway
9412 *
9413 * Returns the resulting document tree
9414 */
9415
9416xmlDocPtr
9417xmlRecoverDoc(xmlChar *cur) {
9418 return(xmlSAXParseDoc(NULL, cur, 1));
9419}
9420
9421/**
9422 * xmlParseFile:
9423 * @filename: the filename
9424 *
9425 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9426 * compressed document is provided by default if found at compile-time.
9427 *
9428 * Returns the resulting document tree
9429 */
9430
9431xmlDocPtr
9432xmlParseFile(const char *filename) {
9433 return(xmlSAXParseFile(NULL, filename, 0));
9434}
9435
9436/**
9437 * xmlRecoverFile:
9438 * @filename: the filename
9439 *
9440 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9441 * compressed document is provided by default if found at compile-time.
9442 * In the case the document is not Well Formed, a tree is built anyway
9443 *
9444 * Returns the resulting document tree
9445 */
9446
9447xmlDocPtr
9448xmlRecoverFile(const char *filename) {
9449 return(xmlSAXParseFile(NULL, filename, 1));
9450}
9451
9452
9453/**
9454 * xmlSetupParserForBuffer:
9455 * @ctxt: an XML parser context
9456 * @buffer: a xmlChar * buffer
9457 * @filename: a file name
9458 *
9459 * Setup the parser context to parse a new buffer; Clears any prior
9460 * contents from the parser context. The buffer parameter must not be
9461 * NULL, but the filename parameter can be
9462 */
9463void
9464xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9465 const char* filename)
9466{
9467 xmlParserInputPtr input;
9468
9469 input = xmlNewInputStream(ctxt);
9470 if (input == NULL) {
9471 perror("malloc");
9472 xmlFree(ctxt);
9473 return;
9474 }
9475
9476 xmlClearParserCtxt(ctxt);
9477 if (filename != NULL)
9478 input->filename = xmlMemStrdup(filename);
9479 input->base = buffer;
9480 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009481 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009482 inputPush(ctxt, input);
9483}
9484
9485/**
9486 * xmlSAXUserParseFile:
9487 * @sax: a SAX handler
9488 * @user_data: The user data returned on SAX callbacks
9489 * @filename: a file name
9490 *
9491 * parse an XML file and call the given SAX handler routines.
9492 * Automatic support for ZLIB/Compress compressed document is provided
9493 *
9494 * Returns 0 in case of success or a error number otherwise
9495 */
9496int
9497xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9498 const char *filename) {
9499 int ret = 0;
9500 xmlParserCtxtPtr ctxt;
9501
9502 ctxt = xmlCreateFileParserCtxt(filename);
9503 if (ctxt == NULL) return -1;
9504 if (ctxt->sax != &xmlDefaultSAXHandler)
9505 xmlFree(ctxt->sax);
9506 ctxt->sax = sax;
9507 if (user_data != NULL)
9508 ctxt->userData = user_data;
9509
9510 xmlParseDocument(ctxt);
9511
9512 if (ctxt->wellFormed)
9513 ret = 0;
9514 else {
9515 if (ctxt->errNo != 0)
9516 ret = ctxt->errNo;
9517 else
9518 ret = -1;
9519 }
9520 if (sax != NULL)
9521 ctxt->sax = NULL;
9522 xmlFreeParserCtxt(ctxt);
9523
9524 return ret;
9525}
9526
9527/************************************************************************
9528 * *
9529 * Front ends when parsing from memory *
9530 * *
9531 ************************************************************************/
9532
9533/**
9534 * xmlCreateMemoryParserCtxt:
9535 * @buffer: a pointer to a char array
9536 * @size: the size of the array
9537 *
9538 * Create a parser context for an XML in-memory document.
9539 *
9540 * Returns the new parser context or NULL
9541 */
9542xmlParserCtxtPtr
9543xmlCreateMemoryParserCtxt(char *buffer, int size) {
9544 xmlParserCtxtPtr ctxt;
9545 xmlParserInputPtr input;
9546 xmlParserInputBufferPtr buf;
9547
9548 if (buffer == NULL)
9549 return(NULL);
9550 if (size <= 0)
9551 return(NULL);
9552
9553 ctxt = xmlNewParserCtxt();
9554 if (ctxt == NULL)
9555 return(NULL);
9556
9557 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9558 if (buf == NULL) return(NULL);
9559
9560 input = xmlNewInputStream(ctxt);
9561 if (input == NULL) {
9562 xmlFreeParserCtxt(ctxt);
9563 return(NULL);
9564 }
9565
9566 input->filename = NULL;
9567 input->buf = buf;
9568 input->base = input->buf->buffer->content;
9569 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009570 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009571
9572 inputPush(ctxt, input);
9573 return(ctxt);
9574}
9575
9576/**
9577 * xmlSAXParseMemory:
9578 * @sax: the SAX handler block
9579 * @buffer: an pointer to a char array
9580 * @size: the size of the array
9581 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9582 * documents
9583 *
9584 * parse an XML in-memory block and use the given SAX function block
9585 * to handle the parsing callback. If sax is NULL, fallback to the default
9586 * DOM tree building routines.
9587 *
9588 * Returns the resulting document tree
9589 */
9590xmlDocPtr
9591xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9592 xmlDocPtr ret;
9593 xmlParserCtxtPtr ctxt;
9594
9595 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9596 if (ctxt == NULL) return(NULL);
9597 if (sax != NULL) {
9598 ctxt->sax = sax;
9599 ctxt->userData = NULL;
9600 }
9601
9602 xmlParseDocument(ctxt);
9603
9604 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9605 else {
9606 ret = NULL;
9607 xmlFreeDoc(ctxt->myDoc);
9608 ctxt->myDoc = NULL;
9609 }
9610 if (sax != NULL)
9611 ctxt->sax = NULL;
9612 xmlFreeParserCtxt(ctxt);
9613
9614 return(ret);
9615}
9616
9617/**
9618 * xmlParseMemory:
9619 * @buffer: an pointer to a char array
9620 * @size: the size of the array
9621 *
9622 * parse an XML in-memory block and build a tree.
9623 *
9624 * Returns the resulting document tree
9625 */
9626
9627xmlDocPtr xmlParseMemory(char *buffer, int size) {
9628 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9629}
9630
9631/**
9632 * xmlRecoverMemory:
9633 * @buffer: an pointer to a char array
9634 * @size: the size of the array
9635 *
9636 * parse an XML in-memory block and build a tree.
9637 * In the case the document is not Well Formed, a tree is built anyway
9638 *
9639 * Returns the resulting document tree
9640 */
9641
9642xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9643 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9644}
9645
9646/**
9647 * xmlSAXUserParseMemory:
9648 * @sax: a SAX handler
9649 * @user_data: The user data returned on SAX callbacks
9650 * @buffer: an in-memory XML document input
9651 * @size: the length of the XML document in bytes
9652 *
9653 * A better SAX parsing routine.
9654 * parse an XML in-memory buffer and call the given SAX handler routines.
9655 *
9656 * Returns 0 in case of success or a error number otherwise
9657 */
9658int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9659 char *buffer, int size) {
9660 int ret = 0;
9661 xmlParserCtxtPtr ctxt;
9662 xmlSAXHandlerPtr oldsax = NULL;
9663
9664 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9665 if (ctxt == NULL) return -1;
9666 if (sax != NULL) {
9667 oldsax = ctxt->sax;
9668 ctxt->sax = sax;
9669 }
9670 ctxt->userData = user_data;
9671
9672 xmlParseDocument(ctxt);
9673
9674 if (ctxt->wellFormed)
9675 ret = 0;
9676 else {
9677 if (ctxt->errNo != 0)
9678 ret = ctxt->errNo;
9679 else
9680 ret = -1;
9681 }
9682 if (sax != NULL) {
9683 ctxt->sax = oldsax;
9684 }
9685 xmlFreeParserCtxt(ctxt);
9686
9687 return ret;
9688}
9689
9690/**
9691 * xmlCreateDocParserCtxt:
9692 * @cur: a pointer to an array of xmlChar
9693 *
9694 * Creates a parser context for an XML in-memory document.
9695 *
9696 * Returns the new parser context or NULL
9697 */
9698xmlParserCtxtPtr
9699xmlCreateDocParserCtxt(xmlChar *cur) {
9700 int len;
9701
9702 if (cur == NULL)
9703 return(NULL);
9704 len = xmlStrlen(cur);
9705 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9706}
9707
9708/**
9709 * xmlSAXParseDoc:
9710 * @sax: the SAX handler block
9711 * @cur: a pointer to an array of xmlChar
9712 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9713 * documents
9714 *
9715 * parse an XML in-memory document and build a tree.
9716 * It use the given SAX function block to handle the parsing callback.
9717 * If sax is NULL, fallback to the default DOM tree building routines.
9718 *
9719 * Returns the resulting document tree
9720 */
9721
9722xmlDocPtr
9723xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9724 xmlDocPtr ret;
9725 xmlParserCtxtPtr ctxt;
9726
9727 if (cur == NULL) return(NULL);
9728
9729
9730 ctxt = xmlCreateDocParserCtxt(cur);
9731 if (ctxt == NULL) return(NULL);
9732 if (sax != NULL) {
9733 ctxt->sax = sax;
9734 ctxt->userData = NULL;
9735 }
9736
9737 xmlParseDocument(ctxt);
9738 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9739 else {
9740 ret = NULL;
9741 xmlFreeDoc(ctxt->myDoc);
9742 ctxt->myDoc = NULL;
9743 }
9744 if (sax != NULL)
9745 ctxt->sax = NULL;
9746 xmlFreeParserCtxt(ctxt);
9747
9748 return(ret);
9749}
9750
9751/**
9752 * xmlParseDoc:
9753 * @cur: a pointer to an array of xmlChar
9754 *
9755 * parse an XML in-memory document and build a tree.
9756 *
9757 * Returns the resulting document tree
9758 */
9759
9760xmlDocPtr
9761xmlParseDoc(xmlChar *cur) {
9762 return(xmlSAXParseDoc(NULL, cur, 0));
9763}
9764
9765
9766/************************************************************************
9767 * *
9768 * Miscellaneous *
9769 * *
9770 ************************************************************************/
9771
9772#ifdef LIBXML_XPATH_ENABLED
9773#include <libxml/xpath.h>
9774#endif
9775
/* Non-zero once xmlInitParser() has run; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * The initialization work is done only on the first call; later calls
 * return immediately until xmlCleanupParser() resets the state.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* flagged as done only after every step above has run */
        xmlParserInitialized = 1;
    }
}
9804
9805/**
9806 * xmlCleanupParser:
9807 *
9808 * Cleanup function for the XML parser. It tries to reclaim all
9809 * parsing related global memory allocated for the parser processing.
9810 * It doesn't deallocate any document related memory. Calling this
9811 * function should not prevent reusing the parser.
9812 */
9813
9814void
9815xmlCleanupParser(void) {
9816 xmlParserInitialized = 0;
9817 xmlCleanupCharEncodingHandlers();
9818 xmlCleanupPredefinedEntities();
9819}
9820
9821/**
9822 * xmlPedanticParserDefault:
9823 * @val: int 0 or 1
9824 *
9825 * Set and return the previous value for enabling pedantic warnings.
9826 *
9827 * Returns the last value for 0 for no substitution, 1 for substitution.
9828 */
9829
9830int
9831xmlPedanticParserDefault(int val) {
9832 int old = xmlPedanticParserDefaultValue;
9833
9834 xmlPedanticParserDefaultValue = val;
9835 return(old);
9836}
9837
9838/**
9839 * xmlSubstituteEntitiesDefault:
9840 * @val: int 0 or 1
9841 *
9842 * Set and return the previous value for default entity support.
9843 * Initially the parser always keep entity references instead of substituting
9844 * entity values in the output. This function has to be used to change the
9845 * default parser behaviour
9846 * SAX::subtituteEntities() has to be used for changing that on a file by
9847 * file basis.
9848 *
9849 * Returns the last value for 0 for no substitution, 1 for substitution.
9850 */
9851
9852int
9853xmlSubstituteEntitiesDefault(int val) {
9854 int old = xmlSubstituteEntitiesDefaultValue;
9855
9856 xmlSubstituteEntitiesDefaultValue = val;
9857 return(old);
9858}
9859
9860/**
9861 * xmlKeepBlanksDefault:
9862 * @val: int 0 or 1
9863 *
9864 * Set and return the previous value for default blanks text nodes support.
9865 * The 1.x version of the parser used an heuristic to try to detect
9866 * ignorable white spaces. As a result the SAX callback was generating
9867 * ignorableWhitespace() callbacks instead of characters() one, and when
9868 * using the DOM output text nodes containing those blanks were not generated.
9869 * The 2.x and later version will switch to the XML standard way and
9870 * ignorableWhitespace() are only generated when running the parser in
9871 * validating mode and when the current element doesn't allow CDATA or
9872 * mixed content.
9873 * This function is provided as a way to force the standard behaviour
9874 * on 1.X libs and to switch back to the old mode for compatibility when
9875 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9876 * by using xmlIsBlankNode() commodity function to detect the "empty"
9877 * nodes generated.
9878 * This value also affect autogeneration of indentation when saving code
9879 * if blanks sections are kept, indentation is not generated.
9880 *
9881 * Returns the last value for 0 for no substitution, 1 for substitution.
9882 */
9883
9884int
9885xmlKeepBlanksDefault(int val) {
9886 int old = xmlKeepBlanksDefaultValue;
9887
9888 xmlKeepBlanksDefaultValue = val;
9889 xmlIndentTreeOutput = !val;
9890 return(old);
9891}
9892