blob: 75d22216552fd12a26f8933c2b45765526136340 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
Owen Taylor3473f882001-02-23 17:55:21 +000085/*
Owen Taylor3473f882001-02-23 17:55:21 +000086 * List of XML prefixed PI allowed by W3C specs
87 */
88
/*
 * NULL-terminated table of the "xml"-prefixed processing instruction
 * targets. Both the strings and the table slots are read-only.
 */
static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Owen Taylor3473f882001-02-23 17:55:21 +0000108/************************************************************************
109 * *
110 * Parser stacks related functions and macros *
111 * *
112 ************************************************************************/
113
114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
115 const xmlChar ** str);
116
117/*
118 * Generic function for accessing stacks in the Parser Context
119 */
120
/*
 * PUSH_AND_POP(scope, type, name) generates two functions operating on the
 * ctxt->name##Tab / name##Nr / name##Max stack of the parser context:
 *
 *   scope int  name##Push(ctxt, value)
 *       Appends value, doubling the table when it is full, and mirrors the
 *       new top in ctxt->name. Returns the index used, or 0 when the table
 *       could not be enlarged.
 *   scope type name##Pop(ctxt)
 *       Removes and returns the top entry (0 when the stack is empty),
 *       clears the vacated slot and refreshes ctxt->name.
 *
 * The enlarged table is only committed once xmlRealloc succeeded: on
 * failure the previous table, its capacity and its content stay valid
 * instead of being leaked and replaced by NULL.
 */
#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        type *tmp;							\
        tmp = (type *) xmlRealloc(ctxt->name##Tab,			\
                 ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));	\
        if (tmp == NULL) {						\
            xmlGenericError(xmlGenericErrorContext,			\
                    "realloc failed !\n");				\
            return(0);							\
        }								\
        ctxt->name##Tab = tmp;						\
        ctxt->name##Max *= 2;						\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
149
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000150/**
151 * inputPop:
152 * @ctxt: an XML parser context
153 *
154 * Pops the top parser input from the input stack
155 *
156 * Returns the input just removed
157 */
158/**
159 * inputPush:
160 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 *
163 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 *
165 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166 */
167/**
168 * namePop:
169 * @ctxt: an XML parser context
170 *
171 * Pops the top element name from the name stack
172 *
173 * Returns the name just removed
174 */
175/**
176 * namePush:
177 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 *
180 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 *
182 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 */
184/**
185 * nodePop:
186 * @ctxt: an XML parser context
187 *
188 * Pops the top element node from the node stack
189 *
190 * Returns the node just removed
191 */
192/**
193 * nodePush:
194 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000196 *
197 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 *
199 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000200 */
Owen Taylor3473f882001-02-23 17:55:21 +0000201/*
202 * Those macros actually generate the functions
203 */
204PUSH_AND_POP(extern, xmlParserInputPtr, input)
205PUSH_AND_POP(extern, xmlNodePtr, node)
206PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
237/*
238 * Macros for accessing the content. Those should be used only by the parser,
239 * and not exported.
240 *
241 * Dirty macros, i.e. one often needs to make assumptions about the context to
242 * use them
243 *
244 * CUR_PTR return the current pointer to the xmlChar to be parsed.
245 * To be used with extreme caution since operations consuming
246 * characters may move the input buffer to a different location !
247 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
248 * This should be used internally by the parser
249 * only to compare to ASCII values otherwise it would break when
250 * running with UTF-8 encoding.
251 * RAW same as CUR but in the input buffer, bypass any token
252 * extraction that may have been done
253 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
254 * to compare on ASCII based substring.
255 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
256 * strings within the parser.
257 *
258 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
259 *
260 * NEXT Skip to the next character, this does the proper decoding
261 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000262 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000263 * CUR_CHAR(l) returns the current unicode character (int), set l
264 * to the number of xmlChars used for the encoding [0-5].
265 * CUR_SCHAR same but operate on a string instead of the context
266 * COPY_BUF copy the current unicode char to the target buffer, increment
267 * the index
268 * GROW, SHRINK handling of input buffers
269 */
270
/* RAW and CUR both expand to the byte at the current input position. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current input pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val bytes (counted in ctxt->nbChars), then hand a '%' over
 * to the parameter entity handler, and pop the input when it is exhausted
 * and cannot be refilled.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
283
Daniel Veillard46de64e2002-05-29 08:21:33 +0000284#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
285 xmlSHRINK (ctxt);
286
287static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
288 xmlParserInputShrink(ctxt->input);
289 if ((*ctxt->input->cur == 0) &&
290 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
291 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000292 }
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard46de64e2002-05-29 08:21:33 +0000294#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
295 xmlGROW (ctxt);
296
297static void xmlGROW (xmlParserCtxtPtr ctxt) {
298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
299 if ((*ctxt->input->cur == 0) &&
300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
301 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one byte and request more data when a 0 byte is
 * reached. The brace-block form is kept unchanged for existing callers.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance by l bytes, keeping line/col up to date from the byte being
 * left, then hand a '%' over to the parameter entity handler.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: it receives the length reported by the callee. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i and advance i: directly when
 * l is 1, through xmlCopyCharMultiByte() otherwise. The if/else statement
 * shape is kept unchanged for existing callers; only the arguments are
 * now parenthesized.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000329
330/**
331 * xmlSkipBlankChars:
332 * @ctxt: the XML parser context
333 *
334 * skip all blanks character found at that point in the input streams.
335 * It pops up finished entities in the process if allowable at that point.
336 *
337 * Returns the number of space chars skipped
338 */
339
340int
341xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000343
344 /*
345 * It's Okay to use CUR/NEXT here since all the blanks are on
346 * the ASCII range.
347 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
349 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000352 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000353 cur = ctxt->input->cur;
354 while (IS_BLANK(*cur)) {
355 if (*cur == '\n') {
356 ctxt->input->line++; ctxt->input->col = 1;
357 }
358 cur++;
359 res++;
360 if (*cur == 0) {
361 ctxt->input->cur = cur;
362 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
363 cur = ctxt->input->cur;
364 }
365 }
366 ctxt->input->cur = cur;
367 } else {
368 int cur;
369 do {
370 cur = CUR;
371 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
372 NEXT;
373 cur = CUR;
374 res++;
375 }
376 while ((cur == 0) && (ctxt->inputNr > 1) &&
377 (ctxt->instate != XML_PARSER_COMMENT)) {
378 xmlPopInput(ctxt);
379 cur = CUR;
380 }
381 /*
382 * Need to handle support of entities branching here
383 */
384 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
385 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
386 }
Owen Taylor3473f882001-02-23 17:55:21 +0000387 return(res);
388}
389
390/************************************************************************
391 * *
392 * Commodity functions to handle entities *
393 * *
394 ************************************************************************/
395
396/**
397 * xmlPopInput:
398 * @ctxt: an XML parser context
399 *
400 * xmlPopInput: the current input pointed by ctxt->input came to an end
401 * pop it and return the next char.
402 *
403 * Returns the current xmlChar in the parser context
404 */
405xmlChar
406xmlPopInput(xmlParserCtxtPtr ctxt) {
407 if (ctxt->inputNr == 1) return(0); /* End of main Input */
408 if (xmlParserDebugEntities)
409 xmlGenericError(xmlGenericErrorContext,
410 "Popping input %d\n", ctxt->inputNr);
411 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000412 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000413 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
414 return(xmlPopInput(ctxt));
415 return(CUR);
416}
417
418/**
419 * xmlPushInput:
420 * @ctxt: an XML parser context
421 * @input: an XML parser input fragment (entity, XML fragment ...).
422 *
423 * xmlPushInput: switch to a new input stream which is stacked on top
424 * of the previous one(s).
425 */
426void
427xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
428 if (input == NULL) return;
429
430 if (xmlParserDebugEntities) {
431 if ((ctxt->input != NULL) && (ctxt->input->filename))
432 xmlGenericError(xmlGenericErrorContext,
433 "%s(%d): ", ctxt->input->filename,
434 ctxt->input->line);
435 xmlGenericError(xmlGenericErrorContext,
436 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
437 }
438 inputPush(ctxt, input);
439 GROW;
440}
441
442/**
443 * xmlParseCharRef:
444 * @ctxt: an XML parser context
445 *
446 * parse Reference declarations
447 *
448 * [66] CharRef ::= '&#' [0-9]+ ';' |
449 * '&#x' [0-9a-fA-F]+ ';'
450 *
451 * [ WFC: Legal Character ]
452 * Characters referred to using character references must match the
453 * production for Char.
454 *
455 * Returns the value parsed (as an int), 0 in case of error
456 */
457int
458xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000459 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000460 int count = 0;
461
Owen Taylor3473f882001-02-23 17:55:21 +0000462 /*
463 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
464 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000465 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000466 (NXT(2) == 'x')) {
467 SKIP(3);
468 GROW;
469 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000470 if (count++ > 20) {
471 count = 0;
472 GROW;
473 }
474 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000475 val = val * 16 + (CUR - '0');
476 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
477 val = val * 16 + (CUR - 'a') + 10;
478 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
479 val = val * 16 + (CUR - 'A') + 10;
480 else {
481 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
483 ctxt->sax->error(ctxt->userData,
484 "xmlParseCharRef: invalid hexadecimal value\n");
485 ctxt->wellFormed = 0;
486 ctxt->disableSAX = 1;
487 val = 0;
488 break;
489 }
490 NEXT;
491 count++;
492 }
493 if (RAW == ';') {
494 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
495 ctxt->nbChars ++;
496 ctxt->input->cur++;
497 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000498 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000499 SKIP(2);
500 GROW;
501 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000502 if (count++ > 20) {
503 count = 0;
504 GROW;
505 }
506 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000507 val = val * 10 + (CUR - '0');
508 else {
509 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
511 ctxt->sax->error(ctxt->userData,
512 "xmlParseCharRef: invalid decimal value\n");
513 ctxt->wellFormed = 0;
514 ctxt->disableSAX = 1;
515 val = 0;
516 break;
517 }
518 NEXT;
519 count++;
520 }
521 if (RAW == ';') {
522 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
523 ctxt->nbChars ++;
524 ctxt->input->cur++;
525 }
526 } else {
527 ctxt->errNo = XML_ERR_INVALID_CHARREF;
528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
529 ctxt->sax->error(ctxt->userData,
530 "xmlParseCharRef: invalid value\n");
531 ctxt->wellFormed = 0;
532 ctxt->disableSAX = 1;
533 }
534
535 /*
536 * [ WFC: Legal Character ]
537 * Characters referred to using character references must match the
538 * production for Char.
539 */
540 if (IS_CHAR(val)) {
541 return(val);
542 } else {
543 ctxt->errNo = XML_ERR_INVALID_CHAR;
544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000545 ctxt->sax->error(ctxt->userData,
546 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000547 val);
548 ctxt->wellFormed = 0;
549 ctxt->disableSAX = 1;
550 }
551 return(0);
552}
553
554/**
555 * xmlParseStringCharRef:
556 * @ctxt: an XML parser context
557 * @str: a pointer to an index in the string
558 *
559 * parse Reference declarations, variant parsing from a string rather
560 * than an input flow.
561 *
562 * [66] CharRef ::= '&#' [0-9]+ ';' |
563 * '&#x' [0-9a-fA-F]+ ';'
564 *
565 * [ WFC: Legal Character ]
566 * Characters referred to using character references must match the
567 * production for Char.
568 *
569 * Returns the value parsed (as an int), 0 in case of error, str will be
570 * updated to the current value of the index
571 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000572static int
Owen Taylor3473f882001-02-23 17:55:21 +0000573xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
574 const xmlChar *ptr;
575 xmlChar cur;
576 int val = 0;
577
578 if ((str == NULL) || (*str == NULL)) return(0);
579 ptr = *str;
580 cur = *ptr;
581 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
582 ptr += 3;
583 cur = *ptr;
584 while (cur != ';') { /* Non input consuming loop */
585 if ((cur >= '0') && (cur <= '9'))
586 val = val * 16 + (cur - '0');
587 else if ((cur >= 'a') && (cur <= 'f'))
588 val = val * 16 + (cur - 'a') + 10;
589 else if ((cur >= 'A') && (cur <= 'F'))
590 val = val * 16 + (cur - 'A') + 10;
591 else {
592 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
594 ctxt->sax->error(ctxt->userData,
595 "xmlParseStringCharRef: invalid hexadecimal value\n");
596 ctxt->wellFormed = 0;
597 ctxt->disableSAX = 1;
598 val = 0;
599 break;
600 }
601 ptr++;
602 cur = *ptr;
603 }
604 if (cur == ';')
605 ptr++;
606 } else if ((cur == '&') && (ptr[1] == '#')){
607 ptr += 2;
608 cur = *ptr;
609 while (cur != ';') { /* Non input consuming loops */
610 if ((cur >= '0') && (cur <= '9'))
611 val = val * 10 + (cur - '0');
612 else {
613 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
616 "xmlParseStringCharRef: invalid decimal value\n");
617 ctxt->wellFormed = 0;
618 ctxt->disableSAX = 1;
619 val = 0;
620 break;
621 }
622 ptr++;
623 cur = *ptr;
624 }
625 if (cur == ';')
626 ptr++;
627 } else {
628 ctxt->errNo = XML_ERR_INVALID_CHARREF;
629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
630 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000631 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000632 ctxt->wellFormed = 0;
633 ctxt->disableSAX = 1;
634 return(0);
635 }
636 *str = ptr;
637
638 /*
639 * [ WFC: Legal Character ]
640 * Characters referred to using character references must match the
641 * production for Char.
642 */
643 if (IS_CHAR(val)) {
644 return(val);
645 } else {
646 ctxt->errNo = XML_ERR_INVALID_CHAR;
647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
648 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000649 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000650 ctxt->wellFormed = 0;
651 ctxt->disableSAX = 1;
652 }
653 return(0);
654}
655
656/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000657 * xmlNewBlanksWrapperInputStream:
658 * @ctxt: an XML parser context
659 * @entity: an Entity pointer
660 *
661 * Create a new input stream for wrapping
662 * blanks around a PEReference
663 *
664 * Returns the new input stream or NULL
665 */
666
667static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
668
Daniel Veillardf4862f02002-09-10 11:13:43 +0000669static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000670xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
671 xmlParserInputPtr input;
672 xmlChar *buffer;
673 size_t length;
674 if (entity == NULL) {
675 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
677 ctxt->sax->error(ctxt->userData,
678 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
679 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
680 return(NULL);
681 }
682 if (xmlParserDebugEntities)
683 xmlGenericError(xmlGenericErrorContext,
684 "new blanks wrapper for entity: %s\n", entity->name);
685 input = xmlNewInputStream(ctxt);
686 if (input == NULL) {
687 return(NULL);
688 }
689 length = xmlStrlen(entity->name) + 5;
690 buffer = xmlMalloc(length);
691 if (buffer == NULL) {
692 return(NULL);
693 }
694 buffer [0] = ' ';
695 buffer [1] = '%';
696 buffer [length-3] = ';';
697 buffer [length-2] = ' ';
698 buffer [length-1] = 0;
699 memcpy(buffer + 2, entity->name, length - 5);
700 input->free = deallocblankswrapper;
701 input->base = buffer;
702 input->cur = buffer;
703 input->length = length;
704 input->end = &buffer[length];
705 return(input);
706}
707
708/**
Owen Taylor3473f882001-02-23 17:55:21 +0000709 * xmlParserHandlePEReference:
710 * @ctxt: the parser context
711 *
712 * [69] PEReference ::= '%' Name ';'
713 *
714 * [ WFC: No Recursion ]
715 * A parsed entity must not contain a recursive
716 * reference to itself, either directly or indirectly.
717 *
718 * [ WFC: Entity Declared ]
719 * In a document without any DTD, a document with only an internal DTD
720 * subset which contains no parameter entity references, or a document
721 * with "standalone='yes'", ... ... The declaration of a parameter
722 * entity must precede any reference to it...
723 *
724 * [ VC: Entity Declared ]
725 * In a document with an external subset or external parameter entities
726 * with "standalone='no'", ... ... The declaration of a parameter entity
727 * must precede any reference to it...
728 *
729 * [ WFC: In DTD ]
730 * Parameter-entity references may only appear in the DTD.
731 * NOTE: misleading but this is handled.
732 *
733 * A PEReference may have been detected in the current input stream
734 * the handling is done accordingly to
735 * http://www.w3.org/TR/REC-xml#entproc
736 * i.e.
737 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000738 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000739 */
740void
741xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
742 xmlChar *name;
743 xmlEntityPtr entity = NULL;
744 xmlParserInputPtr input;
745
Owen Taylor3473f882001-02-23 17:55:21 +0000746 if (RAW != '%') return;
747 switch(ctxt->instate) {
748 case XML_PARSER_CDATA_SECTION:
749 return;
750 case XML_PARSER_COMMENT:
751 return;
752 case XML_PARSER_START_TAG:
753 return;
754 case XML_PARSER_END_TAG:
755 return;
756 case XML_PARSER_EOF:
757 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
759 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 return;
763 case XML_PARSER_PROLOG:
764 case XML_PARSER_START:
765 case XML_PARSER_MISC:
766 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
768 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 return;
772 case XML_PARSER_ENTITY_DECL:
773 case XML_PARSER_CONTENT:
774 case XML_PARSER_ATTRIBUTE_VALUE:
775 case XML_PARSER_PI:
776 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000777 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000778 /* we just ignore it there */
779 return;
780 case XML_PARSER_EPILOG:
781 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
783 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
784 ctxt->wellFormed = 0;
785 ctxt->disableSAX = 1;
786 return;
787 case XML_PARSER_ENTITY_VALUE:
788 /*
789 * NOTE: in the case of entity values, we don't do the
790 * substitution here since we need the literal
791 * entity value to be able to save the internal
792 * subset of the document.
793 * This will be handled by xmlStringDecodeEntities
794 */
795 return;
796 case XML_PARSER_DTD:
797 /*
798 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
799 * In the internal DTD subset, parameter-entity references
800 * can occur only where markup declarations can occur, not
801 * within markup declarations.
802 * In that case this is handled in xmlParseMarkupDecl
803 */
804 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
805 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000806 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
807 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000808 break;
809 case XML_PARSER_IGNORE:
810 return;
811 }
812
813 NEXT;
814 name = xmlParseName(ctxt);
815 if (xmlParserDebugEntities)
816 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000817 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000818 if (name == NULL) {
819 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000821 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000822 ctxt->wellFormed = 0;
823 ctxt->disableSAX = 1;
824 } else {
825 if (RAW == ';') {
826 NEXT;
827 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
828 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
829 if (entity == NULL) {
830
831 /*
832 * [ WFC: Entity Declared ]
833 * In a document without any DTD, a document with only an
834 * internal DTD subset which contains no parameter entity
835 * references, or a document with "standalone='yes'", ...
836 * ... The declaration of a parameter entity must precede
837 * any reference to it...
838 */
839 if ((ctxt->standalone == 1) ||
840 ((ctxt->hasExternalSubset == 0) &&
841 (ctxt->hasPErefs == 0))) {
842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
843 ctxt->sax->error(ctxt->userData,
844 "PEReference: %%%s; not found\n", name);
845 ctxt->wellFormed = 0;
846 ctxt->disableSAX = 1;
847 } else {
848 /*
849 * [ VC: Entity Declared ]
850 * In a document with an external subset or external
851 * parameter entities with "standalone='no'", ...
852 * ... The declaration of a parameter entity must precede
853 * any reference to it...
854 */
855 if ((!ctxt->disableSAX) &&
856 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
857 ctxt->vctxt.error(ctxt->vctxt.userData,
858 "PEReference: %%%s; not found\n", name);
859 } else if ((!ctxt->disableSAX) &&
860 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
861 ctxt->sax->warning(ctxt->userData,
862 "PEReference: %%%s; not found\n", name);
863 ctxt->valid = 0;
864 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000865 } else if (ctxt->input->free != deallocblankswrapper) {
866 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
867 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000868 } else {
869 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
870 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000871 xmlChar start[4];
872 xmlCharEncoding enc;
873
Owen Taylor3473f882001-02-23 17:55:21 +0000874 /*
875 * handle the extra spaces added before and after
876 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000877 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000878 */
879 input = xmlNewEntityInputStream(ctxt, entity);
880 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000881
882 /*
883 * Get the 4 first bytes and decode the charset
884 * if enc != XML_CHAR_ENCODING_NONE
885 * plug some encoding conversion routines.
886 */
887 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000888 if (entity->length >= 4) {
889 start[0] = RAW;
890 start[1] = NXT(1);
891 start[2] = NXT(2);
892 start[3] = NXT(3);
893 enc = xmlDetectCharEncoding(start, 4);
894 if (enc != XML_CHAR_ENCODING_NONE) {
895 xmlSwitchEncoding(ctxt, enc);
896 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000897 }
898
Owen Taylor3473f882001-02-23 17:55:21 +0000899 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
900 (RAW == '<') && (NXT(1) == '?') &&
901 (NXT(2) == 'x') && (NXT(3) == 'm') &&
902 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
903 xmlParseTextDecl(ctxt);
904 }
Owen Taylor3473f882001-02-23 17:55:21 +0000905 } else {
906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
907 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000908 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000909 name);
910 ctxt->wellFormed = 0;
911 ctxt->disableSAX = 1;
912 }
913 }
914 } else {
915 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
917 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000918 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000919 ctxt->wellFormed = 0;
920 ctxt->disableSAX = 1;
921 }
922 xmlFree(name);
923 }
924}
925
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> to the new size. It
 * expects an integer variable named <buffer>_size to be in scope and can
 * only be used inside a function returning a pointer: when the
 * reallocation fails the previous block is released (realloc leaves it
 * untouched, so it would otherwise leak) and the enclosing function
 * returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
938
939/**
940 * xmlStringDecodeEntities:
941 * @ctxt: the parser context
942 * @str: the input string
943 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
944 * @end: an end marker xmlChar, 0 if none
945 * @end2: an end marker xmlChar, 0 if none
946 * @end3: an end marker xmlChar, 0 if none
947 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000948 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000949 *
950 * [67] Reference ::= EntityRef | CharRef
951 *
952 * [69] PEReference ::= '%' Name ';'
953 *
954 * Returns A newly allocated string with the substitution done. The caller
955 * must deallocate it !
956 */
957xmlChar *
958xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
959 xmlChar end, xmlChar end2, xmlChar end3) {
960 xmlChar *buffer = NULL;
961 int buffer_size = 0;
962
963 xmlChar *current = NULL;
964 xmlEntityPtr ent;
965 int c,l;
966 int nbchars = 0;
967
968 if (str == NULL)
969 return(NULL);
970
971 if (ctxt->depth > 40) {
972 ctxt->errNo = XML_ERR_ENTITY_LOOP;
973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
974 ctxt->sax->error(ctxt->userData,
975 "Detected entity reference loop\n");
976 ctxt->wellFormed = 0;
977 ctxt->disableSAX = 1;
978 return(NULL);
979 }
980
981 /*
982 * allocate a translation buffer.
983 */
984 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
985 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
986 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000987 xmlGenericError(xmlGenericErrorContext,
988 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000989 return(NULL);
990 }
991
992 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000993 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000994 * we are operating on already parsed values.
995 */
996 c = CUR_SCHAR(str, l);
997 while ((c != 0) && (c != end) && /* non input consuming loop */
998 (c != end2) && (c != end3)) {
999
1000 if (c == 0) break;
1001 if ((c == '&') && (str[1] == '#')) {
1002 int val = xmlParseStringCharRef(ctxt, &str);
1003 if (val != 0) {
1004 COPY_BUF(0,buffer,nbchars,val);
1005 }
1006 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1007 if (xmlParserDebugEntities)
1008 xmlGenericError(xmlGenericErrorContext,
1009 "String decoding Entity Reference: %.30s\n",
1010 str);
1011 ent = xmlParseStringEntityRef(ctxt, &str);
1012 if ((ent != NULL) &&
1013 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1014 if (ent->content != NULL) {
1015 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1016 } else {
1017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1018 ctxt->sax->error(ctxt->userData,
1019 "internal error entity has no content\n");
1020 }
1021 } else if ((ent != NULL) && (ent->content != NULL)) {
1022 xmlChar *rep;
1023
1024 ctxt->depth++;
1025 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1026 0, 0, 0);
1027 ctxt->depth--;
1028 if (rep != NULL) {
1029 current = rep;
1030 while (*current != 0) { /* non input consuming loop */
1031 buffer[nbchars++] = *current++;
1032 if (nbchars >
1033 buffer_size - XML_PARSER_BUFFER_SIZE) {
1034 growBuffer(buffer);
1035 }
1036 }
1037 xmlFree(rep);
1038 }
1039 } else if (ent != NULL) {
1040 int i = xmlStrlen(ent->name);
1041 const xmlChar *cur = ent->name;
1042
1043 buffer[nbchars++] = '&';
1044 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1045 growBuffer(buffer);
1046 }
1047 for (;i > 0;i--)
1048 buffer[nbchars++] = *cur++;
1049 buffer[nbchars++] = ';';
1050 }
1051 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1052 if (xmlParserDebugEntities)
1053 xmlGenericError(xmlGenericErrorContext,
1054 "String decoding PE Reference: %.30s\n", str);
1055 ent = xmlParseStringPEReference(ctxt, &str);
1056 if (ent != NULL) {
1057 xmlChar *rep;
1058
1059 ctxt->depth++;
1060 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1061 0, 0, 0);
1062 ctxt->depth--;
1063 if (rep != NULL) {
1064 current = rep;
1065 while (*current != 0) { /* non input consuming loop */
1066 buffer[nbchars++] = *current++;
1067 if (nbchars >
1068 buffer_size - XML_PARSER_BUFFER_SIZE) {
1069 growBuffer(buffer);
1070 }
1071 }
1072 xmlFree(rep);
1073 }
1074 }
1075 } else {
1076 COPY_BUF(l,buffer,nbchars,c);
1077 str += l;
1078 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1079 growBuffer(buffer);
1080 }
1081 }
1082 c = CUR_SCHAR(str, l);
1083 }
1084 buffer[nbchars++] = 0;
1085 return(buffer);
1086}
1087
1088
1089/************************************************************************
1090 * *
1091 * Commodity functions to handle xmlChars *
1092 * *
1093 ************************************************************************/
1094
1095/**
1096 * xmlStrndup:
1097 * @cur: the input xmlChar *
1098 * @len: the len of @cur
1099 *
1100 * a strndup for array of xmlChar's
1101 *
1102 * Returns a new xmlChar * or NULL
1103 */
1104xmlChar *
1105xmlStrndup(const xmlChar *cur, int len) {
1106 xmlChar *ret;
1107
1108 if ((cur == NULL) || (len < 0)) return(NULL);
1109 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1110 if (ret == NULL) {
1111 xmlGenericError(xmlGenericErrorContext,
1112 "malloc of %ld byte failed\n",
1113 (len + 1) * (long)sizeof(xmlChar));
1114 return(NULL);
1115 }
1116 memcpy(ret, cur, len * sizeof(xmlChar));
1117 ret[len] = 0;
1118 return(ret);
1119}
1120
1121/**
1122 * xmlStrdup:
1123 * @cur: the input xmlChar *
1124 *
1125 * a strdup for array of xmlChar's. Since they are supposed to be
1126 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1127 * a termination mark of '0'.
1128 *
1129 * Returns a new xmlChar * or NULL
1130 */
1131xmlChar *
1132xmlStrdup(const xmlChar *cur) {
1133 const xmlChar *p = cur;
1134
1135 if (cur == NULL) return(NULL);
1136 while (*p != 0) p++; /* non input consuming */
1137 return(xmlStrndup(cur, p - cur));
1138}
1139
1140/**
1141 * xmlCharStrndup:
1142 * @cur: the input char *
1143 * @len: the len of @cur
1144 *
1145 * a strndup for char's to xmlChar's
1146 *
1147 * Returns a new xmlChar * or NULL
1148 */
1149
1150xmlChar *
1151xmlCharStrndup(const char *cur, int len) {
1152 int i;
1153 xmlChar *ret;
1154
1155 if ((cur == NULL) || (len < 0)) return(NULL);
1156 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1157 if (ret == NULL) {
1158 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1159 (len + 1) * (long)sizeof(xmlChar));
1160 return(NULL);
1161 }
1162 for (i = 0;i < len;i++)
1163 ret[i] = (xmlChar) cur[i];
1164 ret[len] = 0;
1165 return(ret);
1166}
1167
1168/**
1169 * xmlCharStrdup:
1170 * @cur: the input char *
1171 * @len: the len of @cur
1172 *
1173 * a strdup for char's to xmlChar's
1174 *
1175 * Returns a new xmlChar * or NULL
1176 */
1177
1178xmlChar *
1179xmlCharStrdup(const char *cur) {
1180 const char *p = cur;
1181
1182 if (cur == NULL) return(NULL);
1183 while (*p != '\0') p++; /* non input consuming */
1184 return(xmlCharStrndup(cur, p - cur));
1185}
1186
1187/**
1188 * xmlStrcmp:
1189 * @str1: the first xmlChar *
1190 * @str2: the second xmlChar *
1191 *
1192 * a strcmp for xmlChar's
1193 *
1194 * Returns the integer result of the comparison
1195 */
1196
1197int
1198xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1199 register int tmp;
1200
1201 if (str1 == str2) return(0);
1202 if (str1 == NULL) return(-1);
1203 if (str2 == NULL) return(1);
1204 do {
1205 tmp = *str1++ - *str2;
1206 if (tmp != 0) return(tmp);
1207 } while (*str2++ != 0);
1208 return 0;
1209}
1210
1211/**
1212 * xmlStrEqual:
1213 * @str1: the first xmlChar *
1214 * @str2: the second xmlChar *
1215 *
1216 * Check if both string are equal of have same content
1217 * Should be a bit more readable and faster than xmlStrEqual()
1218 *
1219 * Returns 1 if they are equal, 0 if they are different
1220 */
1221
1222int
1223xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1224 if (str1 == str2) return(1);
1225 if (str1 == NULL) return(0);
1226 if (str2 == NULL) return(0);
1227 do {
1228 if (*str1++ != *str2) return(0);
1229 } while (*str2++);
1230 return(1);
1231}
1232
1233/**
1234 * xmlStrncmp:
1235 * @str1: the first xmlChar *
1236 * @str2: the second xmlChar *
1237 * @len: the max comparison length
1238 *
1239 * a strncmp for xmlChar's
1240 *
1241 * Returns the integer result of the comparison
1242 */
1243
1244int
1245xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1246 register int tmp;
1247
1248 if (len <= 0) return(0);
1249 if (str1 == str2) return(0);
1250 if (str1 == NULL) return(-1);
1251 if (str2 == NULL) return(1);
1252 do {
1253 tmp = *str1++ - *str2;
1254 if (tmp != 0 || --len == 0) return(tmp);
1255 } while (*str2++ != 0);
1256 return 0;
1257}
1258
Daniel Veillardb44025c2001-10-11 22:55:55 +00001259static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001260 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1261 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1262 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1263 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1264 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1265 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1266 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1267 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1268 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1269 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1270 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1271 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1272 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1273 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1274 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1275 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1276 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1277 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1278 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1279 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1280 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1281 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1282 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1283 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1284 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1285 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1286 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1287 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1288 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1289 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1290 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1291 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1292};
1293
1294/**
1295 * xmlStrcasecmp:
1296 * @str1: the first xmlChar *
1297 * @str2: the second xmlChar *
1298 *
1299 * a strcasecmp for xmlChar's
1300 *
1301 * Returns the integer result of the comparison
1302 */
1303
1304int
1305xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1306 register int tmp;
1307
1308 if (str1 == str2) return(0);
1309 if (str1 == NULL) return(-1);
1310 if (str2 == NULL) return(1);
1311 do {
1312 tmp = casemap[*str1++] - casemap[*str2];
1313 if (tmp != 0) return(tmp);
1314 } while (*str2++ != 0);
1315 return 0;
1316}
1317
1318/**
1319 * xmlStrncasecmp:
1320 * @str1: the first xmlChar *
1321 * @str2: the second xmlChar *
1322 * @len: the max comparison length
1323 *
1324 * a strncasecmp for xmlChar's
1325 *
1326 * Returns the integer result of the comparison
1327 */
1328
1329int
1330xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1331 register int tmp;
1332
1333 if (len <= 0) return(0);
1334 if (str1 == str2) return(0);
1335 if (str1 == NULL) return(-1);
1336 if (str2 == NULL) return(1);
1337 do {
1338 tmp = casemap[*str1++] - casemap[*str2];
1339 if (tmp != 0 || --len == 0) return(tmp);
1340 } while (*str2++ != 0);
1341 return 0;
1342}
1343
1344/**
1345 * xmlStrchr:
1346 * @str: the xmlChar * array
1347 * @val: the xmlChar to search
1348 *
1349 * a strchr for xmlChar's
1350 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001351 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001352 */
1353
1354const xmlChar *
1355xmlStrchr(const xmlChar *str, xmlChar val) {
1356 if (str == NULL) return(NULL);
1357 while (*str != 0) { /* non input consuming */
1358 if (*str == val) return((xmlChar *) str);
1359 str++;
1360 }
1361 return(NULL);
1362}
1363
1364/**
1365 * xmlStrstr:
1366 * @str: the xmlChar * array (haystack)
1367 * @val: the xmlChar to search (needle)
1368 *
1369 * a strstr for xmlChar's
1370 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001371 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001372 */
1373
1374const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001375xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001376 int n;
1377
1378 if (str == NULL) return(NULL);
1379 if (val == NULL) return(NULL);
1380 n = xmlStrlen(val);
1381
1382 if (n == 0) return(str);
1383 while (*str != 0) { /* non input consuming */
1384 if (*str == *val) {
1385 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1386 }
1387 str++;
1388 }
1389 return(NULL);
1390}
1391
1392/**
1393 * xmlStrcasestr:
1394 * @str: the xmlChar * array (haystack)
1395 * @val: the xmlChar to search (needle)
1396 *
1397 * a case-ignoring strstr for xmlChar's
1398 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001399 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001400 */
1401
1402const xmlChar *
1403xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1404 int n;
1405
1406 if (str == NULL) return(NULL);
1407 if (val == NULL) return(NULL);
1408 n = xmlStrlen(val);
1409
1410 if (n == 0) return(str);
1411 while (*str != 0) { /* non input consuming */
1412 if (casemap[*str] == casemap[*val])
1413 if (!xmlStrncasecmp(str, val, n)) return(str);
1414 str++;
1415 }
1416 return(NULL);
1417}
1418
1419/**
1420 * xmlStrsub:
1421 * @str: the xmlChar * array (haystack)
1422 * @start: the index of the first char (zero based)
1423 * @len: the length of the substring
1424 *
1425 * Extract a substring of a given string
1426 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001427 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001428 */
1429
1430xmlChar *
1431xmlStrsub(const xmlChar *str, int start, int len) {
1432 int i;
1433
1434 if (str == NULL) return(NULL);
1435 if (start < 0) return(NULL);
1436 if (len < 0) return(NULL);
1437
1438 for (i = 0;i < start;i++) {
1439 if (*str == 0) return(NULL);
1440 str++;
1441 }
1442 if (*str == 0) return(NULL);
1443 return(xmlStrndup(str, len));
1444}
1445
1446/**
1447 * xmlStrlen:
1448 * @str: the xmlChar * array
1449 *
1450 * length of a xmlChar's string
1451 *
1452 * Returns the number of xmlChar contained in the ARRAY.
1453 */
1454
1455int
1456xmlStrlen(const xmlChar *str) {
1457 int len = 0;
1458
1459 if (str == NULL) return(0);
1460 while (*str != 0) { /* non input consuming */
1461 str++;
1462 len++;
1463 }
1464 return(len);
1465}
1466
1467/**
1468 * xmlStrncat:
1469 * @cur: the original xmlChar * array
1470 * @add: the xmlChar * array added
1471 * @len: the length of @add
1472 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001473 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001474 * first bytes of @add.
1475 *
1476 * Returns a new xmlChar *, the original @cur is reallocated if needed
1477 * and should not be freed
1478 */
1479
1480xmlChar *
1481xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1482 int size;
1483 xmlChar *ret;
1484
1485 if ((add == NULL) || (len == 0))
1486 return(cur);
1487 if (cur == NULL)
1488 return(xmlStrndup(add, len));
1489
1490 size = xmlStrlen(cur);
1491 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1492 if (ret == NULL) {
1493 xmlGenericError(xmlGenericErrorContext,
1494 "xmlStrncat: realloc of %ld byte failed\n",
1495 (size + len + 1) * (long)sizeof(xmlChar));
1496 return(cur);
1497 }
1498 memcpy(&ret[size], add, len * sizeof(xmlChar));
1499 ret[size + len] = 0;
1500 return(ret);
1501}
1502
1503/**
1504 * xmlStrcat:
1505 * @cur: the original xmlChar * array
1506 * @add: the xmlChar * array added
1507 *
1508 * a strcat for array of xmlChar's. Since they are supposed to be
1509 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1510 * a termination mark of '0'.
1511 *
1512 * Returns a new xmlChar * containing the concatenated string.
1513 */
1514xmlChar *
1515xmlStrcat(xmlChar *cur, const xmlChar *add) {
1516 const xmlChar *p = add;
1517
1518 if (add == NULL) return(cur);
1519 if (cur == NULL)
1520 return(xmlStrdup(add));
1521
1522 while (*p != 0) p++; /* non input consuming */
1523 return(xmlStrncat(cur, add, p - add));
1524}
1525
1526/************************************************************************
1527 * *
1528 * Commodity functions, cleanup needed ? *
1529 * *
1530 ************************************************************************/
1531
1532/**
1533 * areBlanks:
1534 * @ctxt: an XML parser context
1535 * @str: a xmlChar *
1536 * @len: the size of @str
1537 *
1538 * Is this a sequence of blank chars that one can ignore ?
1539 *
1540 * Returns 1 if ignorable 0 otherwise.
1541 */
1542
1543static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1544 int i, ret;
1545 xmlNodePtr lastChild;
1546
Daniel Veillard05c13a22001-09-09 08:38:09 +00001547 /*
1548 * Don't spend time trying to differentiate them, the same callback is
1549 * used !
1550 */
1551 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001552 return(0);
1553
Owen Taylor3473f882001-02-23 17:55:21 +00001554 /*
1555 * Check for xml:space value.
1556 */
1557 if (*(ctxt->space) == 1)
1558 return(0);
1559
1560 /*
1561 * Check that the string is made of blanks
1562 */
1563 for (i = 0;i < len;i++)
1564 if (!(IS_BLANK(str[i]))) return(0);
1565
1566 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001567 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001568 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001569 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001570 if (ctxt->myDoc != NULL) {
1571 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1572 if (ret == 0) return(1);
1573 if (ret == 1) return(0);
1574 }
1575
1576 /*
1577 * Otherwise, heuristic :-\
1578 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001579 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001580 if ((ctxt->node->children == NULL) &&
1581 (RAW == '<') && (NXT(1) == '/')) return(0);
1582
1583 lastChild = xmlGetLastChild(ctxt->node);
1584 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001585 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1586 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001587 } else if (xmlNodeIsText(lastChild))
1588 return(0);
1589 else if ((ctxt->node->children != NULL) &&
1590 (xmlNodeIsText(ctxt->node->children)))
1591 return(0);
1592 return(1);
1593}
1594
Owen Taylor3473f882001-02-23 17:55:21 +00001595/************************************************************************
1596 * *
1597 * Extra stuff for namespace support *
1598 * Relates to http://www.w3.org/TR/WD-xml-names *
1599 * *
1600 ************************************************************************/
1601
1602/**
1603 * xmlSplitQName:
1604 * @ctxt: an XML parser context
1605 * @name: an XML parser context
1606 * @prefix: a xmlChar **
1607 *
1608 * parse an UTF8 encoded XML qualified name string
1609 *
1610 * [NS 5] QName ::= (Prefix ':')? LocalPart
1611 *
1612 * [NS 6] Prefix ::= NCName
1613 *
1614 * [NS 7] LocalPart ::= NCName
1615 *
1616 * Returns the local part, and prefix is updated
1617 * to get the Prefix if any.
1618 */
1619
1620xmlChar *
1621xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1622 xmlChar buf[XML_MAX_NAMELEN + 5];
1623 xmlChar *buffer = NULL;
1624 int len = 0;
1625 int max = XML_MAX_NAMELEN;
1626 xmlChar *ret = NULL;
1627 const xmlChar *cur = name;
1628 int c;
1629
1630 *prefix = NULL;
1631
1632#ifndef XML_XML_NAMESPACE
1633 /* xml: prefix is not really a namespace */
1634 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1635 (cur[2] == 'l') && (cur[3] == ':'))
1636 return(xmlStrdup(name));
1637#endif
1638
1639 /* nasty but valid */
1640 if (cur[0] == ':')
1641 return(xmlStrdup(name));
1642
1643 c = *cur++;
1644 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1645 buf[len++] = c;
1646 c = *cur++;
1647 }
1648 if (len >= max) {
1649 /*
1650 * Okay someone managed to make a huge name, so he's ready to pay
1651 * for the processing speed.
1652 */
1653 max = len * 2;
1654
1655 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1656 if (buffer == NULL) {
1657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1658 ctxt->sax->error(ctxt->userData,
1659 "xmlSplitQName: out of memory\n");
1660 return(NULL);
1661 }
1662 memcpy(buffer, buf, len);
1663 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1664 if (len + 10 > max) {
1665 max *= 2;
1666 buffer = (xmlChar *) xmlRealloc(buffer,
1667 max * sizeof(xmlChar));
1668 if (buffer == NULL) {
1669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1670 ctxt->sax->error(ctxt->userData,
1671 "xmlSplitQName: out of memory\n");
1672 return(NULL);
1673 }
1674 }
1675 buffer[len++] = c;
1676 c = *cur++;
1677 }
1678 buffer[len] = 0;
1679 }
1680
1681 if (buffer == NULL)
1682 ret = xmlStrndup(buf, len);
1683 else {
1684 ret = buffer;
1685 buffer = NULL;
1686 max = XML_MAX_NAMELEN;
1687 }
1688
1689
1690 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001691 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001692 if (c == 0) return(ret);
1693 *prefix = ret;
1694 len = 0;
1695
Daniel Veillardbb284f42002-10-16 18:02:47 +00001696 /*
1697 * Check that the first character is proper to start
1698 * a new name
1699 */
1700 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1701 ((c >= 0x41) && (c <= 0x5A)) ||
1702 (c == '_') || (c == ':'))) {
1703 int l;
1704 int first = CUR_SCHAR(cur, l);
1705
1706 if (!IS_LETTER(first) && (first != '_')) {
1707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1708 ctxt->sax->error(ctxt->userData,
1709 "Name %s is not XML Namespace compliant\n",
1710 name);
1711 }
1712 }
1713 cur++;
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1716 buf[len++] = c;
1717 c = *cur++;
1718 }
1719 if (len >= max) {
1720 /*
1721 * Okay someone managed to make a huge name, so he's ready to pay
1722 * for the processing speed.
1723 */
1724 max = len * 2;
1725
1726 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1727 if (buffer == NULL) {
1728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1729 ctxt->sax->error(ctxt->userData,
1730 "xmlSplitQName: out of memory\n");
1731 return(NULL);
1732 }
1733 memcpy(buffer, buf, len);
1734 while (c != 0) { /* tested bigname2.xml */
1735 if (len + 10 > max) {
1736 max *= 2;
1737 buffer = (xmlChar *) xmlRealloc(buffer,
1738 max * sizeof(xmlChar));
1739 if (buffer == NULL) {
1740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1741 ctxt->sax->error(ctxt->userData,
1742 "xmlSplitQName: out of memory\n");
1743 return(NULL);
1744 }
1745 }
1746 buffer[len++] = c;
1747 c = *cur++;
1748 }
1749 buffer[len] = 0;
1750 }
1751
1752 if (buffer == NULL)
1753 ret = xmlStrndup(buf, len);
1754 else {
1755 ret = buffer;
1756 }
1757 }
1758
1759 return(ret);
1760}
1761
1762/************************************************************************
1763 * *
1764 * The parser itself *
1765 * Relates to http://www.w3.org/TR/REC-xml *
1766 * *
1767 ************************************************************************/
1768
Daniel Veillard76d66f42001-05-16 21:05:17 +00001769static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001770/**
1771 * xmlParseName:
1772 * @ctxt: an XML parser context
1773 *
1774 * parse an XML name.
1775 *
1776 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1777 * CombiningChar | Extender
1778 *
1779 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1780 *
1781 * [6] Names ::= Name (S Name)*
1782 *
1783 * Returns the Name parsed or NULL
1784 */
1785
1786xmlChar *
1787xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001788 const xmlChar *in;
1789 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001790 int count = 0;
1791
1792 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001793
1794 /*
1795 * Accelerator for simple ASCII names
1796 */
1797 in = ctxt->input->cur;
1798 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1799 ((*in >= 0x41) && (*in <= 0x5A)) ||
1800 (*in == '_') || (*in == ':')) {
1801 in++;
1802 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1803 ((*in >= 0x41) && (*in <= 0x5A)) ||
1804 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001805 (*in == '_') || (*in == '-') ||
1806 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001807 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001808 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001809 count = in - ctxt->input->cur;
1810 ret = xmlStrndup(ctxt->input->cur, count);
1811 ctxt->input->cur = in;
1812 return(ret);
1813 }
1814 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001815 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001816}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001817
Daniel Veillard46de64e2002-05-29 08:21:33 +00001818/**
1819 * xmlParseNameAndCompare:
1820 * @ctxt: an XML parser context
1821 *
1822 * parse an XML name and compares for match
1823 * (specialized for endtag parsing)
1824 *
1825 *
1826 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1827 * and the name for mismatch
1828 */
1829
Daniel Veillardf4862f02002-09-10 11:13:43 +00001830static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001831xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1832 const xmlChar *cmp = other;
1833 const xmlChar *in;
1834 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001835
1836 GROW;
1837
1838 in = ctxt->input->cur;
1839 while (*in != 0 && *in == *cmp) {
1840 ++in;
1841 ++cmp;
1842 }
1843 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1844 /* success */
1845 ctxt->input->cur = in;
1846 return (xmlChar*) 1;
1847 }
1848 /* failure (or end of input buffer), check with full function */
1849 ret = xmlParseName (ctxt);
1850 if (ret != 0 && xmlStrEqual (ret, other)) {
1851 xmlFree (ret);
1852 return (xmlChar*) 1;
1853 }
1854 return ret;
1855}
1856
Daniel Veillard76d66f42001-05-16 21:05:17 +00001857static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001858xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1859 xmlChar buf[XML_MAX_NAMELEN + 5];
1860 int len = 0, l;
1861 int c;
1862 int count = 0;
1863
1864 /*
1865 * Handler for more complex cases
1866 */
1867 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001868 c = CUR_CHAR(l);
1869 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1870 (!IS_LETTER(c) && (c != '_') &&
1871 (c != ':'))) {
1872 return(NULL);
1873 }
1874
1875 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1876 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1877 (c == '.') || (c == '-') ||
1878 (c == '_') || (c == ':') ||
1879 (IS_COMBINING(c)) ||
1880 (IS_EXTENDER(c)))) {
1881 if (count++ > 100) {
1882 count = 0;
1883 GROW;
1884 }
1885 COPY_BUF(l,buf,len,c);
1886 NEXTL(l);
1887 c = CUR_CHAR(l);
1888 if (len >= XML_MAX_NAMELEN) {
1889 /*
1890 * Okay someone managed to make a huge name, so he's ready to pay
1891 * for the processing speed.
1892 */
1893 xmlChar *buffer;
1894 int max = len * 2;
1895
1896 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1897 if (buffer == NULL) {
1898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1899 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001900 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001901 return(NULL);
1902 }
1903 memcpy(buffer, buf, len);
1904 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1905 (c == '.') || (c == '-') ||
1906 (c == '_') || (c == ':') ||
1907 (IS_COMBINING(c)) ||
1908 (IS_EXTENDER(c))) {
1909 if (count++ > 100) {
1910 count = 0;
1911 GROW;
1912 }
1913 if (len + 10 > max) {
1914 max *= 2;
1915 buffer = (xmlChar *) xmlRealloc(buffer,
1916 max * sizeof(xmlChar));
1917 if (buffer == NULL) {
1918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1919 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001920 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001921 return(NULL);
1922 }
1923 }
1924 COPY_BUF(l,buffer,len,c);
1925 NEXTL(l);
1926 c = CUR_CHAR(l);
1927 }
1928 buffer[len] = 0;
1929 return(buffer);
1930 }
1931 }
1932 return(xmlStrndup(buf, len));
1933}
1934
1935/**
1936 * xmlParseStringName:
1937 * @ctxt: an XML parser context
1938 * @str: a pointer to the string pointer (IN/OUT)
1939 *
1940 * parse an XML name.
1941 *
1942 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1943 * CombiningChar | Extender
1944 *
1945 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1946 *
1947 * [6] Names ::= Name (S Name)*
1948 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001949 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001950 * is updated to the current location in the string.
1951 */
1952
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001953static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001954xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1955 xmlChar buf[XML_MAX_NAMELEN + 5];
1956 const xmlChar *cur = *str;
1957 int len = 0, l;
1958 int c;
1959
1960 c = CUR_SCHAR(cur, l);
1961 if (!IS_LETTER(c) && (c != '_') &&
1962 (c != ':')) {
1963 return(NULL);
1964 }
1965
1966 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1967 (c == '.') || (c == '-') ||
1968 (c == '_') || (c == ':') ||
1969 (IS_COMBINING(c)) ||
1970 (IS_EXTENDER(c))) {
1971 COPY_BUF(l,buf,len,c);
1972 cur += l;
1973 c = CUR_SCHAR(cur, l);
1974 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1975 /*
1976 * Okay someone managed to make a huge name, so he's ready to pay
1977 * for the processing speed.
1978 */
1979 xmlChar *buffer;
1980 int max = len * 2;
1981
1982 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1983 if (buffer == NULL) {
1984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1985 ctxt->sax->error(ctxt->userData,
1986 "xmlParseStringName: out of memory\n");
1987 return(NULL);
1988 }
1989 memcpy(buffer, buf, len);
1990 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1991 (c == '.') || (c == '-') ||
1992 (c == '_') || (c == ':') ||
1993 (IS_COMBINING(c)) ||
1994 (IS_EXTENDER(c))) {
1995 if (len + 10 > max) {
1996 max *= 2;
1997 buffer = (xmlChar *) xmlRealloc(buffer,
1998 max * sizeof(xmlChar));
1999 if (buffer == NULL) {
2000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2001 ctxt->sax->error(ctxt->userData,
2002 "xmlParseStringName: out of memory\n");
2003 return(NULL);
2004 }
2005 }
2006 COPY_BUF(l,buffer,len,c);
2007 cur += l;
2008 c = CUR_SCHAR(cur, l);
2009 }
2010 buffer[len] = 0;
2011 *str = cur;
2012 return(buffer);
2013 }
2014 }
2015 *str = cur;
2016 return(xmlStrndup(buf, len));
2017}
2018
2019/**
2020 * xmlParseNmtoken:
2021 * @ctxt: an XML parser context
2022 *
2023 * parse an XML Nmtoken.
2024 *
2025 * [7] Nmtoken ::= (NameChar)+
2026 *
2027 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2028 *
2029 * Returns the Nmtoken parsed or NULL
2030 */
2031
2032xmlChar *
2033xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2034 xmlChar buf[XML_MAX_NAMELEN + 5];
2035 int len = 0, l;
2036 int c;
2037 int count = 0;
2038
2039 GROW;
2040 c = CUR_CHAR(l);
2041
2042 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2043 (c == '.') || (c == '-') ||
2044 (c == '_') || (c == ':') ||
2045 (IS_COMBINING(c)) ||
2046 (IS_EXTENDER(c))) {
2047 if (count++ > 100) {
2048 count = 0;
2049 GROW;
2050 }
2051 COPY_BUF(l,buf,len,c);
2052 NEXTL(l);
2053 c = CUR_CHAR(l);
2054 if (len >= XML_MAX_NAMELEN) {
2055 /*
2056 * Okay someone managed to make a huge token, so he's ready to pay
2057 * for the processing speed.
2058 */
2059 xmlChar *buffer;
2060 int max = len * 2;
2061
2062 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2063 if (buffer == NULL) {
2064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2065 ctxt->sax->error(ctxt->userData,
2066 "xmlParseNmtoken: out of memory\n");
2067 return(NULL);
2068 }
2069 memcpy(buffer, buf, len);
2070 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2071 (c == '.') || (c == '-') ||
2072 (c == '_') || (c == ':') ||
2073 (IS_COMBINING(c)) ||
2074 (IS_EXTENDER(c))) {
2075 if (count++ > 100) {
2076 count = 0;
2077 GROW;
2078 }
2079 if (len + 10 > max) {
2080 max *= 2;
2081 buffer = (xmlChar *) xmlRealloc(buffer,
2082 max * sizeof(xmlChar));
2083 if (buffer == NULL) {
2084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2085 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002086 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002087 return(NULL);
2088 }
2089 }
2090 COPY_BUF(l,buffer,len,c);
2091 NEXTL(l);
2092 c = CUR_CHAR(l);
2093 }
2094 buffer[len] = 0;
2095 return(buffer);
2096 }
2097 }
2098 if (len == 0)
2099 return(NULL);
2100 return(xmlStrndup(buf, len));
2101}
2102
2103/**
2104 * xmlParseEntityValue:
2105 * @ctxt: an XML parser context
2106 * @orig: if non-NULL store a copy of the original entity value
2107 *
2108 * parse a value for ENTITY declarations
2109 *
2110 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2111 * "'" ([^%&'] | PEReference | Reference)* "'"
2112 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002113 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002114 */
2115
2116xmlChar *
2117xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2118 xmlChar *buf = NULL;
2119 int len = 0;
2120 int size = XML_PARSER_BUFFER_SIZE;
2121 int c, l;
2122 xmlChar stop;
2123 xmlChar *ret = NULL;
2124 const xmlChar *cur = NULL;
2125 xmlParserInputPtr input;
2126
2127 if (RAW == '"') stop = '"';
2128 else if (RAW == '\'') stop = '\'';
2129 else {
2130 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2132 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2133 ctxt->wellFormed = 0;
2134 ctxt->disableSAX = 1;
2135 return(NULL);
2136 }
2137 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2138 if (buf == NULL) {
2139 xmlGenericError(xmlGenericErrorContext,
2140 "malloc of %d byte failed\n", size);
2141 return(NULL);
2142 }
2143
2144 /*
2145 * The content of the entity definition is copied in a buffer.
2146 */
2147
2148 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2149 input = ctxt->input;
2150 GROW;
2151 NEXT;
2152 c = CUR_CHAR(l);
2153 /*
2154 * NOTE: 4.4.5 Included in Literal
2155 * When a parameter entity reference appears in a literal entity
2156 * value, ... a single or double quote character in the replacement
2157 * text is always treated as a normal data character and will not
2158 * terminate the literal.
2159 * In practice it means we stop the loop only when back at parsing
2160 * the initial entity and the quote is found
2161 */
2162 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2163 (ctxt->input != input))) {
2164 if (len + 5 >= size) {
2165 size *= 2;
2166 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2167 if (buf == NULL) {
2168 xmlGenericError(xmlGenericErrorContext,
2169 "realloc of %d byte failed\n", size);
2170 return(NULL);
2171 }
2172 }
2173 COPY_BUF(l,buf,len,c);
2174 NEXTL(l);
2175 /*
2176 * Pop-up of finished entities.
2177 */
2178 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2179 xmlPopInput(ctxt);
2180
2181 GROW;
2182 c = CUR_CHAR(l);
2183 if (c == 0) {
2184 GROW;
2185 c = CUR_CHAR(l);
2186 }
2187 }
2188 buf[len] = 0;
2189
2190 /*
2191 * Raise problem w.r.t. '&' and '%' being used in non-entities
2192 * reference constructs. Note Charref will be handled in
2193 * xmlStringDecodeEntities()
2194 */
2195 cur = buf;
2196 while (*cur != 0) { /* non input consuming */
2197 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2198 xmlChar *name;
2199 xmlChar tmp = *cur;
2200
2201 cur++;
2202 name = xmlParseStringName(ctxt, &cur);
2203 if ((name == NULL) || (*cur != ';')) {
2204 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2206 ctxt->sax->error(ctxt->userData,
2207 "EntityValue: '%c' forbidden except for entities references\n",
2208 tmp);
2209 ctxt->wellFormed = 0;
2210 ctxt->disableSAX = 1;
2211 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002212 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2213 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002214 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2216 ctxt->sax->error(ctxt->userData,
2217 "EntityValue: PEReferences forbidden in internal subset\n",
2218 tmp);
2219 ctxt->wellFormed = 0;
2220 ctxt->disableSAX = 1;
2221 }
2222 if (name != NULL)
2223 xmlFree(name);
2224 }
2225 cur++;
2226 }
2227
2228 /*
2229 * Then PEReference entities are substituted.
2230 */
2231 if (c != stop) {
2232 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2234 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2235 ctxt->wellFormed = 0;
2236 ctxt->disableSAX = 1;
2237 xmlFree(buf);
2238 } else {
2239 NEXT;
2240 /*
2241 * NOTE: 4.4.7 Bypassed
2242 * When a general entity reference appears in the EntityValue in
2243 * an entity declaration, it is bypassed and left as is.
2244 * so XML_SUBSTITUTE_REF is not set here.
2245 */
2246 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2247 0, 0, 0);
2248 if (orig != NULL)
2249 *orig = buf;
2250 else
2251 xmlFree(buf);
2252 }
2253
2254 return(ret);
2255}
2256
2257/**
2258 * xmlParseAttValue:
2259 * @ctxt: an XML parser context
2260 *
2261 * parse a value for an attribute
2262 * Note: the parser won't do substitution of entities here, this
2263 * will be handled later in xmlStringGetNodeList
2264 *
2265 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2266 * "'" ([^<&'] | Reference)* "'"
2267 *
2268 * 3.3.3 Attribute-Value Normalization:
2269 * Before the value of an attribute is passed to the application or
2270 * checked for validity, the XML processor must normalize it as follows:
2271 * - a character reference is processed by appending the referenced
2272 * character to the attribute value
2273 * - an entity reference is processed by recursively processing the
2274 * replacement text of the entity
2275 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2276 * appending #x20 to the normalized value, except that only a single
2277 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2278 * parsed entity or the literal entity value of an internal parsed entity
2279 * - other characters are processed by appending them to the normalized value
2280 * If the declared value is not CDATA, then the XML processor must further
2281 * process the normalized attribute value by discarding any leading and
2282 * trailing space (#x20) characters, and by replacing sequences of space
2283 * (#x20) characters by a single space (#x20) character.
2284 * All attributes for which no declaration has been read should be treated
2285 * by a non-validating parser as if declared CDATA.
2286 *
2287 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2288 */
2289
2290xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002291xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2292
2293xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002294xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2295 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002296 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002297 xmlChar *ret = NULL;
2298 SHRINK;
2299 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002300 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002301 if (*in != '"' && *in != '\'') {
2302 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2304 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2305 ctxt->wellFormed = 0;
2306 ctxt->disableSAX = 1;
2307 return(NULL);
2308 }
2309 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2310 limit = *in;
2311 ++in;
2312
2313 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2314 *in != '&' && *in != '<'
2315 ) {
2316 ++in;
2317 }
2318 if (*in != limit) {
2319 return xmlParseAttValueComplex(ctxt);
2320 }
2321 ++in;
2322 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2323 CUR_PTR = in;
2324 return ret;
2325}
2326
2327xmlChar *
2328xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2329 xmlChar limit = 0;
2330 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 int len = 0;
2332 int buf_size = 0;
2333 int c, l;
2334 xmlChar *current = NULL;
2335 xmlEntityPtr ent;
2336
2337
2338 SHRINK;
2339 if (NXT(0) == '"') {
2340 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2341 limit = '"';
2342 NEXT;
2343 } else if (NXT(0) == '\'') {
2344 limit = '\'';
2345 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2346 NEXT;
2347 } else {
2348 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2350 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2351 ctxt->wellFormed = 0;
2352 ctxt->disableSAX = 1;
2353 return(NULL);
2354 }
2355
2356 /*
2357 * allocate a translation buffer.
2358 */
2359 buf_size = XML_PARSER_BUFFER_SIZE;
2360 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2361 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002362 xmlGenericError(xmlGenericErrorContext,
2363 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002364 return(NULL);
2365 }
2366
2367 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002368 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002369 */
2370 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002371 while ((NXT(0) != limit) && /* checked */
2372 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002373 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002374 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002375 if (NXT(1) == '#') {
2376 int val = xmlParseCharRef(ctxt);
2377 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002378 if (ctxt->replaceEntities) {
2379 if (len > buf_size - 10) {
2380 growBuffer(buf);
2381 }
2382 buf[len++] = '&';
2383 } else {
2384 /*
2385 * The reparsing will be done in xmlStringGetNodeList()
2386 * called by the attribute() function in SAX.c
2387 */
2388 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002389
Daniel Veillard319a7422001-09-11 09:27:09 +00002390 if (len > buf_size - 10) {
2391 growBuffer(buf);
2392 }
2393 current = &buffer[0];
2394 while (*current != 0) { /* non input consuming */
2395 buf[len++] = *current++;
2396 }
Owen Taylor3473f882001-02-23 17:55:21 +00002397 }
2398 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002399 if (len > buf_size - 10) {
2400 growBuffer(buf);
2401 }
Owen Taylor3473f882001-02-23 17:55:21 +00002402 len += xmlCopyChar(0, &buf[len], val);
2403 }
2404 } else {
2405 ent = xmlParseEntityRef(ctxt);
2406 if ((ent != NULL) &&
2407 (ctxt->replaceEntities != 0)) {
2408 xmlChar *rep;
2409
2410 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2411 rep = xmlStringDecodeEntities(ctxt, ent->content,
2412 XML_SUBSTITUTE_REF, 0, 0, 0);
2413 if (rep != NULL) {
2414 current = rep;
2415 while (*current != 0) { /* non input consuming */
2416 buf[len++] = *current++;
2417 if (len > buf_size - 10) {
2418 growBuffer(buf);
2419 }
2420 }
2421 xmlFree(rep);
2422 }
2423 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002424 if (len > buf_size - 10) {
2425 growBuffer(buf);
2426 }
Owen Taylor3473f882001-02-23 17:55:21 +00002427 if (ent->content != NULL)
2428 buf[len++] = ent->content[0];
2429 }
2430 } else if (ent != NULL) {
2431 int i = xmlStrlen(ent->name);
2432 const xmlChar *cur = ent->name;
2433
2434 /*
2435 * This may look absurd but is needed to detect
2436 * entities problems
2437 */
2438 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2439 (ent->content != NULL)) {
2440 xmlChar *rep;
2441 rep = xmlStringDecodeEntities(ctxt, ent->content,
2442 XML_SUBSTITUTE_REF, 0, 0, 0);
2443 if (rep != NULL)
2444 xmlFree(rep);
2445 }
2446
2447 /*
2448 * Just output the reference
2449 */
2450 buf[len++] = '&';
2451 if (len > buf_size - i - 10) {
2452 growBuffer(buf);
2453 }
2454 for (;i > 0;i--)
2455 buf[len++] = *cur++;
2456 buf[len++] = ';';
2457 }
2458 }
2459 } else {
2460 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2461 COPY_BUF(l,buf,len,0x20);
2462 if (len > buf_size - 10) {
2463 growBuffer(buf);
2464 }
2465 } else {
2466 COPY_BUF(l,buf,len,c);
2467 if (len > buf_size - 10) {
2468 growBuffer(buf);
2469 }
2470 }
2471 NEXTL(l);
2472 }
2473 GROW;
2474 c = CUR_CHAR(l);
2475 }
2476 buf[len++] = 0;
2477 if (RAW == '<') {
2478 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2480 ctxt->sax->error(ctxt->userData,
2481 "Unescaped '<' not allowed in attributes values\n");
2482 ctxt->wellFormed = 0;
2483 ctxt->disableSAX = 1;
2484 } else if (RAW != limit) {
2485 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2487 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2488 ctxt->wellFormed = 0;
2489 ctxt->disableSAX = 1;
2490 } else
2491 NEXT;
2492 return(buf);
2493}
2494
2495/**
2496 * xmlParseSystemLiteral:
2497 * @ctxt: an XML parser context
2498 *
2499 * parse an XML Literal
2500 *
2501 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2502 *
2503 * Returns the SystemLiteral parsed or NULL
2504 */
2505
2506xmlChar *
2507xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2508 xmlChar *buf = NULL;
2509 int len = 0;
2510 int size = XML_PARSER_BUFFER_SIZE;
2511 int cur, l;
2512 xmlChar stop;
2513 int state = ctxt->instate;
2514 int count = 0;
2515
2516 SHRINK;
2517 if (RAW == '"') {
2518 NEXT;
2519 stop = '"';
2520 } else if (RAW == '\'') {
2521 NEXT;
2522 stop = '\'';
2523 } else {
2524 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2526 ctxt->sax->error(ctxt->userData,
2527 "SystemLiteral \" or ' expected\n");
2528 ctxt->wellFormed = 0;
2529 ctxt->disableSAX = 1;
2530 return(NULL);
2531 }
2532
2533 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2534 if (buf == NULL) {
2535 xmlGenericError(xmlGenericErrorContext,
2536 "malloc of %d byte failed\n", size);
2537 return(NULL);
2538 }
2539 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2540 cur = CUR_CHAR(l);
2541 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2542 if (len + 5 >= size) {
2543 size *= 2;
2544 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2545 if (buf == NULL) {
2546 xmlGenericError(xmlGenericErrorContext,
2547 "realloc of %d byte failed\n", size);
2548 ctxt->instate = (xmlParserInputState) state;
2549 return(NULL);
2550 }
2551 }
2552 count++;
2553 if (count > 50) {
2554 GROW;
2555 count = 0;
2556 }
2557 COPY_BUF(l,buf,len,cur);
2558 NEXTL(l);
2559 cur = CUR_CHAR(l);
2560 if (cur == 0) {
2561 GROW;
2562 SHRINK;
2563 cur = CUR_CHAR(l);
2564 }
2565 }
2566 buf[len] = 0;
2567 ctxt->instate = (xmlParserInputState) state;
2568 if (!IS_CHAR(cur)) {
2569 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2571 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2572 ctxt->wellFormed = 0;
2573 ctxt->disableSAX = 1;
2574 } else {
2575 NEXT;
2576 }
2577 return(buf);
2578}
2579
2580/**
2581 * xmlParsePubidLiteral:
2582 * @ctxt: an XML parser context
2583 *
2584 * parse an XML public literal
2585 *
2586 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2587 *
2588 * Returns the PubidLiteral parsed or NULL.
2589 */
2590
2591xmlChar *
2592xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2593 xmlChar *buf = NULL;
2594 int len = 0;
2595 int size = XML_PARSER_BUFFER_SIZE;
2596 xmlChar cur;
2597 xmlChar stop;
2598 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002599 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002600
2601 SHRINK;
2602 if (RAW == '"') {
2603 NEXT;
2604 stop = '"';
2605 } else if (RAW == '\'') {
2606 NEXT;
2607 stop = '\'';
2608 } else {
2609 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2611 ctxt->sax->error(ctxt->userData,
2612 "SystemLiteral \" or ' expected\n");
2613 ctxt->wellFormed = 0;
2614 ctxt->disableSAX = 1;
2615 return(NULL);
2616 }
2617 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2618 if (buf == NULL) {
2619 xmlGenericError(xmlGenericErrorContext,
2620 "malloc of %d byte failed\n", size);
2621 return(NULL);
2622 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002623 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002624 cur = CUR;
2625 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2626 if (len + 1 >= size) {
2627 size *= 2;
2628 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2629 if (buf == NULL) {
2630 xmlGenericError(xmlGenericErrorContext,
2631 "realloc of %d byte failed\n", size);
2632 return(NULL);
2633 }
2634 }
2635 buf[len++] = cur;
2636 count++;
2637 if (count > 50) {
2638 GROW;
2639 count = 0;
2640 }
2641 NEXT;
2642 cur = CUR;
2643 if (cur == 0) {
2644 GROW;
2645 SHRINK;
2646 cur = CUR;
2647 }
2648 }
2649 buf[len] = 0;
2650 if (cur != stop) {
2651 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2653 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2654 ctxt->wellFormed = 0;
2655 ctxt->disableSAX = 1;
2656 } else {
2657 NEXT;
2658 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002659 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002660 return(buf);
2661}
2662
Daniel Veillard48b2f892001-02-25 16:11:03 +00002663void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002664/**
2665 * xmlParseCharData:
2666 * @ctxt: an XML parser context
2667 * @cdata: int indicating whether we are within a CDATA section
2668 *
2669 * parse a CharData section.
2670 * if we are within a CDATA section ']]>' marks an end of section.
2671 *
2672 * The right angle bracket (>) may be represented using the string "&gt;",
2673 * and must, for compatibility, be escaped using "&gt;" or a character
2674 * reference when it appears in the string "]]>" in content, when that
2675 * string is not marking the end of a CDATA section.
2676 *
2677 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2678 */
2679
2680void
2681xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002682 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002683 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002684 int line = ctxt->input->line;
2685 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002686
2687 SHRINK;
2688 GROW;
2689 /*
2690 * Accelerated common case where input don't need to be
2691 * modified before passing it to the handler.
2692 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002693 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002694 in = ctxt->input->cur;
2695 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002696get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002697 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2698 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002699 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002700 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002701 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002702 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002703 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002704 ctxt->input->line++;
2705 in++;
2706 }
2707 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002708 }
2709 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002710 if ((in[1] == ']') && (in[2] == '>')) {
2711 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2713 ctxt->sax->error(ctxt->userData,
2714 "Sequence ']]>' not allowed in content\n");
2715 ctxt->input->cur = in;
2716 ctxt->wellFormed = 0;
2717 ctxt->disableSAX = 1;
2718 return;
2719 }
2720 in++;
2721 goto get_more;
2722 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002723 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002724 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002725 if (IS_BLANK(*ctxt->input->cur)) {
2726 const xmlChar *tmp = ctxt->input->cur;
2727 ctxt->input->cur = in;
2728 if (areBlanks(ctxt, tmp, nbchar)) {
2729 if (ctxt->sax->ignorableWhitespace != NULL)
2730 ctxt->sax->ignorableWhitespace(ctxt->userData,
2731 tmp, nbchar);
2732 } else {
2733 if (ctxt->sax->characters != NULL)
2734 ctxt->sax->characters(ctxt->userData,
2735 tmp, nbchar);
2736 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002737 line = ctxt->input->line;
2738 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002739 } else {
2740 if (ctxt->sax->characters != NULL)
2741 ctxt->sax->characters(ctxt->userData,
2742 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002743 line = ctxt->input->line;
2744 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002745 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002746 }
2747 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002748 if (*in == 0xD) {
2749 in++;
2750 if (*in == 0xA) {
2751 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002752 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002753 ctxt->input->line++;
2754 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002756 in--;
2757 }
2758 if (*in == '<') {
2759 return;
2760 }
2761 if (*in == '&') {
2762 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002763 }
2764 SHRINK;
2765 GROW;
2766 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002767 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002768 nbchar = 0;
2769 }
Daniel Veillard50582112001-03-26 22:52:16 +00002770 ctxt->input->line = line;
2771 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772 xmlParseCharDataComplex(ctxt, cdata);
2773}
2774
2775void
2776xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002777 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2778 int nbchar = 0;
2779 int cur, l;
2780 int count = 0;
2781
2782 SHRINK;
2783 GROW;
2784 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002785 while ((cur != '<') && /* checked */
2786 (cur != '&') &&
2787 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002788 if ((cur == ']') && (NXT(1) == ']') &&
2789 (NXT(2) == '>')) {
2790 if (cdata) break;
2791 else {
2792 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2794 ctxt->sax->error(ctxt->userData,
2795 "Sequence ']]>' not allowed in content\n");
2796 /* Should this be relaxed ??? I see a "must here */
2797 ctxt->wellFormed = 0;
2798 ctxt->disableSAX = 1;
2799 }
2800 }
2801 COPY_BUF(l,buf,nbchar,cur);
2802 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2803 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002804 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002805 */
2806 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2807 if (areBlanks(ctxt, buf, nbchar)) {
2808 if (ctxt->sax->ignorableWhitespace != NULL)
2809 ctxt->sax->ignorableWhitespace(ctxt->userData,
2810 buf, nbchar);
2811 } else {
2812 if (ctxt->sax->characters != NULL)
2813 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2814 }
2815 }
2816 nbchar = 0;
2817 }
2818 count++;
2819 if (count > 50) {
2820 GROW;
2821 count = 0;
2822 }
2823 NEXTL(l);
2824 cur = CUR_CHAR(l);
2825 }
2826 if (nbchar != 0) {
2827 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002828 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002829 */
2830 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2831 if (areBlanks(ctxt, buf, nbchar)) {
2832 if (ctxt->sax->ignorableWhitespace != NULL)
2833 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2834 } else {
2835 if (ctxt->sax->characters != NULL)
2836 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2837 }
2838 }
2839 }
2840}
2841
2842/**
2843 * xmlParseExternalID:
2844 * @ctxt: an XML parser context
2845 * @publicID: a xmlChar** receiving PubidLiteral
2846 * @strict: indicate whether we should restrict parsing to only
2847 * production [75], see NOTE below
2848 *
2849 * Parse an External ID or a Public ID
2850 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002851 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002852 * 'PUBLIC' S PubidLiteral S SystemLiteral
2853 *
2854 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2855 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2856 *
2857 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2858 *
2859 * Returns the function returns SystemLiteral and in the second
2860 * case publicID receives PubidLiteral, is strict is off
2861 * it is possible to return NULL and have publicID set.
2862 */
2863
2864xmlChar *
2865xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2866 xmlChar *URI = NULL;
2867
2868 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002869
2870 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002871 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2872 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2873 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2874 SKIP(6);
2875 if (!IS_BLANK(CUR)) {
2876 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2878 ctxt->sax->error(ctxt->userData,
2879 "Space required after 'SYSTEM'\n");
2880 ctxt->wellFormed = 0;
2881 ctxt->disableSAX = 1;
2882 }
2883 SKIP_BLANKS;
2884 URI = xmlParseSystemLiteral(ctxt);
2885 if (URI == NULL) {
2886 ctxt->errNo = XML_ERR_URI_REQUIRED;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2888 ctxt->sax->error(ctxt->userData,
2889 "xmlParseExternalID: SYSTEM, no URI\n");
2890 ctxt->wellFormed = 0;
2891 ctxt->disableSAX = 1;
2892 }
2893 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2894 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2895 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2896 SKIP(6);
2897 if (!IS_BLANK(CUR)) {
2898 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2900 ctxt->sax->error(ctxt->userData,
2901 "Space required after 'PUBLIC'\n");
2902 ctxt->wellFormed = 0;
2903 ctxt->disableSAX = 1;
2904 }
2905 SKIP_BLANKS;
2906 *publicID = xmlParsePubidLiteral(ctxt);
2907 if (*publicID == NULL) {
2908 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2910 ctxt->sax->error(ctxt->userData,
2911 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2912 ctxt->wellFormed = 0;
2913 ctxt->disableSAX = 1;
2914 }
2915 if (strict) {
2916 /*
2917 * We don't handle [83] so "S SystemLiteral" is required.
2918 */
2919 if (!IS_BLANK(CUR)) {
2920 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2922 ctxt->sax->error(ctxt->userData,
2923 "Space required after the Public Identifier\n");
2924 ctxt->wellFormed = 0;
2925 ctxt->disableSAX = 1;
2926 }
2927 } else {
2928 /*
2929 * We handle [83] so we return immediately, if
2930 * "S SystemLiteral" is not detected. From a purely parsing
2931 * point of view that's a nice mess.
2932 */
2933 const xmlChar *ptr;
2934 GROW;
2935
2936 ptr = CUR_PTR;
2937 if (!IS_BLANK(*ptr)) return(NULL);
2938
2939 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2940 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2941 }
2942 SKIP_BLANKS;
2943 URI = xmlParseSystemLiteral(ctxt);
2944 if (URI == NULL) {
2945 ctxt->errNo = XML_ERR_URI_REQUIRED;
2946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2947 ctxt->sax->error(ctxt->userData,
2948 "xmlParseExternalID: PUBLIC, no URI\n");
2949 ctxt->wellFormed = 0;
2950 ctxt->disableSAX = 1;
2951 }
2952 }
2953 return(URI);
2954}
2955
2956/**
2957 * xmlParseComment:
2958 * @ctxt: an XML parser context
2959 *
2960 * Skip an XML (SGML) comment <!-- .... -->
2961 * The spec says that "For compatibility, the string "--" (double-hyphen)
2962 * must not occur within comments. "
2963 *
2964 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2965 */
2966void
2967xmlParseComment(xmlParserCtxtPtr ctxt) {
2968 xmlChar *buf = NULL;
2969 int len;
2970 int size = XML_PARSER_BUFFER_SIZE;
2971 int q, ql;
2972 int r, rl;
2973 int cur, l;
2974 xmlParserInputState state;
2975 xmlParserInputPtr input = ctxt->input;
2976 int count = 0;
2977
2978 /*
2979 * Check that there is a comment right here.
2980 */
2981 if ((RAW != '<') || (NXT(1) != '!') ||
2982 (NXT(2) != '-') || (NXT(3) != '-')) return;
2983
2984 state = ctxt->instate;
2985 ctxt->instate = XML_PARSER_COMMENT;
2986 SHRINK;
2987 SKIP(4);
2988 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2989 if (buf == NULL) {
2990 xmlGenericError(xmlGenericErrorContext,
2991 "malloc of %d byte failed\n", size);
2992 ctxt->instate = state;
2993 return;
2994 }
2995 q = CUR_CHAR(ql);
2996 NEXTL(ql);
2997 r = CUR_CHAR(rl);
2998 NEXTL(rl);
2999 cur = CUR_CHAR(l);
3000 len = 0;
3001 while (IS_CHAR(cur) && /* checked */
3002 ((cur != '>') ||
3003 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003004 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003005 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3007 ctxt->sax->error(ctxt->userData,
3008 "Comment must not contain '--' (double-hyphen)`\n");
3009 ctxt->wellFormed = 0;
3010 ctxt->disableSAX = 1;
3011 }
3012 if (len + 5 >= size) {
3013 size *= 2;
3014 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3015 if (buf == NULL) {
3016 xmlGenericError(xmlGenericErrorContext,
3017 "realloc of %d byte failed\n", size);
3018 ctxt->instate = state;
3019 return;
3020 }
3021 }
3022 COPY_BUF(ql,buf,len,q);
3023 q = r;
3024 ql = rl;
3025 r = cur;
3026 rl = l;
3027
3028 count++;
3029 if (count > 50) {
3030 GROW;
3031 count = 0;
3032 }
3033 NEXTL(l);
3034 cur = CUR_CHAR(l);
3035 if (cur == 0) {
3036 SHRINK;
3037 GROW;
3038 cur = CUR_CHAR(l);
3039 }
3040 }
3041 buf[len] = 0;
3042 if (!IS_CHAR(cur)) {
3043 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3045 ctxt->sax->error(ctxt->userData,
3046 "Comment not terminated \n<!--%.50s\n", buf);
3047 ctxt->wellFormed = 0;
3048 ctxt->disableSAX = 1;
3049 xmlFree(buf);
3050 } else {
3051 if (input != ctxt->input) {
3052 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055"Comment doesn't start and stop in the same entity\n");
3056 ctxt->wellFormed = 0;
3057 ctxt->disableSAX = 1;
3058 }
3059 NEXT;
3060 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3061 (!ctxt->disableSAX))
3062 ctxt->sax->comment(ctxt->userData, buf);
3063 xmlFree(buf);
3064 }
3065 ctxt->instate = state;
3066}
3067
3068/**
3069 * xmlParsePITarget:
3070 * @ctxt: an XML parser context
3071 *
3072 * parse the name of a PI
3073 *
3074 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3075 *
3076 * Returns the PITarget name or NULL
3077 */
3078
3079xmlChar *
3080xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3081 xmlChar *name;
3082
3083 name = xmlParseName(ctxt);
3084 if ((name != NULL) &&
3085 ((name[0] == 'x') || (name[0] == 'X')) &&
3086 ((name[1] == 'm') || (name[1] == 'M')) &&
3087 ((name[2] == 'l') || (name[2] == 'L'))) {
3088 int i;
3089 if ((name[0] == 'x') && (name[1] == 'm') &&
3090 (name[2] == 'l') && (name[3] == 0)) {
3091 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3093 ctxt->sax->error(ctxt->userData,
3094 "XML declaration allowed only at the start of the document\n");
3095 ctxt->wellFormed = 0;
3096 ctxt->disableSAX = 1;
3097 return(name);
3098 } else if (name[3] == 0) {
3099 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3101 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3102 ctxt->wellFormed = 0;
3103 ctxt->disableSAX = 1;
3104 return(name);
3105 }
3106 for (i = 0;;i++) {
3107 if (xmlW3CPIs[i] == NULL) break;
3108 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3109 return(name);
3110 }
3111 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3112 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3113 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003114 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003115 }
3116 }
3117 return(name);
3118}
3119
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must have the form: catalog S? '=' S? quoted-URL S?
 * Any deviation raises an XML_WAR_CATALOG_PI warning, except a missing
 * '=' after the "catalog" keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *href = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;
    while (IS_BLANK(*p))
        p++;
    if (*p != '=')
        return;
    p++;
    while (IS_BLANK(*p))
        p++;

    /*
     * The URL is delimited by a pair of identical quote characters.
     */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    href = xmlStrndup(start, p - start);
    p++;

    /*
     * Only blanks may follow the closing quote.
     */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto error;

    if (href != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, href);
        xmlFree(href);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (href != NULL)
        xmlFree(href);
}
#endif
3182
Owen Taylor3473f882001-02-23 17:55:21 +00003183/**
3184 * xmlParsePI:
3185 * @ctxt: an XML parser context
3186 *
3187 * parse an XML Processing Instruction.
3188 *
3189 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3190 *
3191 * The processing is transfered to SAX once parsed.
3192 */
3193
3194void
3195xmlParsePI(xmlParserCtxtPtr ctxt) {
3196 xmlChar *buf = NULL;
3197 int len = 0;
3198 int size = XML_PARSER_BUFFER_SIZE;
3199 int cur, l;
3200 xmlChar *target;
3201 xmlParserInputState state;
3202 int count = 0;
3203
3204 if ((RAW == '<') && (NXT(1) == '?')) {
3205 xmlParserInputPtr input = ctxt->input;
3206 state = ctxt->instate;
3207 ctxt->instate = XML_PARSER_PI;
3208 /*
3209 * this is a Processing Instruction.
3210 */
3211 SKIP(2);
3212 SHRINK;
3213
3214 /*
3215 * Parse the target name and check for special support like
3216 * namespace.
3217 */
3218 target = xmlParsePITarget(ctxt);
3219 if (target != NULL) {
3220 if ((RAW == '?') && (NXT(1) == '>')) {
3221 if (input != ctxt->input) {
3222 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3224 ctxt->sax->error(ctxt->userData,
3225 "PI declaration doesn't start and stop in the same entity\n");
3226 ctxt->wellFormed = 0;
3227 ctxt->disableSAX = 1;
3228 }
3229 SKIP(2);
3230
3231 /*
3232 * SAX: PI detected.
3233 */
3234 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3235 (ctxt->sax->processingInstruction != NULL))
3236 ctxt->sax->processingInstruction(ctxt->userData,
3237 target, NULL);
3238 ctxt->instate = state;
3239 xmlFree(target);
3240 return;
3241 }
3242 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3243 if (buf == NULL) {
3244 xmlGenericError(xmlGenericErrorContext,
3245 "malloc of %d byte failed\n", size);
3246 ctxt->instate = state;
3247 return;
3248 }
3249 cur = CUR;
3250 if (!IS_BLANK(cur)) {
3251 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3253 ctxt->sax->error(ctxt->userData,
3254 "xmlParsePI: PI %s space expected\n", target);
3255 ctxt->wellFormed = 0;
3256 ctxt->disableSAX = 1;
3257 }
3258 SKIP_BLANKS;
3259 cur = CUR_CHAR(l);
3260 while (IS_CHAR(cur) && /* checked */
3261 ((cur != '?') || (NXT(1) != '>'))) {
3262 if (len + 5 >= size) {
3263 size *= 2;
3264 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3265 if (buf == NULL) {
3266 xmlGenericError(xmlGenericErrorContext,
3267 "realloc of %d byte failed\n", size);
3268 ctxt->instate = state;
3269 return;
3270 }
3271 }
3272 count++;
3273 if (count > 50) {
3274 GROW;
3275 count = 0;
3276 }
3277 COPY_BUF(l,buf,len,cur);
3278 NEXTL(l);
3279 cur = CUR_CHAR(l);
3280 if (cur == 0) {
3281 SHRINK;
3282 GROW;
3283 cur = CUR_CHAR(l);
3284 }
3285 }
3286 buf[len] = 0;
3287 if (cur != '?') {
3288 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3290 ctxt->sax->error(ctxt->userData,
3291 "xmlParsePI: PI %s never end ...\n", target);
3292 ctxt->wellFormed = 0;
3293 ctxt->disableSAX = 1;
3294 } else {
3295 if (input != ctxt->input) {
3296 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3298 ctxt->sax->error(ctxt->userData,
3299 "PI declaration doesn't start and stop in the same entity\n");
3300 ctxt->wellFormed = 0;
3301 ctxt->disableSAX = 1;
3302 }
3303 SKIP(2);
3304
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003305#ifdef LIBXML_CATALOG_ENABLED
3306 if (((state == XML_PARSER_MISC) ||
3307 (state == XML_PARSER_START)) &&
3308 (xmlStrEqual(target, XML_CATALOG_PI))) {
3309 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3310 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3311 (allow == XML_CATA_ALLOW_ALL))
3312 xmlParseCatalogPI(ctxt, buf);
3313 }
3314#endif
3315
3316
Owen Taylor3473f882001-02-23 17:55:21 +00003317 /*
3318 * SAX: PI detected.
3319 */
3320 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3321 (ctxt->sax->processingInstruction != NULL))
3322 ctxt->sax->processingInstruction(ctxt->userData,
3323 target, buf);
3324 }
3325 xmlFree(buf);
3326 xmlFree(target);
3327 } else {
3328 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3330 ctxt->sax->error(ctxt->userData,
3331 "xmlParsePI : no target name\n");
3332 ctxt->wellFormed = 0;
3333 ctxt->disableSAX = 1;
3334 }
3335 ctxt->instate = state;
3336 }
3337}
3338
3339/**
3340 * xmlParseNotationDecl:
3341 * @ctxt: an XML parser context
3342 *
3343 * parse a notation declaration
3344 *
3345 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3346 *
3347 * Hence there is actually 3 choices:
3348 * 'PUBLIC' S PubidLiteral
3349 * 'PUBLIC' S PubidLiteral S SystemLiteral
3350 * and 'SYSTEM' S SystemLiteral
3351 *
3352 * See the NOTE on xmlParseExternalID().
3353 */
3354
3355void
3356xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3357 xmlChar *name;
3358 xmlChar *Pubid;
3359 xmlChar *Systemid;
3360
3361 if ((RAW == '<') && (NXT(1) == '!') &&
3362 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3363 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3364 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3365 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3366 xmlParserInputPtr input = ctxt->input;
3367 SHRINK;
3368 SKIP(10);
3369 if (!IS_BLANK(CUR)) {
3370 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3372 ctxt->sax->error(ctxt->userData,
3373 "Space required after '<!NOTATION'\n");
3374 ctxt->wellFormed = 0;
3375 ctxt->disableSAX = 1;
3376 return;
3377 }
3378 SKIP_BLANKS;
3379
Daniel Veillard76d66f42001-05-16 21:05:17 +00003380 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003381 if (name == NULL) {
3382 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData,
3385 "NOTATION: Name expected here\n");
3386 ctxt->wellFormed = 0;
3387 ctxt->disableSAX = 1;
3388 return;
3389 }
3390 if (!IS_BLANK(CUR)) {
3391 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3393 ctxt->sax->error(ctxt->userData,
3394 "Space required after the NOTATION name'\n");
3395 ctxt->wellFormed = 0;
3396 ctxt->disableSAX = 1;
3397 return;
3398 }
3399 SKIP_BLANKS;
3400
3401 /*
3402 * Parse the IDs.
3403 */
3404 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3405 SKIP_BLANKS;
3406
3407 if (RAW == '>') {
3408 if (input != ctxt->input) {
3409 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412"Notation declaration doesn't start and stop in the same entity\n");
3413 ctxt->wellFormed = 0;
3414 ctxt->disableSAX = 1;
3415 }
3416 NEXT;
3417 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3418 (ctxt->sax->notationDecl != NULL))
3419 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3420 } else {
3421 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3423 ctxt->sax->error(ctxt->userData,
3424 "'>' required to close NOTATION declaration\n");
3425 ctxt->wellFormed = 0;
3426 ctxt->disableSAX = 1;
3427 }
3428 xmlFree(name);
3429 if (Systemid != NULL) xmlFree(Systemid);
3430 if (Pubid != NULL) xmlFree(Pubid);
3431 }
3432}
3433
3434/**
3435 * xmlParseEntityDecl:
3436 * @ctxt: an XML parser context
3437 *
3438 * parse <!ENTITY declarations
3439 *
3440 * [70] EntityDecl ::= GEDecl | PEDecl
3441 *
3442 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3443 *
3444 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3445 *
3446 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3447 *
3448 * [74] PEDef ::= EntityValue | ExternalID
3449 *
3450 * [76] NDataDecl ::= S 'NDATA' S Name
3451 *
3452 * [ VC: Notation Declared ]
3453 * The Name must match the declared name of a notation.
3454 */
3455
3456void
3457xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3458 xmlChar *name = NULL;
3459 xmlChar *value = NULL;
3460 xmlChar *URI = NULL, *literal = NULL;
3461 xmlChar *ndata = NULL;
3462 int isParameter = 0;
3463 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003464 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003465
3466 GROW;
3467 if ((RAW == '<') && (NXT(1) == '!') &&
3468 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3469 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3470 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3471 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003472 SHRINK;
3473 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003474 skipped = SKIP_BLANKS;
3475 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003476 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3478 ctxt->sax->error(ctxt->userData,
3479 "Space required after '<!ENTITY'\n");
3480 ctxt->wellFormed = 0;
3481 ctxt->disableSAX = 1;
3482 }
Owen Taylor3473f882001-02-23 17:55:21 +00003483
3484 if (RAW == '%') {
3485 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003486 skipped = SKIP_BLANKS;
3487 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003488 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3490 ctxt->sax->error(ctxt->userData,
3491 "Space required after '%'\n");
3492 ctxt->wellFormed = 0;
3493 ctxt->disableSAX = 1;
3494 }
Owen Taylor3473f882001-02-23 17:55:21 +00003495 isParameter = 1;
3496 }
3497
Daniel Veillard76d66f42001-05-16 21:05:17 +00003498 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003499 if (name == NULL) {
3500 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3502 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3503 ctxt->wellFormed = 0;
3504 ctxt->disableSAX = 1;
3505 return;
3506 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003507 skipped = SKIP_BLANKS;
3508 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003509 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3511 ctxt->sax->error(ctxt->userData,
3512 "Space required after the entity name\n");
3513 ctxt->wellFormed = 0;
3514 ctxt->disableSAX = 1;
3515 }
Owen Taylor3473f882001-02-23 17:55:21 +00003516
Daniel Veillardf5582f12002-06-11 10:08:16 +00003517 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003518 /*
3519 * handle the various case of definitions...
3520 */
3521 if (isParameter) {
3522 if ((RAW == '"') || (RAW == '\'')) {
3523 value = xmlParseEntityValue(ctxt, &orig);
3524 if (value) {
3525 if ((ctxt->sax != NULL) &&
3526 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3527 ctxt->sax->entityDecl(ctxt->userData, name,
3528 XML_INTERNAL_PARAMETER_ENTITY,
3529 NULL, NULL, value);
3530 }
3531 } else {
3532 URI = xmlParseExternalID(ctxt, &literal, 1);
3533 if ((URI == NULL) && (literal == NULL)) {
3534 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3536 ctxt->sax->error(ctxt->userData,
3537 "Entity value required\n");
3538 ctxt->wellFormed = 0;
3539 ctxt->disableSAX = 1;
3540 }
3541 if (URI) {
3542 xmlURIPtr uri;
3543
3544 uri = xmlParseURI((const char *) URI);
3545 if (uri == NULL) {
3546 ctxt->errNo = XML_ERR_INVALID_URI;
3547 if ((ctxt->sax != NULL) &&
3548 (!ctxt->disableSAX) &&
3549 (ctxt->sax->error != NULL))
3550 ctxt->sax->error(ctxt->userData,
3551 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003552 /*
3553 * This really ought to be a well formedness error
3554 * but the XML Core WG decided otherwise c.f. issue
3555 * E26 of the XML erratas.
3556 */
Owen Taylor3473f882001-02-23 17:55:21 +00003557 } else {
3558 if (uri->fragment != NULL) {
3559 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3560 if ((ctxt->sax != NULL) &&
3561 (!ctxt->disableSAX) &&
3562 (ctxt->sax->error != NULL))
3563 ctxt->sax->error(ctxt->userData,
3564 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003565 /*
3566 * Okay this is foolish to block those but not
3567 * invalid URIs.
3568 */
Owen Taylor3473f882001-02-23 17:55:21 +00003569 ctxt->wellFormed = 0;
3570 } else {
3571 if ((ctxt->sax != NULL) &&
3572 (!ctxt->disableSAX) &&
3573 (ctxt->sax->entityDecl != NULL))
3574 ctxt->sax->entityDecl(ctxt->userData, name,
3575 XML_EXTERNAL_PARAMETER_ENTITY,
3576 literal, URI, NULL);
3577 }
3578 xmlFreeURI(uri);
3579 }
3580 }
3581 }
3582 } else {
3583 if ((RAW == '"') || (RAW == '\'')) {
3584 value = xmlParseEntityValue(ctxt, &orig);
3585 if ((ctxt->sax != NULL) &&
3586 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3587 ctxt->sax->entityDecl(ctxt->userData, name,
3588 XML_INTERNAL_GENERAL_ENTITY,
3589 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003590 /*
3591 * For expat compatibility in SAX mode.
3592 */
3593 if ((ctxt->myDoc == NULL) ||
3594 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3595 if (ctxt->myDoc == NULL) {
3596 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3597 }
3598 if (ctxt->myDoc->intSubset == NULL)
3599 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3600 BAD_CAST "fake", NULL, NULL);
3601
3602 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3603 NULL, NULL, value);
3604 }
Owen Taylor3473f882001-02-23 17:55:21 +00003605 } else {
3606 URI = xmlParseExternalID(ctxt, &literal, 1);
3607 if ((URI == NULL) && (literal == NULL)) {
3608 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3610 ctxt->sax->error(ctxt->userData,
3611 "Entity value required\n");
3612 ctxt->wellFormed = 0;
3613 ctxt->disableSAX = 1;
3614 }
3615 if (URI) {
3616 xmlURIPtr uri;
3617
3618 uri = xmlParseURI((const char *)URI);
3619 if (uri == NULL) {
3620 ctxt->errNo = XML_ERR_INVALID_URI;
3621 if ((ctxt->sax != NULL) &&
3622 (!ctxt->disableSAX) &&
3623 (ctxt->sax->error != NULL))
3624 ctxt->sax->error(ctxt->userData,
3625 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003626 /*
3627 * This really ought to be a well formedness error
3628 * but the XML Core WG decided otherwise c.f. issue
3629 * E26 of the XML erratas.
3630 */
Owen Taylor3473f882001-02-23 17:55:21 +00003631 } else {
3632 if (uri->fragment != NULL) {
3633 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3634 if ((ctxt->sax != NULL) &&
3635 (!ctxt->disableSAX) &&
3636 (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003639 /*
3640 * Okay this is foolish to block those but not
3641 * invalid URIs.
3642 */
Owen Taylor3473f882001-02-23 17:55:21 +00003643 ctxt->wellFormed = 0;
3644 }
3645 xmlFreeURI(uri);
3646 }
3647 }
3648 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3649 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3651 ctxt->sax->error(ctxt->userData,
3652 "Space required before 'NDATA'\n");
3653 ctxt->wellFormed = 0;
3654 ctxt->disableSAX = 1;
3655 }
3656 SKIP_BLANKS;
3657 if ((RAW == 'N') && (NXT(1) == 'D') &&
3658 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3659 (NXT(4) == 'A')) {
3660 SKIP(5);
3661 if (!IS_BLANK(CUR)) {
3662 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3664 ctxt->sax->error(ctxt->userData,
3665 "Space required after 'NDATA'\n");
3666 ctxt->wellFormed = 0;
3667 ctxt->disableSAX = 1;
3668 }
3669 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003670 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3672 (ctxt->sax->unparsedEntityDecl != NULL))
3673 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3674 literal, URI, ndata);
3675 } else {
3676 if ((ctxt->sax != NULL) &&
3677 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3678 ctxt->sax->entityDecl(ctxt->userData, name,
3679 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3680 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003681 /*
3682 * For expat compatibility in SAX mode.
3683 * assuming the entity repalcement was asked for
3684 */
3685 if ((ctxt->replaceEntities != 0) &&
3686 ((ctxt->myDoc == NULL) ||
3687 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3688 if (ctxt->myDoc == NULL) {
3689 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3690 }
3691
3692 if (ctxt->myDoc->intSubset == NULL)
3693 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3694 BAD_CAST "fake", NULL, NULL);
3695 entityDecl(ctxt, name,
3696 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3697 literal, URI, NULL);
3698 }
Owen Taylor3473f882001-02-23 17:55:21 +00003699 }
3700 }
3701 }
3702 SKIP_BLANKS;
3703 if (RAW != '>') {
3704 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3706 ctxt->sax->error(ctxt->userData,
3707 "xmlParseEntityDecl: entity %s not terminated\n", name);
3708 ctxt->wellFormed = 0;
3709 ctxt->disableSAX = 1;
3710 } else {
3711 if (input != ctxt->input) {
3712 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3714 ctxt->sax->error(ctxt->userData,
3715"Entity declaration doesn't start and stop in the same entity\n");
3716 ctxt->wellFormed = 0;
3717 ctxt->disableSAX = 1;
3718 }
3719 NEXT;
3720 }
3721 if (orig != NULL) {
3722 /*
3723 * Ugly mechanism to save the raw entity value.
3724 */
3725 xmlEntityPtr cur = NULL;
3726
3727 if (isParameter) {
3728 if ((ctxt->sax != NULL) &&
3729 (ctxt->sax->getParameterEntity != NULL))
3730 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3731 } else {
3732 if ((ctxt->sax != NULL) &&
3733 (ctxt->sax->getEntity != NULL))
3734 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003735 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3736 cur = getEntity(ctxt, name);
3737 }
Owen Taylor3473f882001-02-23 17:55:21 +00003738 }
3739 if (cur != NULL) {
3740 if (cur->orig != NULL)
3741 xmlFree(orig);
3742 else
3743 cur->orig = orig;
3744 } else
3745 xmlFree(orig);
3746 }
3747 if (name != NULL) xmlFree(name);
3748 if (value != NULL) xmlFree(value);
3749 if (URI != NULL) xmlFree(URI);
3750 if (literal != NULL) xmlFree(literal);
3751 if (ndata != NULL) xmlFree(ndata);
3752 }
3753}
3754
3755/**
3756 * xmlParseDefaultDecl:
3757 * @ctxt: an XML parser context
3758 * @value: Receive a possible fixed default value for the attribute
3759 *
3760 * Parse an attribute default declaration
3761 *
3762 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3763 *
3764 * [ VC: Required Attribute ]
3765 * if the default declaration is the keyword #REQUIRED, then the
3766 * attribute must be specified for all elements of the type in the
3767 * attribute-list declaration.
3768 *
3769 * [ VC: Attribute Default Legal ]
3770 * The declared default value must meet the lexical constraints of
3771 * the declared attribute type c.f. xmlValidateAttributeDecl()
3772 *
3773 * [ VC: Fixed Attribute Default ]
3774 * if an attribute has a default value declared with the #FIXED
3775 * keyword, instances of that attribute must match the default value.
3776 *
3777 * [ WFC: No < in Attribute Values ]
3778 * handled in xmlParseAttValue()
3779 *
3780 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3781 * or XML_ATTRIBUTE_FIXED.
3782 */
3783
3784int
3785xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3786 int val;
3787 xmlChar *ret;
3788
3789 *value = NULL;
3790 if ((RAW == '#') && (NXT(1) == 'R') &&
3791 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3792 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3793 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3794 (NXT(8) == 'D')) {
3795 SKIP(9);
3796 return(XML_ATTRIBUTE_REQUIRED);
3797 }
3798 if ((RAW == '#') && (NXT(1) == 'I') &&
3799 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3800 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3801 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3802 SKIP(8);
3803 return(XML_ATTRIBUTE_IMPLIED);
3804 }
3805 val = XML_ATTRIBUTE_NONE;
3806 if ((RAW == '#') && (NXT(1) == 'F') &&
3807 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3808 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3809 SKIP(6);
3810 val = XML_ATTRIBUTE_FIXED;
3811 if (!IS_BLANK(CUR)) {
3812 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3814 ctxt->sax->error(ctxt->userData,
3815 "Space required after '#FIXED'\n");
3816 ctxt->wellFormed = 0;
3817 ctxt->disableSAX = 1;
3818 }
3819 SKIP_BLANKS;
3820 }
3821 ret = xmlParseAttValue(ctxt);
3822 ctxt->instate = XML_PARSER_DTD;
3823 if (ret == NULL) {
3824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3825 ctxt->sax->error(ctxt->userData,
3826 "Attribute default value declaration error\n");
3827 ctxt->wellFormed = 0;
3828 ctxt->disableSAX = 1;
3829 } else
3830 *value = ret;
3831 return(val);
3832}
3833
3834/**
3835 * xmlParseNotationType:
3836 * @ctxt: an XML parser context
3837 *
3838 * parse an Notation attribute type.
3839 *
3840 * Note: the leading 'NOTATION' S part has already being parsed...
3841 *
3842 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3843 *
3844 * [ VC: Notation Attributes ]
3845 * Values of this type must match one of the notation names included
3846 * in the declaration; all notation names in the declaration must be declared.
3847 *
3848 * Returns: the notation attribute tree built while parsing
3849 */
3850
3851xmlEnumerationPtr
3852xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3853 xmlChar *name;
3854 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3855
3856 if (RAW != '(') {
3857 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3859 ctxt->sax->error(ctxt->userData,
3860 "'(' required to start 'NOTATION'\n");
3861 ctxt->wellFormed = 0;
3862 ctxt->disableSAX = 1;
3863 return(NULL);
3864 }
3865 SHRINK;
3866 do {
3867 NEXT;
3868 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003869 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003870 if (name == NULL) {
3871 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3873 ctxt->sax->error(ctxt->userData,
3874 "Name expected in NOTATION declaration\n");
3875 ctxt->wellFormed = 0;
3876 ctxt->disableSAX = 1;
3877 return(ret);
3878 }
3879 cur = xmlCreateEnumeration(name);
3880 xmlFree(name);
3881 if (cur == NULL) return(ret);
3882 if (last == NULL) ret = last = cur;
3883 else {
3884 last->next = cur;
3885 last = cur;
3886 }
3887 SKIP_BLANKS;
3888 } while (RAW == '|');
3889 if (RAW != ')') {
3890 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3892 ctxt->sax->error(ctxt->userData,
3893 "')' required to finish NOTATION declaration\n");
3894 ctxt->wellFormed = 0;
3895 ctxt->disableSAX = 1;
3896 if ((last != NULL) && (last != ret))
3897 xmlFreeEnumeration(last);
3898 return(ret);
3899 }
3900 NEXT;
3901 return(ret);
3902}
3903
3904/**
3905 * xmlParseEnumerationType:
3906 * @ctxt: an XML parser context
3907 *
3908 * parse an Enumeration attribute type.
3909 *
3910 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3911 *
3912 * [ VC: Enumeration ]
3913 * Values of this type must match one of the Nmtoken tokens in
3914 * the declaration
3915 *
3916 * Returns: the enumeration attribute tree built while parsing
3917 */
3918
3919xmlEnumerationPtr
3920xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3921 xmlChar *name;
3922 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3923
3924 if (RAW != '(') {
3925 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3927 ctxt->sax->error(ctxt->userData,
3928 "'(' required to start ATTLIST enumeration\n");
3929 ctxt->wellFormed = 0;
3930 ctxt->disableSAX = 1;
3931 return(NULL);
3932 }
3933 SHRINK;
3934 do {
3935 NEXT;
3936 SKIP_BLANKS;
3937 name = xmlParseNmtoken(ctxt);
3938 if (name == NULL) {
3939 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3941 ctxt->sax->error(ctxt->userData,
3942 "NmToken expected in ATTLIST enumeration\n");
3943 ctxt->wellFormed = 0;
3944 ctxt->disableSAX = 1;
3945 return(ret);
3946 }
3947 cur = xmlCreateEnumeration(name);
3948 xmlFree(name);
3949 if (cur == NULL) return(ret);
3950 if (last == NULL) ret = last = cur;
3951 else {
3952 last->next = cur;
3953 last = cur;
3954 }
3955 SKIP_BLANKS;
3956 } while (RAW == '|');
3957 if (RAW != ')') {
3958 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3960 ctxt->sax->error(ctxt->userData,
3961 "')' required to finish ATTLIST enumeration\n");
3962 ctxt->wellFormed = 0;
3963 ctxt->disableSAX = 1;
3964 return(ret);
3965 }
3966 NEXT;
3967 return(ret);
3968}
3969
3970/**
3971 * xmlParseEnumeratedType:
3972 * @ctxt: an XML parser context
3973 * @tree: the enumeration tree built while parsing
3974 *
3975 * parse an Enumerated attribute type.
3976 *
3977 * [57] EnumeratedType ::= NotationType | Enumeration
3978 *
3979 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3980 *
3981 *
3982 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3983 */
3984
3985int
3986xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3987 if ((RAW == 'N') && (NXT(1) == 'O') &&
3988 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3989 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3990 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3991 SKIP(8);
3992 if (!IS_BLANK(CUR)) {
3993 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3995 ctxt->sax->error(ctxt->userData,
3996 "Space required after 'NOTATION'\n");
3997 ctxt->wellFormed = 0;
3998 ctxt->disableSAX = 1;
3999 return(0);
4000 }
4001 SKIP_BLANKS;
4002 *tree = xmlParseNotationType(ctxt);
4003 if (*tree == NULL) return(0);
4004 return(XML_ATTRIBUTE_NOTATION);
4005 }
4006 *tree = xmlParseEnumerationType(ctxt);
4007 if (*tree == NULL) return(0);
4008 return(XML_ATTRIBUTE_ENUMERATION);
4009}
4010
4011/**
4012 * xmlParseAttributeType:
4013 * @ctxt: an XML parser context
4014 * @tree: the enumeration tree built while parsing
4015 *
4016 * parse the Attribute list def for an element
4017 *
4018 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4019 *
4020 * [55] StringType ::= 'CDATA'
4021 *
4022 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4023 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4024 *
4025 * Validity constraints for attribute values syntax are checked in
4026 * xmlValidateAttributeValue()
4027 *
4028 * [ VC: ID ]
4029 * Values of type ID must match the Name production. A name must not
4030 * appear more than once in an XML document as a value of this type;
4031 * i.e., ID values must uniquely identify the elements which bear them.
4032 *
4033 * [ VC: One ID per Element Type ]
4034 * No element type may have more than one ID attribute specified.
4035 *
4036 * [ VC: ID Attribute Default ]
4037 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4038 *
4039 * [ VC: IDREF ]
4040 * Values of type IDREF must match the Name production, and values
4041 * of type IDREFS must match Names; each IDREF Name must match the value
4042 * of an ID attribute on some element in the XML document; i.e. IDREF
4043 * values must match the value of some ID attribute.
4044 *
4045 * [ VC: Entity Name ]
4046 * Values of type ENTITY must match the Name production, values
4047 * of type ENTITIES must match Names; each Entity Name must match the
4048 * name of an unparsed entity declared in the DTD.
4049 *
4050 * [ VC: Name Token ]
4051 * Values of type NMTOKEN must match the Nmtoken production; values
4052 * of type NMTOKENS must match Nmtokens.
4053 *
4054 * Returns the attribute type
4055 */
4056int
4057xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4058 SHRINK;
4059 if ((RAW == 'C') && (NXT(1) == 'D') &&
4060 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4061 (NXT(4) == 'A')) {
4062 SKIP(5);
4063 return(XML_ATTRIBUTE_CDATA);
4064 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4065 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4066 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4067 SKIP(6);
4068 return(XML_ATTRIBUTE_IDREFS);
4069 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4070 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4071 (NXT(4) == 'F')) {
4072 SKIP(5);
4073 return(XML_ATTRIBUTE_IDREF);
4074 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4075 SKIP(2);
4076 return(XML_ATTRIBUTE_ID);
4077 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4078 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4079 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4080 SKIP(6);
4081 return(XML_ATTRIBUTE_ENTITY);
4082 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4083 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4084 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4085 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4086 SKIP(8);
4087 return(XML_ATTRIBUTE_ENTITIES);
4088 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4089 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4090 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4091 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4092 SKIP(8);
4093 return(XML_ATTRIBUTE_NMTOKENS);
4094 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4095 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4096 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4097 (NXT(6) == 'N')) {
4098 SKIP(7);
4099 return(XML_ATTRIBUTE_NMTOKEN);
4100 }
4101 return(xmlParseEnumeratedType(ctxt, tree));
4102}
4103
4104/**
4105 * xmlParseAttributeListDecl:
4106 * @ctxt: an XML parser context
4107 *
4108 * : parse the Attribute list def for an element
4109 *
4110 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4111 *
4112 * [53] AttDef ::= S Name S AttType S DefaultDecl
4113 *
4114 */
4115void
4116xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4117 xmlChar *elemName;
4118 xmlChar *attrName;
4119 xmlEnumerationPtr tree;
4120
4121 if ((RAW == '<') && (NXT(1) == '!') &&
4122 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4123 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4124 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4125 (NXT(8) == 'T')) {
4126 xmlParserInputPtr input = ctxt->input;
4127
4128 SKIP(9);
4129 if (!IS_BLANK(CUR)) {
4130 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4132 ctxt->sax->error(ctxt->userData,
4133 "Space required after '<!ATTLIST'\n");
4134 ctxt->wellFormed = 0;
4135 ctxt->disableSAX = 1;
4136 }
4137 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004138 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004139 if (elemName == NULL) {
4140 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4142 ctxt->sax->error(ctxt->userData,
4143 "ATTLIST: no name for Element\n");
4144 ctxt->wellFormed = 0;
4145 ctxt->disableSAX = 1;
4146 return;
4147 }
4148 SKIP_BLANKS;
4149 GROW;
4150 while (RAW != '>') {
4151 const xmlChar *check = CUR_PTR;
4152 int type;
4153 int def;
4154 xmlChar *defaultValue = NULL;
4155
4156 GROW;
4157 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004158 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004159 if (attrName == NULL) {
4160 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4162 ctxt->sax->error(ctxt->userData,
4163 "ATTLIST: no name for Attribute\n");
4164 ctxt->wellFormed = 0;
4165 ctxt->disableSAX = 1;
4166 break;
4167 }
4168 GROW;
4169 if (!IS_BLANK(CUR)) {
4170 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4172 ctxt->sax->error(ctxt->userData,
4173 "Space required after the attribute name\n");
4174 ctxt->wellFormed = 0;
4175 ctxt->disableSAX = 1;
4176 if (attrName != NULL)
4177 xmlFree(attrName);
4178 if (defaultValue != NULL)
4179 xmlFree(defaultValue);
4180 break;
4181 }
4182 SKIP_BLANKS;
4183
4184 type = xmlParseAttributeType(ctxt, &tree);
4185 if (type <= 0) {
4186 if (attrName != NULL)
4187 xmlFree(attrName);
4188 if (defaultValue != NULL)
4189 xmlFree(defaultValue);
4190 break;
4191 }
4192
4193 GROW;
4194 if (!IS_BLANK(CUR)) {
4195 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4197 ctxt->sax->error(ctxt->userData,
4198 "Space required after the attribute type\n");
4199 ctxt->wellFormed = 0;
4200 ctxt->disableSAX = 1;
4201 if (attrName != NULL)
4202 xmlFree(attrName);
4203 if (defaultValue != NULL)
4204 xmlFree(defaultValue);
4205 if (tree != NULL)
4206 xmlFreeEnumeration(tree);
4207 break;
4208 }
4209 SKIP_BLANKS;
4210
4211 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4212 if (def <= 0) {
4213 if (attrName != NULL)
4214 xmlFree(attrName);
4215 if (defaultValue != NULL)
4216 xmlFree(defaultValue);
4217 if (tree != NULL)
4218 xmlFreeEnumeration(tree);
4219 break;
4220 }
4221
4222 GROW;
4223 if (RAW != '>') {
4224 if (!IS_BLANK(CUR)) {
4225 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4227 ctxt->sax->error(ctxt->userData,
4228 "Space required after the attribute default value\n");
4229 ctxt->wellFormed = 0;
4230 ctxt->disableSAX = 1;
4231 if (attrName != NULL)
4232 xmlFree(attrName);
4233 if (defaultValue != NULL)
4234 xmlFree(defaultValue);
4235 if (tree != NULL)
4236 xmlFreeEnumeration(tree);
4237 break;
4238 }
4239 SKIP_BLANKS;
4240 }
4241 if (check == CUR_PTR) {
4242 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4244 ctxt->sax->error(ctxt->userData,
4245 "xmlParseAttributeListDecl: detected internal error\n");
4246 if (attrName != NULL)
4247 xmlFree(attrName);
4248 if (defaultValue != NULL)
4249 xmlFree(defaultValue);
4250 if (tree != NULL)
4251 xmlFreeEnumeration(tree);
4252 break;
4253 }
4254 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4255 (ctxt->sax->attributeDecl != NULL))
4256 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4257 type, def, defaultValue, tree);
4258 if (attrName != NULL)
4259 xmlFree(attrName);
4260 if (defaultValue != NULL)
4261 xmlFree(defaultValue);
4262 GROW;
4263 }
4264 if (RAW == '>') {
4265 if (input != ctxt->input) {
4266 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4268 ctxt->sax->error(ctxt->userData,
4269"Attribute list declaration doesn't start and stop in the same entity\n");
4270 ctxt->wellFormed = 0;
4271 ctxt->disableSAX = 1;
4272 }
4273 NEXT;
4274 }
4275
4276 xmlFree(elemName);
4277 }
4278}
4279
4280/**
4281 * xmlParseElementMixedContentDecl:
4282 * @ctxt: an XML parser context
4283 *
4284 * parse the declaration for a Mixed Element content
4285 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4286 *
4287 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4288 * '(' S? '#PCDATA' S? ')'
4289 *
4290 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4291 *
4292 * [ VC: No Duplicate Types ]
4293 * The same name must not appear more than once in a single
4294 * mixed-content declaration.
4295 *
4296 * returns: the list of the xmlElementContentPtr describing the element choices
4297 */
4298xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004299xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004300 xmlElementContentPtr ret = NULL, cur = NULL, n;
4301 xmlChar *elem = NULL;
4302
4303 GROW;
4304 if ((RAW == '#') && (NXT(1) == 'P') &&
4305 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4306 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4307 (NXT(6) == 'A')) {
4308 SKIP(7);
4309 SKIP_BLANKS;
4310 SHRINK;
4311 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004312 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4313 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4314 if (ctxt->vctxt.error != NULL)
4315 ctxt->vctxt.error(ctxt->vctxt.userData,
4316"Element content declaration doesn't start and stop in the same entity\n");
4317 ctxt->valid = 0;
4318 }
Owen Taylor3473f882001-02-23 17:55:21 +00004319 NEXT;
4320 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4321 if (RAW == '*') {
4322 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4323 NEXT;
4324 }
4325 return(ret);
4326 }
4327 if ((RAW == '(') || (RAW == '|')) {
4328 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4329 if (ret == NULL) return(NULL);
4330 }
4331 while (RAW == '|') {
4332 NEXT;
4333 if (elem == NULL) {
4334 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4335 if (ret == NULL) return(NULL);
4336 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004337 if (cur != NULL)
4338 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004339 cur = ret;
4340 } else {
4341 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4342 if (n == NULL) return(NULL);
4343 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004344 if (n->c1 != NULL)
4345 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004346 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004347 if (n != NULL)
4348 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004349 cur = n;
4350 xmlFree(elem);
4351 }
4352 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004353 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004354 if (elem == NULL) {
4355 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4357 ctxt->sax->error(ctxt->userData,
4358 "xmlParseElementMixedContentDecl : Name expected\n");
4359 ctxt->wellFormed = 0;
4360 ctxt->disableSAX = 1;
4361 xmlFreeElementContent(cur);
4362 return(NULL);
4363 }
4364 SKIP_BLANKS;
4365 GROW;
4366 }
4367 if ((RAW == ')') && (NXT(1) == '*')) {
4368 if (elem != NULL) {
4369 cur->c2 = xmlNewElementContent(elem,
4370 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004371 if (cur->c2 != NULL)
4372 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004373 xmlFree(elem);
4374 }
4375 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004376 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4377 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4378 if (ctxt->vctxt.error != NULL)
4379 ctxt->vctxt.error(ctxt->vctxt.userData,
4380"Element content declaration doesn't start and stop in the same entity\n");
4381 ctxt->valid = 0;
4382 }
Owen Taylor3473f882001-02-23 17:55:21 +00004383 SKIP(2);
4384 } else {
4385 if (elem != NULL) xmlFree(elem);
4386 xmlFreeElementContent(ret);
4387 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4389 ctxt->sax->error(ctxt->userData,
4390 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4391 ctxt->wellFormed = 0;
4392 ctxt->disableSAX = 1;
4393 return(NULL);
4394 }
4395
4396 } else {
4397 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4399 ctxt->sax->error(ctxt->userData,
4400 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4401 ctxt->wellFormed = 0;
4402 ctxt->disableSAX = 1;
4403 }
4404 return(ret);
4405}
4406
4407/**
4408 * xmlParseElementChildrenContentDecl:
4409 * @ctxt: an XML parser context
4410 *
4411 * parse the declaration for a Mixed Element content
4412 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4413 *
4414 *
4415 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4416 *
4417 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4418 *
4419 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4420 *
4421 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4422 *
4423 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4424 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004425 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004426 * opening or closing parentheses in a choice, seq, or Mixed
4427 * construct is contained in the replacement text for a parameter
4428 * entity, both must be contained in the same replacement text. For
4429 * interoperability, if a parameter-entity reference appears in a
4430 * choice, seq, or Mixed construct, its replacement text should not
4431 * be empty, and neither the first nor last non-blank character of
4432 * the replacement text should be a connector (| or ,).
4433 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004434 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004435 * hierarchy.
4436 */
4437xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004438xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004439(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004440 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4441 xmlChar *elem;
4442 xmlChar type = 0;
4443
4444 SKIP_BLANKS;
4445 GROW;
4446 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004447 xmlParserInputPtr input = ctxt->input;
4448
Owen Taylor3473f882001-02-23 17:55:21 +00004449 /* Recurse on first child */
4450 NEXT;
4451 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004452 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004453 SKIP_BLANKS;
4454 GROW;
4455 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004456 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004457 if (elem == NULL) {
4458 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4460 ctxt->sax->error(ctxt->userData,
4461 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4462 ctxt->wellFormed = 0;
4463 ctxt->disableSAX = 1;
4464 return(NULL);
4465 }
4466 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4467 GROW;
4468 if (RAW == '?') {
4469 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4470 NEXT;
4471 } else if (RAW == '*') {
4472 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4473 NEXT;
4474 } else if (RAW == '+') {
4475 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4476 NEXT;
4477 } else {
4478 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4479 }
4480 xmlFree(elem);
4481 GROW;
4482 }
4483 SKIP_BLANKS;
4484 SHRINK;
4485 while (RAW != ')') {
4486 /*
4487 * Each loop we parse one separator and one element.
4488 */
4489 if (RAW == ',') {
4490 if (type == 0) type = CUR;
4491
4492 /*
4493 * Detect "Name | Name , Name" error
4494 */
4495 else if (type != CUR) {
4496 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4498 ctxt->sax->error(ctxt->userData,
4499 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4500 type);
4501 ctxt->wellFormed = 0;
4502 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004503 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004504 xmlFreeElementContent(last);
4505 if (ret != NULL)
4506 xmlFreeElementContent(ret);
4507 return(NULL);
4508 }
4509 NEXT;
4510
4511 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4512 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004513 if ((last != NULL) && (last != ret))
4514 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004515 xmlFreeElementContent(ret);
4516 return(NULL);
4517 }
4518 if (last == NULL) {
4519 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004520 if (ret != NULL)
4521 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004522 ret = cur = op;
4523 } else {
4524 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004525 if (op != NULL)
4526 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004527 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004528 if (last != NULL)
4529 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004530 cur =op;
4531 last = NULL;
4532 }
4533 } else if (RAW == '|') {
4534 if (type == 0) type = CUR;
4535
4536 /*
4537 * Detect "Name , Name | Name" error
4538 */
4539 else if (type != CUR) {
4540 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4542 ctxt->sax->error(ctxt->userData,
4543 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4544 type);
4545 ctxt->wellFormed = 0;
4546 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004547 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004548 xmlFreeElementContent(last);
4549 if (ret != NULL)
4550 xmlFreeElementContent(ret);
4551 return(NULL);
4552 }
4553 NEXT;
4554
4555 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4556 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004557 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004558 xmlFreeElementContent(last);
4559 if (ret != NULL)
4560 xmlFreeElementContent(ret);
4561 return(NULL);
4562 }
4563 if (last == NULL) {
4564 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004565 if (ret != NULL)
4566 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004567 ret = cur = op;
4568 } else {
4569 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004570 if (op != NULL)
4571 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004572 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004573 if (last != NULL)
4574 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004575 cur =op;
4576 last = NULL;
4577 }
4578 } else {
4579 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4581 ctxt->sax->error(ctxt->userData,
4582 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4583 ctxt->wellFormed = 0;
4584 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004585 if (ret != NULL)
4586 xmlFreeElementContent(ret);
4587 return(NULL);
4588 }
4589 GROW;
4590 SKIP_BLANKS;
4591 GROW;
4592 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004593 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004594 /* Recurse on second child */
4595 NEXT;
4596 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004597 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004598 SKIP_BLANKS;
4599 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004600 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004601 if (elem == NULL) {
4602 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4604 ctxt->sax->error(ctxt->userData,
4605 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4606 ctxt->wellFormed = 0;
4607 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004608 if (ret != NULL)
4609 xmlFreeElementContent(ret);
4610 return(NULL);
4611 }
4612 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4613 xmlFree(elem);
4614 if (RAW == '?') {
4615 last->ocur = XML_ELEMENT_CONTENT_OPT;
4616 NEXT;
4617 } else if (RAW == '*') {
4618 last->ocur = XML_ELEMENT_CONTENT_MULT;
4619 NEXT;
4620 } else if (RAW == '+') {
4621 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4622 NEXT;
4623 } else {
4624 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4625 }
4626 }
4627 SKIP_BLANKS;
4628 GROW;
4629 }
4630 if ((cur != NULL) && (last != NULL)) {
4631 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004632 if (last != NULL)
4633 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004634 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004635 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4636 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4637 if (ctxt->vctxt.error != NULL)
4638 ctxt->vctxt.error(ctxt->vctxt.userData,
4639"Element content declaration doesn't start and stop in the same entity\n");
4640 ctxt->valid = 0;
4641 }
Owen Taylor3473f882001-02-23 17:55:21 +00004642 NEXT;
4643 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004644 if (ret != NULL)
4645 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004646 NEXT;
4647 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004648 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004649 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004650 cur = ret;
4651 /*
4652 * Some normalization:
4653 * (a | b* | c?)* == (a | b | c)*
4654 */
4655 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4656 if ((cur->c1 != NULL) &&
4657 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4658 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4659 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4660 if ((cur->c2 != NULL) &&
4661 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4662 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4663 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4664 cur = cur->c2;
4665 }
4666 }
Owen Taylor3473f882001-02-23 17:55:21 +00004667 NEXT;
4668 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004669 if (ret != NULL) {
4670 int found = 0;
4671
Daniel Veillarde470df72001-04-18 21:41:07 +00004672 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004673 /*
4674 * Some normalization:
4675 * (a | b*)+ == (a | b)*
4676 * (a | b?)+ == (a | b)*
4677 */
4678 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4679 if ((cur->c1 != NULL) &&
4680 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4681 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4682 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4683 found = 1;
4684 }
4685 if ((cur->c2 != NULL) &&
4686 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4687 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4688 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4689 found = 1;
4690 }
4691 cur = cur->c2;
4692 }
4693 if (found)
4694 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4695 }
Owen Taylor3473f882001-02-23 17:55:21 +00004696 NEXT;
4697 }
4698 return(ret);
4699}
4700
4701/**
4702 * xmlParseElementContentDecl:
4703 * @ctxt: an XML parser context
4704 * @name: the name of the element being defined.
4705 * @result: the Element Content pointer will be stored here if any
4706 *
4707 * parse the declaration for an Element content either Mixed or Children,
4708 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4709 *
4710 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4711 *
4712 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4713 */
4714
4715int
4716xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4717 xmlElementContentPtr *result) {
4718
4719 xmlElementContentPtr tree = NULL;
4720 xmlParserInputPtr input = ctxt->input;
4721 int res;
4722
4723 *result = NULL;
4724
4725 if (RAW != '(') {
4726 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4728 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004729 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004730 ctxt->wellFormed = 0;
4731 ctxt->disableSAX = 1;
4732 return(-1);
4733 }
4734 NEXT;
4735 GROW;
4736 SKIP_BLANKS;
4737 if ((RAW == '#') && (NXT(1) == 'P') &&
4738 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4739 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4740 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004741 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004742 res = XML_ELEMENT_TYPE_MIXED;
4743 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004744 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 res = XML_ELEMENT_TYPE_ELEMENT;
4746 }
Owen Taylor3473f882001-02-23 17:55:21 +00004747 SKIP_BLANKS;
4748 *result = tree;
4749 return(res);
4750}
4751
4752/**
4753 * xmlParseElementDecl:
4754 * @ctxt: an XML parser context
4755 *
4756 * parse an Element declaration.
4757 *
4758 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4759 *
4760 * [ VC: Unique Element Type Declaration ]
4761 * No element type may be declared more than once
4762 *
4763 * Returns the type of the element, or -1 in case of error
4764 */
4765int
4766xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4767 xmlChar *name;
4768 int ret = -1;
4769 xmlElementContentPtr content = NULL;
4770
4771 GROW;
4772 if ((RAW == '<') && (NXT(1) == '!') &&
4773 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4774 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4775 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4776 (NXT(8) == 'T')) {
4777 xmlParserInputPtr input = ctxt->input;
4778
4779 SKIP(9);
4780 if (!IS_BLANK(CUR)) {
4781 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4783 ctxt->sax->error(ctxt->userData,
4784 "Space required after 'ELEMENT'\n");
4785 ctxt->wellFormed = 0;
4786 ctxt->disableSAX = 1;
4787 }
4788 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004789 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004790 if (name == NULL) {
4791 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4793 ctxt->sax->error(ctxt->userData,
4794 "xmlParseElementDecl: no name for Element\n");
4795 ctxt->wellFormed = 0;
4796 ctxt->disableSAX = 1;
4797 return(-1);
4798 }
4799 while ((RAW == 0) && (ctxt->inputNr > 1))
4800 xmlPopInput(ctxt);
4801 if (!IS_BLANK(CUR)) {
4802 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4803 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4804 ctxt->sax->error(ctxt->userData,
4805 "Space required after the element name\n");
4806 ctxt->wellFormed = 0;
4807 ctxt->disableSAX = 1;
4808 }
4809 SKIP_BLANKS;
4810 if ((RAW == 'E') && (NXT(1) == 'M') &&
4811 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4812 (NXT(4) == 'Y')) {
4813 SKIP(5);
4814 /*
4815 * Element must always be empty.
4816 */
4817 ret = XML_ELEMENT_TYPE_EMPTY;
4818 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4819 (NXT(2) == 'Y')) {
4820 SKIP(3);
4821 /*
4822 * Element is a generic container.
4823 */
4824 ret = XML_ELEMENT_TYPE_ANY;
4825 } else if (RAW == '(') {
4826 ret = xmlParseElementContentDecl(ctxt, name, &content);
4827 } else {
4828 /*
4829 * [ WFC: PEs in Internal Subset ] error handling.
4830 */
4831 if ((RAW == '%') && (ctxt->external == 0) &&
4832 (ctxt->inputNr == 1)) {
4833 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4835 ctxt->sax->error(ctxt->userData,
4836 "PEReference: forbidden within markup decl in internal subset\n");
4837 } else {
4838 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4839 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4840 ctxt->sax->error(ctxt->userData,
4841 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4842 }
4843 ctxt->wellFormed = 0;
4844 ctxt->disableSAX = 1;
4845 if (name != NULL) xmlFree(name);
4846 return(-1);
4847 }
4848
4849 SKIP_BLANKS;
4850 /*
4851 * Pop-up of finished entities.
4852 */
4853 while ((RAW == 0) && (ctxt->inputNr > 1))
4854 xmlPopInput(ctxt);
4855 SKIP_BLANKS;
4856
4857 if (RAW != '>') {
4858 ctxt->errNo = XML_ERR_GT_REQUIRED;
4859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4860 ctxt->sax->error(ctxt->userData,
4861 "xmlParseElementDecl: expected '>' at the end\n");
4862 ctxt->wellFormed = 0;
4863 ctxt->disableSAX = 1;
4864 } else {
4865 if (input != ctxt->input) {
4866 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4868 ctxt->sax->error(ctxt->userData,
4869"Element declaration doesn't start and stop in the same entity\n");
4870 ctxt->wellFormed = 0;
4871 ctxt->disableSAX = 1;
4872 }
4873
4874 NEXT;
4875 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4876 (ctxt->sax->elementDecl != NULL))
4877 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4878 content);
4879 }
4880 if (content != NULL) {
4881 xmlFreeElementContent(content);
4882 }
4883 if (name != NULL) {
4884 xmlFree(name);
4885 }
4886 }
4887 return(ret);
4888}
4889
4890/**
Owen Taylor3473f882001-02-23 17:55:21 +00004891 * xmlParseConditionalSections
4892 * @ctxt: an XML parser context
4893 *
4894 * [61] conditionalSect ::= includeSect | ignoreSect
4895 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4896 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4897 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4898 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4899 */
4900
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004901static void
Owen Taylor3473f882001-02-23 17:55:21 +00004902xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4903 SKIP(3);
4904 SKIP_BLANKS;
4905 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4906 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4907 (NXT(6) == 'E')) {
4908 SKIP(7);
4909 SKIP_BLANKS;
4910 if (RAW != '[') {
4911 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4913 ctxt->sax->error(ctxt->userData,
4914 "XML conditional section '[' expected\n");
4915 ctxt->wellFormed = 0;
4916 ctxt->disableSAX = 1;
4917 } else {
4918 NEXT;
4919 }
4920 if (xmlParserDebugEntities) {
4921 if ((ctxt->input != NULL) && (ctxt->input->filename))
4922 xmlGenericError(xmlGenericErrorContext,
4923 "%s(%d): ", ctxt->input->filename,
4924 ctxt->input->line);
4925 xmlGenericError(xmlGenericErrorContext,
4926 "Entering INCLUDE Conditional Section\n");
4927 }
4928
4929 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4930 (NXT(2) != '>'))) {
4931 const xmlChar *check = CUR_PTR;
4932 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004933
4934 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4935 xmlParseConditionalSections(ctxt);
4936 } else if (IS_BLANK(CUR)) {
4937 NEXT;
4938 } else if (RAW == '%') {
4939 xmlParsePEReference(ctxt);
4940 } else
4941 xmlParseMarkupDecl(ctxt);
4942
4943 /*
4944 * Pop-up of finished entities.
4945 */
4946 while ((RAW == 0) && (ctxt->inputNr > 1))
4947 xmlPopInput(ctxt);
4948
Daniel Veillardfdc91562002-07-01 21:52:03 +00004949 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004950 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4952 ctxt->sax->error(ctxt->userData,
4953 "Content error in the external subset\n");
4954 ctxt->wellFormed = 0;
4955 ctxt->disableSAX = 1;
4956 break;
4957 }
4958 }
4959 if (xmlParserDebugEntities) {
4960 if ((ctxt->input != NULL) && (ctxt->input->filename))
4961 xmlGenericError(xmlGenericErrorContext,
4962 "%s(%d): ", ctxt->input->filename,
4963 ctxt->input->line);
4964 xmlGenericError(xmlGenericErrorContext,
4965 "Leaving INCLUDE Conditional Section\n");
4966 }
4967
4968 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4969 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4970 int state;
4971 int instate;
4972 int depth = 0;
4973
4974 SKIP(6);
4975 SKIP_BLANKS;
4976 if (RAW != '[') {
4977 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4979 ctxt->sax->error(ctxt->userData,
4980 "XML conditional section '[' expected\n");
4981 ctxt->wellFormed = 0;
4982 ctxt->disableSAX = 1;
4983 } else {
4984 NEXT;
4985 }
4986 if (xmlParserDebugEntities) {
4987 if ((ctxt->input != NULL) && (ctxt->input->filename))
4988 xmlGenericError(xmlGenericErrorContext,
4989 "%s(%d): ", ctxt->input->filename,
4990 ctxt->input->line);
4991 xmlGenericError(xmlGenericErrorContext,
4992 "Entering IGNORE Conditional Section\n");
4993 }
4994
4995 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004996 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004997 * But disable SAX event generating DTD building in the meantime
4998 */
4999 state = ctxt->disableSAX;
5000 instate = ctxt->instate;
5001 ctxt->disableSAX = 1;
5002 ctxt->instate = XML_PARSER_IGNORE;
5003
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005004 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005005 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5006 depth++;
5007 SKIP(3);
5008 continue;
5009 }
5010 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5011 if (--depth >= 0) SKIP(3);
5012 continue;
5013 }
5014 NEXT;
5015 continue;
5016 }
5017
5018 ctxt->disableSAX = state;
5019 ctxt->instate = instate;
5020
5021 if (xmlParserDebugEntities) {
5022 if ((ctxt->input != NULL) && (ctxt->input->filename))
5023 xmlGenericError(xmlGenericErrorContext,
5024 "%s(%d): ", ctxt->input->filename,
5025 ctxt->input->line);
5026 xmlGenericError(xmlGenericErrorContext,
5027 "Leaving IGNORE Conditional Section\n");
5028 }
5029
5030 } else {
5031 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5033 ctxt->sax->error(ctxt->userData,
5034 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5035 ctxt->wellFormed = 0;
5036 ctxt->disableSAX = 1;
5037 }
5038
5039 if (RAW == 0)
5040 SHRINK;
5041
5042 if (RAW == 0) {
5043 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5045 ctxt->sax->error(ctxt->userData,
5046 "XML conditional section not closed\n");
5047 ctxt->wellFormed = 0;
5048 ctxt->disableSAX = 1;
5049 } else {
5050 SKIP(3);
5051 }
5052}
5053
5054/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005055 * xmlParseMarkupDecl:
5056 * @ctxt: an XML parser context
5057 *
5058 * parse Markup declarations
5059 *
5060 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5061 * NotationDecl | PI | Comment
5062 *
5063 * [ VC: Proper Declaration/PE Nesting ]
5064 * Parameter-entity replacement text must be properly nested with
5065 * markup declarations. That is to say, if either the first character
5066 * or the last character of a markup declaration (markupdecl above) is
5067 * contained in the replacement text for a parameter-entity reference,
5068 * both must be contained in the same replacement text.
5069 *
5070 * [ WFC: PEs in Internal Subset ]
5071 * In the internal DTD subset, parameter-entity references can occur
5072 * only where markup declarations can occur, not within markup declarations.
5073 * (This does not apply to references that occur in external parameter
5074 * entities or to the external subset.)
5075 */
5076void
5077xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5078 GROW;
5079 xmlParseElementDecl(ctxt);
5080 xmlParseAttributeListDecl(ctxt);
5081 xmlParseEntityDecl(ctxt);
5082 xmlParseNotationDecl(ctxt);
5083 xmlParsePI(ctxt);
5084 xmlParseComment(ctxt);
5085 /*
5086 * This is only for internal subset. On external entities,
5087 * the replacement is done before parsing stage
5088 */
5089 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5090 xmlParsePEReference(ctxt);
5091
5092 /*
5093 * Conditional sections are allowed from entities included
5094 * by PE References in the internal subset.
5095 */
5096 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5097 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5098 xmlParseConditionalSections(ctxt);
5099 }
5100 }
5101
5102 ctxt->instate = XML_PARSER_DTD;
5103}
5104
5105/**
5106 * xmlParseTextDecl:
5107 * @ctxt: an XML parser context
5108 *
5109 * parse an XML declaration header for external entities
5110 *
5111 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5112 *
5113 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5114 */
5115
5116void
5117xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5118 xmlChar *version;
5119
5120 /*
5121 * We know that '<?xml' is here.
5122 */
5123 if ((RAW == '<') && (NXT(1) == '?') &&
5124 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5125 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5126 SKIP(5);
5127 } else {
5128 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5130 ctxt->sax->error(ctxt->userData,
5131 "Text declaration '<?xml' required\n");
5132 ctxt->wellFormed = 0;
5133 ctxt->disableSAX = 1;
5134
5135 return;
5136 }
5137
5138 if (!IS_BLANK(CUR)) {
5139 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5141 ctxt->sax->error(ctxt->userData,
5142 "Space needed after '<?xml'\n");
5143 ctxt->wellFormed = 0;
5144 ctxt->disableSAX = 1;
5145 }
5146 SKIP_BLANKS;
5147
5148 /*
5149 * We may have the VersionInfo here.
5150 */
5151 version = xmlParseVersionInfo(ctxt);
5152 if (version == NULL)
5153 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005154 else {
5155 if (!IS_BLANK(CUR)) {
5156 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5158 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5159 ctxt->wellFormed = 0;
5160 ctxt->disableSAX = 1;
5161 }
5162 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005163 ctxt->input->version = version;
5164
5165 /*
5166 * We must have the encoding declaration
5167 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005168 xmlParseEncodingDecl(ctxt);
5169 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5170 /*
5171 * The XML REC instructs us to stop parsing right here
5172 */
5173 return;
5174 }
5175
5176 SKIP_BLANKS;
5177 if ((RAW == '?') && (NXT(1) == '>')) {
5178 SKIP(2);
5179 } else if (RAW == '>') {
5180 /* Deprecated old WD ... */
5181 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5183 ctxt->sax->error(ctxt->userData,
5184 "XML declaration must end-up with '?>'\n");
5185 ctxt->wellFormed = 0;
5186 ctxt->disableSAX = 1;
5187 NEXT;
5188 } else {
5189 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5191 ctxt->sax->error(ctxt->userData,
5192 "parsing XML declaration: '?>' expected\n");
5193 ctxt->wellFormed = 0;
5194 ctxt->disableSAX = 1;
5195 MOVETO_ENDTAG(CUR_PTR);
5196 NEXT;
5197 }
5198}
5199
5200/**
Owen Taylor3473f882001-02-23 17:55:21 +00005201 * xmlParseExternalSubset:
5202 * @ctxt: an XML parser context
5203 * @ExternalID: the external identifier
5204 * @SystemID: the system identifier (or URL)
5205 *
5206 * parse Markup declarations from an external subset
5207 *
5208 * [30] extSubset ::= textDecl? extSubsetDecl
5209 *
5210 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5211 */
5212void
5213xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5214 const xmlChar *SystemID) {
5215 GROW;
5216 if ((RAW == '<') && (NXT(1) == '?') &&
5217 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5218 (NXT(4) == 'l')) {
5219 xmlParseTextDecl(ctxt);
5220 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5221 /*
5222 * The XML REC instructs us to stop parsing right here
5223 */
5224 ctxt->instate = XML_PARSER_EOF;
5225 return;
5226 }
5227 }
5228 if (ctxt->myDoc == NULL) {
5229 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5230 }
5231 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5232 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5233
5234 ctxt->instate = XML_PARSER_DTD;
5235 ctxt->external = 1;
5236 while (((RAW == '<') && (NXT(1) == '?')) ||
5237 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005238 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005239 const xmlChar *check = CUR_PTR;
5240 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005241
5242 GROW;
5243 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5244 xmlParseConditionalSections(ctxt);
5245 } else if (IS_BLANK(CUR)) {
5246 NEXT;
5247 } else if (RAW == '%') {
5248 xmlParsePEReference(ctxt);
5249 } else
5250 xmlParseMarkupDecl(ctxt);
5251
5252 /*
5253 * Pop-up of finished entities.
5254 */
5255 while ((RAW == 0) && (ctxt->inputNr > 1))
5256 xmlPopInput(ctxt);
5257
Daniel Veillardfdc91562002-07-01 21:52:03 +00005258 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005259 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5261 ctxt->sax->error(ctxt->userData,
5262 "Content error in the external subset\n");
5263 ctxt->wellFormed = 0;
5264 ctxt->disableSAX = 1;
5265 break;
5266 }
5267 }
5268
5269 if (RAW != 0) {
5270 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5272 ctxt->sax->error(ctxt->userData,
5273 "Extra content at the end of the document\n");
5274 ctxt->wellFormed = 0;
5275 ctxt->disableSAX = 1;
5276 }
5277
5278}
5279
5280/**
5281 * xmlParseReference:
5282 * @ctxt: an XML parser context
5283 *
5284 * parse and handle entity references in content, depending on the SAX
5285 * interface, this may end-up in a call to character() if this is a
5286 * CharRef, a predefined entity, if there is no reference() callback.
5287 * or if the parser was asked to switch to that mode.
5288 *
5289 * [67] Reference ::= EntityRef | CharRef
5290 */
5291void
5292xmlParseReference(xmlParserCtxtPtr ctxt) {
5293 xmlEntityPtr ent;
5294 xmlChar *val;
5295 if (RAW != '&') return;
5296
5297 if (NXT(1) == '#') {
5298 int i = 0;
5299 xmlChar out[10];
5300 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005301 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005302
5303 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5304 /*
5305 * So we are using non-UTF-8 buffers
5306 * Check that the char fit on 8bits, if not
5307 * generate a CharRef.
5308 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005309 if (value <= 0xFF) {
5310 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005311 out[1] = 0;
5312 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5313 (!ctxt->disableSAX))
5314 ctxt->sax->characters(ctxt->userData, out, 1);
5315 } else {
5316 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005317 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005318 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005319 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005320 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5321 (!ctxt->disableSAX))
5322 ctxt->sax->reference(ctxt->userData, out);
5323 }
5324 } else {
5325 /*
5326 * Just encode the value in UTF-8
5327 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005328 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005329 out[i] = 0;
5330 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5331 (!ctxt->disableSAX))
5332 ctxt->sax->characters(ctxt->userData, out, i);
5333 }
5334 } else {
5335 ent = xmlParseEntityRef(ctxt);
5336 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005337 if (!ctxt->wellFormed)
5338 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005339 if ((ent->name != NULL) &&
5340 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5341 xmlNodePtr list = NULL;
5342 int ret;
5343
5344
5345 /*
5346 * The first reference to the entity trigger a parsing phase
5347 * where the ent->children is filled with the result from
5348 * the parsing.
5349 */
5350 if (ent->children == NULL) {
5351 xmlChar *value;
5352 value = ent->content;
5353
5354 /*
5355 * Check that this entity is well formed
5356 */
5357 if ((value != NULL) &&
5358 (value[1] == 0) && (value[0] == '<') &&
5359 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5360 /*
5361 * DONE: get definite answer on this !!!
5362 * Lots of entity decls are used to declare a single
5363 * char
5364 * <!ENTITY lt "<">
5365 * Which seems to be valid since
5366 * 2.4: The ampersand character (&) and the left angle
5367 * bracket (<) may appear in their literal form only
5368 * when used ... They are also legal within the literal
5369 * entity value of an internal entity declaration;i
5370 * see "4.3.2 Well-Formed Parsed Entities".
5371 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5372 * Looking at the OASIS test suite and James Clark
5373 * tests, this is broken. However the XML REC uses
5374 * it. Is the XML REC not well-formed ????
5375 * This is a hack to avoid this problem
5376 *
5377 * ANSWER: since lt gt amp .. are already defined,
5378 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005379 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005380 * is lousy but acceptable.
5381 */
5382 list = xmlNewDocText(ctxt->myDoc, value);
5383 if (list != NULL) {
5384 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5385 (ent->children == NULL)) {
5386 ent->children = list;
5387 ent->last = list;
5388 list->parent = (xmlNodePtr) ent;
5389 } else {
5390 xmlFreeNodeList(list);
5391 }
5392 } else if (list != NULL) {
5393 xmlFreeNodeList(list);
5394 }
5395 } else {
5396 /*
5397 * 4.3.2: An internal general parsed entity is well-formed
5398 * if its replacement text matches the production labeled
5399 * content.
5400 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005401
5402 void *user_data;
5403 /*
5404 * This is a bit hackish but this seems the best
5405 * way to make sure both SAX and DOM entity support
5406 * behaves okay.
5407 */
5408 if (ctxt->userData == ctxt)
5409 user_data = NULL;
5410 else
5411 user_data = ctxt->userData;
5412
Owen Taylor3473f882001-02-23 17:55:21 +00005413 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5414 ctxt->depth++;
5415 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005416 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005417 value, &list);
5418 ctxt->depth--;
5419 } else if (ent->etype ==
5420 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5421 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005422 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005423 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005424 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005425 ctxt->depth--;
5426 } else {
5427 ret = -1;
5428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5429 ctxt->sax->error(ctxt->userData,
5430 "Internal: invalid entity type\n");
5431 }
5432 if (ret == XML_ERR_ENTITY_LOOP) {
5433 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5435 ctxt->sax->error(ctxt->userData,
5436 "Detected entity reference loop\n");
5437 ctxt->wellFormed = 0;
5438 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005439 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005440 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005441 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5442 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005443 (ent->children == NULL)) {
5444 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005445 if (ctxt->replaceEntities) {
5446 /*
5447 * Prune it directly in the generated document
5448 * except for single text nodes.
5449 */
5450 if ((list->type == XML_TEXT_NODE) &&
5451 (list->next == NULL)) {
5452 list->parent = (xmlNodePtr) ent;
5453 list = NULL;
5454 } else {
5455 while (list != NULL) {
5456 list->parent = (xmlNodePtr) ctxt->node;
5457 if (list->next == NULL)
5458 ent->last = list;
5459 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005460 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005461 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005462 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5463 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005464 }
5465 } else {
5466 while (list != NULL) {
5467 list->parent = (xmlNodePtr) ent;
5468 if (list->next == NULL)
5469 ent->last = list;
5470 list = list->next;
5471 }
Owen Taylor3473f882001-02-23 17:55:21 +00005472 }
5473 } else {
5474 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005475 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005476 }
5477 } else if (ret > 0) {
5478 ctxt->errNo = ret;
5479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5480 ctxt->sax->error(ctxt->userData,
5481 "Entity value required\n");
5482 ctxt->wellFormed = 0;
5483 ctxt->disableSAX = 1;
5484 } else if (list != NULL) {
5485 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005486 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005487 }
5488 }
5489 }
5490 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5491 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5492 /*
5493 * Create a node.
5494 */
5495 ctxt->sax->reference(ctxt->userData, ent->name);
5496 return;
5497 } else if (ctxt->replaceEntities) {
5498 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5499 /*
5500 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005501 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005502 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005503 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005504 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005505 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005506 cur = ent->children;
5507 while (cur != NULL) {
5508 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005509 if (firstChild == NULL){
5510 firstChild = new;
5511 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005512 xmlAddChild(ctxt->node, new);
5513 if (cur == ent->last)
5514 break;
5515 cur = cur->next;
5516 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005517 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5518 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005519 } else {
5520 /*
5521 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005522 * node with a possible previous text one which
5523 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005524 */
5525 if (ent->children->type == XML_TEXT_NODE)
5526 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5527 if ((ent->last != ent->children) &&
5528 (ent->last->type == XML_TEXT_NODE))
5529 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5530 xmlAddChildList(ctxt->node, ent->children);
5531 }
5532
Owen Taylor3473f882001-02-23 17:55:21 +00005533 /*
5534 * This is to avoid a nasty side effect, see
5535 * characters() in SAX.c
5536 */
5537 ctxt->nodemem = 0;
5538 ctxt->nodelen = 0;
5539 return;
5540 } else {
5541 /*
5542 * Probably running in SAX mode
5543 */
5544 xmlParserInputPtr input;
5545
5546 input = xmlNewEntityInputStream(ctxt, ent);
5547 xmlPushInput(ctxt, input);
5548 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5549 (RAW == '<') && (NXT(1) == '?') &&
5550 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5551 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5552 xmlParseTextDecl(ctxt);
5553 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5554 /*
5555 * The XML REC instructs us to stop parsing right here
5556 */
5557 ctxt->instate = XML_PARSER_EOF;
5558 return;
5559 }
5560 if (input->standalone == 1) {
5561 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5563 ctxt->sax->error(ctxt->userData,
5564 "external parsed entities cannot be standalone\n");
5565 ctxt->wellFormed = 0;
5566 ctxt->disableSAX = 1;
5567 }
5568 }
5569 return;
5570 }
5571 }
5572 } else {
5573 val = ent->content;
5574 if (val == NULL) return;
5575 /*
5576 * inline the entity.
5577 */
5578 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5579 (!ctxt->disableSAX))
5580 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5581 }
5582 }
5583}
5584
5585/**
5586 * xmlParseEntityRef:
5587 * @ctxt: an XML parser context
5588 *
5589 * parse ENTITY references declarations
5590 *
5591 * [68] EntityRef ::= '&' Name ';'
5592 *
5593 * [ WFC: Entity Declared ]
5594 * In a document without any DTD, a document with only an internal DTD
5595 * subset which contains no parameter entity references, or a document
5596 * with "standalone='yes'", the Name given in the entity reference
5597 * must match that in an entity declaration, except that well-formed
5598 * documents need not declare any of the following entities: amp, lt,
5599 * gt, apos, quot. The declaration of a parameter entity must precede
5600 * any reference to it. Similarly, the declaration of a general entity
5601 * must precede any reference to it which appears in a default value in an
5602 * attribute-list declaration. Note that if entities are declared in the
5603 * external subset or in external parameter entities, a non-validating
5604 * processor is not obligated to read and process their declarations;
5605 * for such documents, the rule that an entity must be declared is a
5606 * well-formedness constraint only if standalone='yes'.
5607 *
5608 * [ WFC: Parsed Entity ]
5609 * An entity reference must not contain the name of an unparsed entity
5610 *
5611 * Returns the xmlEntityPtr if found, or NULL otherwise.
5612 */
5613xmlEntityPtr
5614xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5615 xmlChar *name;
5616 xmlEntityPtr ent = NULL;
5617
5618 GROW;
5619
5620 if (RAW == '&') {
5621 NEXT;
5622 name = xmlParseName(ctxt);
5623 if (name == NULL) {
5624 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5626 ctxt->sax->error(ctxt->userData,
5627 "xmlParseEntityRef: no name\n");
5628 ctxt->wellFormed = 0;
5629 ctxt->disableSAX = 1;
5630 } else {
5631 if (RAW == ';') {
5632 NEXT;
5633 /*
5634 * Ask first SAX for entity resolution, otherwise try the
5635 * predefined set.
5636 */
5637 if (ctxt->sax != NULL) {
5638 if (ctxt->sax->getEntity != NULL)
5639 ent = ctxt->sax->getEntity(ctxt->userData, name);
5640 if (ent == NULL)
5641 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005642 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5643 ent = getEntity(ctxt, name);
5644 }
Owen Taylor3473f882001-02-23 17:55:21 +00005645 }
5646 /*
5647 * [ WFC: Entity Declared ]
5648 * In a document without any DTD, a document with only an
5649 * internal DTD subset which contains no parameter entity
5650 * references, or a document with "standalone='yes'", the
5651 * Name given in the entity reference must match that in an
5652 * entity declaration, except that well-formed documents
5653 * need not declare any of the following entities: amp, lt,
5654 * gt, apos, quot.
5655 * The declaration of a parameter entity must precede any
5656 * reference to it.
5657 * Similarly, the declaration of a general entity must
5658 * precede any reference to it which appears in a default
5659 * value in an attribute-list declaration. Note that if
5660 * entities are declared in the external subset or in
5661 * external parameter entities, a non-validating processor
5662 * is not obligated to read and process their declarations;
5663 * for such documents, the rule that an entity must be
5664 * declared is a well-formedness constraint only if
5665 * standalone='yes'.
5666 */
5667 if (ent == NULL) {
5668 if ((ctxt->standalone == 1) ||
5669 ((ctxt->hasExternalSubset == 0) &&
5670 (ctxt->hasPErefs == 0))) {
5671 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5673 ctxt->sax->error(ctxt->userData,
5674 "Entity '%s' not defined\n", name);
5675 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005676 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005677 ctxt->disableSAX = 1;
5678 } else {
5679 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005681 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005682 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005683 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005684 }
5685 }
5686
5687 /*
5688 * [ WFC: Parsed Entity ]
5689 * An entity reference must not contain the name of an
5690 * unparsed entity
5691 */
5692 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5693 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5695 ctxt->sax->error(ctxt->userData,
5696 "Entity reference to unparsed entity %s\n", name);
5697 ctxt->wellFormed = 0;
5698 ctxt->disableSAX = 1;
5699 }
5700
5701 /*
5702 * [ WFC: No External Entity References ]
5703 * Attribute values cannot contain direct or indirect
5704 * entity references to external entities.
5705 */
5706 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5707 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5708 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5710 ctxt->sax->error(ctxt->userData,
5711 "Attribute references external entity '%s'\n", name);
5712 ctxt->wellFormed = 0;
5713 ctxt->disableSAX = 1;
5714 }
5715 /*
5716 * [ WFC: No < in Attribute Values ]
5717 * The replacement text of any entity referred to directly or
5718 * indirectly in an attribute value (other than "&lt;") must
5719 * not contain a <.
5720 */
5721 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5722 (ent != NULL) &&
5723 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5724 (ent->content != NULL) &&
5725 (xmlStrchr(ent->content, '<'))) {
5726 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5728 ctxt->sax->error(ctxt->userData,
5729 "'<' in entity '%s' is not allowed in attributes values\n", name);
5730 ctxt->wellFormed = 0;
5731 ctxt->disableSAX = 1;
5732 }
5733
5734 /*
5735 * Internal check, no parameter entities here ...
5736 */
5737 else {
5738 switch (ent->etype) {
5739 case XML_INTERNAL_PARAMETER_ENTITY:
5740 case XML_EXTERNAL_PARAMETER_ENTITY:
5741 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5743 ctxt->sax->error(ctxt->userData,
5744 "Attempt to reference the parameter entity '%s'\n", name);
5745 ctxt->wellFormed = 0;
5746 ctxt->disableSAX = 1;
5747 break;
5748 default:
5749 break;
5750 }
5751 }
5752
5753 /*
5754 * [ WFC: No Recursion ]
5755 * A parsed entity must not contain a recursive reference
5756 * to itself, either directly or indirectly.
5757 * Done somewhere else
5758 */
5759
5760 } else {
5761 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5763 ctxt->sax->error(ctxt->userData,
5764 "xmlParseEntityRef: expecting ';'\n");
5765 ctxt->wellFormed = 0;
5766 ctxt->disableSAX = 1;
5767 }
5768 xmlFree(name);
5769 }
5770 }
5771 return(ent);
5772}
5773
5774/**
5775 * xmlParseStringEntityRef:
5776 * @ctxt: an XML parser context
5777 * @str: a pointer to an index in the string
5778 *
5779 * parse ENTITY references declarations, but this version parses it from
5780 * a string value.
5781 *
5782 * [68] EntityRef ::= '&' Name ';'
5783 *
5784 * [ WFC: Entity Declared ]
5785 * In a document without any DTD, a document with only an internal DTD
5786 * subset which contains no parameter entity references, or a document
5787 * with "standalone='yes'", the Name given in the entity reference
5788 * must match that in an entity declaration, except that well-formed
5789 * documents need not declare any of the following entities: amp, lt,
5790 * gt, apos, quot. The declaration of a parameter entity must precede
5791 * any reference to it. Similarly, the declaration of a general entity
5792 * must precede any reference to it which appears in a default value in an
5793 * attribute-list declaration. Note that if entities are declared in the
5794 * external subset or in external parameter entities, a non-validating
5795 * processor is not obligated to read and process their declarations;
5796 * for such documents, the rule that an entity must be declared is a
5797 * well-formedness constraint only if standalone='yes'.
5798 *
5799 * [ WFC: Parsed Entity ]
5800 * An entity reference must not contain the name of an unparsed entity
5801 *
5802 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5803 * is updated to the current location in the string.
5804 */
5805xmlEntityPtr
5806xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5807 xmlChar *name;
5808 const xmlChar *ptr;
5809 xmlChar cur;
5810 xmlEntityPtr ent = NULL;
5811
5812 if ((str == NULL) || (*str == NULL))
5813 return(NULL);
5814 ptr = *str;
5815 cur = *ptr;
5816 if (cur == '&') {
5817 ptr++;
5818 cur = *ptr;
5819 name = xmlParseStringName(ctxt, &ptr);
5820 if (name == NULL) {
5821 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5823 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005824 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005825 ctxt->wellFormed = 0;
5826 ctxt->disableSAX = 1;
5827 } else {
5828 if (*ptr == ';') {
5829 ptr++;
5830 /*
5831 * Ask first SAX for entity resolution, otherwise try the
5832 * predefined set.
5833 */
5834 if (ctxt->sax != NULL) {
5835 if (ctxt->sax->getEntity != NULL)
5836 ent = ctxt->sax->getEntity(ctxt->userData, name);
5837 if (ent == NULL)
5838 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005839 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5840 ent = getEntity(ctxt, name);
5841 }
Owen Taylor3473f882001-02-23 17:55:21 +00005842 }
5843 /*
5844 * [ WFC: Entity Declared ]
5845 * In a document without any DTD, a document with only an
5846 * internal DTD subset which contains no parameter entity
5847 * references, or a document with "standalone='yes'", the
5848 * Name given in the entity reference must match that in an
5849 * entity declaration, except that well-formed documents
5850 * need not declare any of the following entities: amp, lt,
5851 * gt, apos, quot.
5852 * The declaration of a parameter entity must precede any
5853 * reference to it.
5854 * Similarly, the declaration of a general entity must
5855 * precede any reference to it which appears in a default
5856 * value in an attribute-list declaration. Note that if
5857 * entities are declared in the external subset or in
5858 * external parameter entities, a non-validating processor
5859 * is not obligated to read and process their declarations;
5860 * for such documents, the rule that an entity must be
5861 * declared is a well-formedness constraint only if
5862 * standalone='yes'.
5863 */
5864 if (ent == NULL) {
5865 if ((ctxt->standalone == 1) ||
5866 ((ctxt->hasExternalSubset == 0) &&
5867 (ctxt->hasPErefs == 0))) {
5868 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5870 ctxt->sax->error(ctxt->userData,
5871 "Entity '%s' not defined\n", name);
5872 ctxt->wellFormed = 0;
5873 ctxt->disableSAX = 1;
5874 } else {
5875 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5876 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5877 ctxt->sax->warning(ctxt->userData,
5878 "Entity '%s' not defined\n", name);
5879 }
5880 }
5881
5882 /*
5883 * [ WFC: Parsed Entity ]
5884 * An entity reference must not contain the name of an
5885 * unparsed entity
5886 */
5887 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5888 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5890 ctxt->sax->error(ctxt->userData,
5891 "Entity reference to unparsed entity %s\n", name);
5892 ctxt->wellFormed = 0;
5893 ctxt->disableSAX = 1;
5894 }
5895
5896 /*
5897 * [ WFC: No External Entity References ]
5898 * Attribute values cannot contain direct or indirect
5899 * entity references to external entities.
5900 */
5901 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5902 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5903 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5905 ctxt->sax->error(ctxt->userData,
5906 "Attribute references external entity '%s'\n", name);
5907 ctxt->wellFormed = 0;
5908 ctxt->disableSAX = 1;
5909 }
5910 /*
5911 * [ WFC: No < in Attribute Values ]
5912 * The replacement text of any entity referred to directly or
5913 * indirectly in an attribute value (other than "&lt;") must
5914 * not contain a <.
5915 */
5916 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5917 (ent != NULL) &&
5918 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5919 (ent->content != NULL) &&
5920 (xmlStrchr(ent->content, '<'))) {
5921 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5923 ctxt->sax->error(ctxt->userData,
5924 "'<' in entity '%s' is not allowed in attributes values\n", name);
5925 ctxt->wellFormed = 0;
5926 ctxt->disableSAX = 1;
5927 }
5928
5929 /*
5930 * Internal check, no parameter entities here ...
5931 */
5932 else {
5933 switch (ent->etype) {
5934 case XML_INTERNAL_PARAMETER_ENTITY:
5935 case XML_EXTERNAL_PARAMETER_ENTITY:
5936 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5938 ctxt->sax->error(ctxt->userData,
5939 "Attempt to reference the parameter entity '%s'\n", name);
5940 ctxt->wellFormed = 0;
5941 ctxt->disableSAX = 1;
5942 break;
5943 default:
5944 break;
5945 }
5946 }
5947
5948 /*
5949 * [ WFC: No Recursion ]
5950 * A parsed entity must not contain a recursive reference
5951 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005952 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005953 */
5954
5955 } else {
5956 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5958 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005959 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005960 ctxt->wellFormed = 0;
5961 ctxt->disableSAX = 1;
5962 }
5963 xmlFree(name);
5964 }
5965 }
5966 *str = ptr;
5967 return(ent);
5968}
5969
5970/**
5971 * xmlParsePEReference:
5972 * @ctxt: an XML parser context
5973 *
5974 * parse PEReference declarations
5975 * The entity content is handled directly by pushing it's content as
5976 * a new input stream.
5977 *
5978 * [69] PEReference ::= '%' Name ';'
5979 *
5980 * [ WFC: No Recursion ]
5981 * A parsed entity must not contain a recursive
5982 * reference to itself, either directly or indirectly.
5983 *
5984 * [ WFC: Entity Declared ]
5985 * In a document without any DTD, a document with only an internal DTD
5986 * subset which contains no parameter entity references, or a document
5987 * with "standalone='yes'", ... ... The declaration of a parameter
5988 * entity must precede any reference to it...
5989 *
5990 * [ VC: Entity Declared ]
5991 * In a document with an external subset or external parameter entities
5992 * with "standalone='no'", ... ... The declaration of a parameter entity
5993 * must precede any reference to it...
5994 *
5995 * [ WFC: In DTD ]
5996 * Parameter-entity references may only appear in the DTD.
5997 * NOTE: misleading but this is handled.
5998 */
5999void
6000xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6001 xmlChar *name;
6002 xmlEntityPtr entity = NULL;
6003 xmlParserInputPtr input;
6004
6005 if (RAW == '%') {
6006 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006007 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006008 if (name == NULL) {
6009 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6011 ctxt->sax->error(ctxt->userData,
6012 "xmlParsePEReference: no name\n");
6013 ctxt->wellFormed = 0;
6014 ctxt->disableSAX = 1;
6015 } else {
6016 if (RAW == ';') {
6017 NEXT;
6018 if ((ctxt->sax != NULL) &&
6019 (ctxt->sax->getParameterEntity != NULL))
6020 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6021 name);
6022 if (entity == NULL) {
6023 /*
6024 * [ WFC: Entity Declared ]
6025 * In a document without any DTD, a document with only an
6026 * internal DTD subset which contains no parameter entity
6027 * references, or a document with "standalone='yes'", ...
6028 * ... The declaration of a parameter entity must precede
6029 * any reference to it...
6030 */
6031 if ((ctxt->standalone == 1) ||
6032 ((ctxt->hasExternalSubset == 0) &&
6033 (ctxt->hasPErefs == 0))) {
6034 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6035 if ((!ctxt->disableSAX) &&
6036 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6037 ctxt->sax->error(ctxt->userData,
6038 "PEReference: %%%s; not found\n", name);
6039 ctxt->wellFormed = 0;
6040 ctxt->disableSAX = 1;
6041 } else {
6042 /*
6043 * [ VC: Entity Declared ]
6044 * In a document with an external subset or external
6045 * parameter entities with "standalone='no'", ...
6046 * ... The declaration of a parameter entity must precede
6047 * any reference to it...
6048 */
6049 if ((!ctxt->disableSAX) &&
6050 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6051 ctxt->sax->warning(ctxt->userData,
6052 "PEReference: %%%s; not found\n", name);
6053 ctxt->valid = 0;
6054 }
6055 } else {
6056 /*
6057 * Internal checking in case the entity quest barfed
6058 */
6059 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6060 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6061 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6062 ctxt->sax->warning(ctxt->userData,
6063 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006064 } else if (ctxt->input->free != deallocblankswrapper) {
6065 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6066 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006067 } else {
6068 /*
6069 * TODO !!!
6070 * handle the extra spaces added before and after
6071 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6072 */
6073 input = xmlNewEntityInputStream(ctxt, entity);
6074 xmlPushInput(ctxt, input);
6075 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6076 (RAW == '<') && (NXT(1) == '?') &&
6077 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6078 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6079 xmlParseTextDecl(ctxt);
6080 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6081 /*
6082 * The XML REC instructs us to stop parsing
6083 * right here
6084 */
6085 ctxt->instate = XML_PARSER_EOF;
6086 xmlFree(name);
6087 return;
6088 }
6089 }
Owen Taylor3473f882001-02-23 17:55:21 +00006090 }
6091 }
6092 ctxt->hasPErefs = 1;
6093 } else {
6094 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6096 ctxt->sax->error(ctxt->userData,
6097 "xmlParsePEReference: expecting ';'\n");
6098 ctxt->wellFormed = 0;
6099 ctxt->disableSAX = 1;
6100 }
6101 xmlFree(name);
6102 }
6103 }
6104}
6105
6106/**
6107 * xmlParseStringPEReference:
6108 * @ctxt: an XML parser context
6109 * @str: a pointer to an index in the string
6110 *
6111 * parse PEReference declarations
6112 *
6113 * [69] PEReference ::= '%' Name ';'
6114 *
6115 * [ WFC: No Recursion ]
6116 * A parsed entity must not contain a recursive
6117 * reference to itself, either directly or indirectly.
6118 *
6119 * [ WFC: Entity Declared ]
6120 * In a document without any DTD, a document with only an internal DTD
6121 * subset which contains no parameter entity references, or a document
6122 * with "standalone='yes'", ... ... The declaration of a parameter
6123 * entity must precede any reference to it...
6124 *
6125 * [ VC: Entity Declared ]
6126 * In a document with an external subset or external parameter entities
6127 * with "standalone='no'", ... ... The declaration of a parameter entity
6128 * must precede any reference to it...
6129 *
6130 * [ WFC: In DTD ]
6131 * Parameter-entity references may only appear in the DTD.
6132 * NOTE: misleading but this is handled.
6133 *
6134 * Returns the string of the entity content.
6135 * str is updated to the current value of the index
6136 */
6137xmlEntityPtr
6138xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6139 const xmlChar *ptr;
6140 xmlChar cur;
6141 xmlChar *name;
6142 xmlEntityPtr entity = NULL;
6143
6144 if ((str == NULL) || (*str == NULL)) return(NULL);
6145 ptr = *str;
6146 cur = *ptr;
6147 if (cur == '%') {
6148 ptr++;
6149 cur = *ptr;
6150 name = xmlParseStringName(ctxt, &ptr);
6151 if (name == NULL) {
6152 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6154 ctxt->sax->error(ctxt->userData,
6155 "xmlParseStringPEReference: no name\n");
6156 ctxt->wellFormed = 0;
6157 ctxt->disableSAX = 1;
6158 } else {
6159 cur = *ptr;
6160 if (cur == ';') {
6161 ptr++;
6162 cur = *ptr;
6163 if ((ctxt->sax != NULL) &&
6164 (ctxt->sax->getParameterEntity != NULL))
6165 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6166 name);
6167 if (entity == NULL) {
6168 /*
6169 * [ WFC: Entity Declared ]
6170 * In a document without any DTD, a document with only an
6171 * internal DTD subset which contains no parameter entity
6172 * references, or a document with "standalone='yes'", ...
6173 * ... The declaration of a parameter entity must precede
6174 * any reference to it...
6175 */
6176 if ((ctxt->standalone == 1) ||
6177 ((ctxt->hasExternalSubset == 0) &&
6178 (ctxt->hasPErefs == 0))) {
6179 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6181 ctxt->sax->error(ctxt->userData,
6182 "PEReference: %%%s; not found\n", name);
6183 ctxt->wellFormed = 0;
6184 ctxt->disableSAX = 1;
6185 } else {
6186 /*
6187 * [ VC: Entity Declared ]
6188 * In a document with an external subset or external
6189 * parameter entities with "standalone='no'", ...
6190 * ... The declaration of a parameter entity must
6191 * precede any reference to it...
6192 */
6193 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6194 ctxt->sax->warning(ctxt->userData,
6195 "PEReference: %%%s; not found\n", name);
6196 ctxt->valid = 0;
6197 }
6198 } else {
6199 /*
6200 * Internal checking in case the entity quest barfed
6201 */
6202 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6203 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6204 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6205 ctxt->sax->warning(ctxt->userData,
6206 "Internal: %%%s; is not a parameter entity\n", name);
6207 }
6208 }
6209 ctxt->hasPErefs = 1;
6210 } else {
6211 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6213 ctxt->sax->error(ctxt->userData,
6214 "xmlParseStringPEReference: expecting ';'\n");
6215 ctxt->wellFormed = 0;
6216 ctxt->disableSAX = 1;
6217 }
6218 xmlFree(name);
6219 }
6220 }
6221 *str = ptr;
6222 return(entity);
6223}
6224
6225/**
6226 * xmlParseDocTypeDecl:
6227 * @ctxt: an XML parser context
6228 *
6229 * parse a DOCTYPE declaration
6230 *
6231 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6232 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6233 *
6234 * [ VC: Root Element Type ]
6235 * The Name in the document type declaration must match the element
6236 * type of the root element.
6237 */
6238
6239void
6240xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6241 xmlChar *name = NULL;
6242 xmlChar *ExternalID = NULL;
6243 xmlChar *URI = NULL;
6244
6245 /*
6246 * We know that '<!DOCTYPE' has been detected.
6247 */
6248 SKIP(9);
6249
6250 SKIP_BLANKS;
6251
6252 /*
6253 * Parse the DOCTYPE name.
6254 */
6255 name = xmlParseName(ctxt);
6256 if (name == NULL) {
6257 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6259 ctxt->sax->error(ctxt->userData,
6260 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6261 ctxt->wellFormed = 0;
6262 ctxt->disableSAX = 1;
6263 }
6264 ctxt->intSubName = name;
6265
6266 SKIP_BLANKS;
6267
6268 /*
6269 * Check for SystemID and ExternalID
6270 */
6271 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6272
6273 if ((URI != NULL) || (ExternalID != NULL)) {
6274 ctxt->hasExternalSubset = 1;
6275 }
6276 ctxt->extSubURI = URI;
6277 ctxt->extSubSystem = ExternalID;
6278
6279 SKIP_BLANKS;
6280
6281 /*
6282 * Create and update the internal subset.
6283 */
6284 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6285 (!ctxt->disableSAX))
6286 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6287
6288 /*
6289 * Is there any internal subset declarations ?
6290 * they are handled separately in xmlParseInternalSubset()
6291 */
6292 if (RAW == '[')
6293 return;
6294
6295 /*
6296 * We should be at the end of the DOCTYPE declaration.
6297 */
6298 if (RAW != '>') {
6299 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006301 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006302 ctxt->wellFormed = 0;
6303 ctxt->disableSAX = 1;
6304 }
6305 NEXT;
6306}
6307
6308/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006309 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006310 * @ctxt: an XML parser context
6311 *
6312 * parse the internal subset declaration
6313 *
6314 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6315 */
6316
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006317static void
Owen Taylor3473f882001-02-23 17:55:21 +00006318xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6319 /*
6320 * Is there any DTD definition ?
6321 */
6322 if (RAW == '[') {
6323 ctxt->instate = XML_PARSER_DTD;
6324 NEXT;
6325 /*
6326 * Parse the succession of Markup declarations and
6327 * PEReferences.
6328 * Subsequence (markupdecl | PEReference | S)*
6329 */
6330 while (RAW != ']') {
6331 const xmlChar *check = CUR_PTR;
6332 int cons = ctxt->input->consumed;
6333
6334 SKIP_BLANKS;
6335 xmlParseMarkupDecl(ctxt);
6336 xmlParsePEReference(ctxt);
6337
6338 /*
6339 * Pop-up of finished entities.
6340 */
6341 while ((RAW == 0) && (ctxt->inputNr > 1))
6342 xmlPopInput(ctxt);
6343
6344 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6345 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6347 ctxt->sax->error(ctxt->userData,
6348 "xmlParseInternalSubset: error detected in Markup declaration\n");
6349 ctxt->wellFormed = 0;
6350 ctxt->disableSAX = 1;
6351 break;
6352 }
6353 }
6354 if (RAW == ']') {
6355 NEXT;
6356 SKIP_BLANKS;
6357 }
6358 }
6359
6360 /*
6361 * We should be at the end of the DOCTYPE declaration.
6362 */
6363 if (RAW != '>') {
6364 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006366 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006367 ctxt->wellFormed = 0;
6368 ctxt->disableSAX = 1;
6369 }
6370 NEXT;
6371}
6372
6373/**
6374 * xmlParseAttribute:
6375 * @ctxt: an XML parser context
6376 * @value: a xmlChar ** used to store the value of the attribute
6377 *
6378 * parse an attribute
6379 *
6380 * [41] Attribute ::= Name Eq AttValue
6381 *
6382 * [ WFC: No External Entity References ]
6383 * Attribute values cannot contain direct or indirect entity references
6384 * to external entities.
6385 *
6386 * [ WFC: No < in Attribute Values ]
6387 * The replacement text of any entity referred to directly or indirectly in
6388 * an attribute value (other than "&lt;") must not contain a <.
6389 *
6390 * [ VC: Attribute Value Type ]
6391 * The attribute must have been declared; the value must be of the type
6392 * declared for it.
6393 *
6394 * [25] Eq ::= S? '=' S?
6395 *
6396 * With namespace:
6397 *
6398 * [NS 11] Attribute ::= QName Eq AttValue
6399 *
6400 * Also the case QName == xmlns:??? is handled independently as a namespace
6401 * definition.
6402 *
6403 * Returns the attribute name, and the value in *value.
6404 */
6405
6406xmlChar *
6407xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6408 xmlChar *name, *val;
6409
6410 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006411 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006412 name = xmlParseName(ctxt);
6413 if (name == NULL) {
6414 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6416 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6417 ctxt->wellFormed = 0;
6418 ctxt->disableSAX = 1;
6419 return(NULL);
6420 }
6421
6422 /*
6423 * read the value
6424 */
6425 SKIP_BLANKS;
6426 if (RAW == '=') {
6427 NEXT;
6428 SKIP_BLANKS;
6429 val = xmlParseAttValue(ctxt);
6430 ctxt->instate = XML_PARSER_CONTENT;
6431 } else {
6432 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6434 ctxt->sax->error(ctxt->userData,
6435 "Specification mandate value for attribute %s\n", name);
6436 ctxt->wellFormed = 0;
6437 ctxt->disableSAX = 1;
6438 xmlFree(name);
6439 return(NULL);
6440 }
6441
6442 /*
6443 * Check that xml:lang conforms to the specification
6444 * No more registered as an error, just generate a warning now
6445 * since this was deprecated in XML second edition
6446 */
6447 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6448 if (!xmlCheckLanguageID(val)) {
6449 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6450 ctxt->sax->warning(ctxt->userData,
6451 "Malformed value for xml:lang : %s\n", val);
6452 }
6453 }
6454
6455 /*
6456 * Check that xml:space conforms to the specification
6457 */
6458 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6459 if (xmlStrEqual(val, BAD_CAST "default"))
6460 *(ctxt->space) = 0;
6461 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6462 *(ctxt->space) = 1;
6463 else {
6464 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6466 ctxt->sax->error(ctxt->userData,
6467"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6468 val);
6469 ctxt->wellFormed = 0;
6470 ctxt->disableSAX = 1;
6471 }
6472 }
6473
6474 *value = val;
6475 return(name);
6476}
6477
6478/**
6479 * xmlParseStartTag:
6480 * @ctxt: an XML parser context
6481 *
6482 * parse a start of tag either for rule element or
6483 * EmptyElement. In both case we don't parse the tag closing chars.
6484 *
6485 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6486 *
6487 * [ WFC: Unique Att Spec ]
6488 * No attribute name may appear more than once in the same start-tag or
6489 * empty-element tag.
6490 *
6491 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6492 *
6493 * [ WFC: Unique Att Spec ]
6494 * No attribute name may appear more than once in the same start-tag or
6495 * empty-element tag.
6496 *
6497 * With namespace:
6498 *
6499 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6500 *
6501 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6502 *
6503 * Returns the element name parsed
6504 */
6505
6506xmlChar *
6507xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6508 xmlChar *name;
6509 xmlChar *attname;
6510 xmlChar *attvalue;
6511 const xmlChar **atts = NULL;
6512 int nbatts = 0;
6513 int maxatts = 0;
6514 int i;
6515
6516 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006517 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006518
6519 name = xmlParseName(ctxt);
6520 if (name == NULL) {
6521 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6523 ctxt->sax->error(ctxt->userData,
6524 "xmlParseStartTag: invalid element name\n");
6525 ctxt->wellFormed = 0;
6526 ctxt->disableSAX = 1;
6527 return(NULL);
6528 }
6529
6530 /*
6531 * Now parse the attributes, it ends up with the ending
6532 *
6533 * (S Attribute)* S?
6534 */
6535 SKIP_BLANKS;
6536 GROW;
6537
Daniel Veillard21a0f912001-02-25 19:54:14 +00006538 while ((RAW != '>') &&
6539 ((RAW != '/') || (NXT(1) != '>')) &&
6540 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006541 const xmlChar *q = CUR_PTR;
6542 int cons = ctxt->input->consumed;
6543
6544 attname = xmlParseAttribute(ctxt, &attvalue);
6545 if ((attname != NULL) && (attvalue != NULL)) {
6546 /*
6547 * [ WFC: Unique Att Spec ]
6548 * No attribute name may appear more than once in the same
6549 * start-tag or empty-element tag.
6550 */
6551 for (i = 0; i < nbatts;i += 2) {
6552 if (xmlStrEqual(atts[i], attname)) {
6553 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6555 ctxt->sax->error(ctxt->userData,
6556 "Attribute %s redefined\n",
6557 attname);
6558 ctxt->wellFormed = 0;
6559 ctxt->disableSAX = 1;
6560 xmlFree(attname);
6561 xmlFree(attvalue);
6562 goto failed;
6563 }
6564 }
6565
6566 /*
6567 * Add the pair to atts
6568 */
6569 if (atts == NULL) {
6570 maxatts = 10;
6571 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6572 if (atts == NULL) {
6573 xmlGenericError(xmlGenericErrorContext,
6574 "malloc of %ld byte failed\n",
6575 maxatts * (long)sizeof(xmlChar *));
6576 return(NULL);
6577 }
6578 } else if (nbatts + 4 > maxatts) {
6579 maxatts *= 2;
6580 atts = (const xmlChar **) xmlRealloc((void *) atts,
6581 maxatts * sizeof(xmlChar *));
6582 if (atts == NULL) {
6583 xmlGenericError(xmlGenericErrorContext,
6584 "realloc of %ld byte failed\n",
6585 maxatts * (long)sizeof(xmlChar *));
6586 return(NULL);
6587 }
6588 }
6589 atts[nbatts++] = attname;
6590 atts[nbatts++] = attvalue;
6591 atts[nbatts] = NULL;
6592 atts[nbatts + 1] = NULL;
6593 } else {
6594 if (attname != NULL)
6595 xmlFree(attname);
6596 if (attvalue != NULL)
6597 xmlFree(attvalue);
6598 }
6599
6600failed:
6601
6602 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6603 break;
6604 if (!IS_BLANK(RAW)) {
6605 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6607 ctxt->sax->error(ctxt->userData,
6608 "attributes construct error\n");
6609 ctxt->wellFormed = 0;
6610 ctxt->disableSAX = 1;
6611 }
6612 SKIP_BLANKS;
6613 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6614 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6616 ctxt->sax->error(ctxt->userData,
6617 "xmlParseStartTag: problem parsing attributes\n");
6618 ctxt->wellFormed = 0;
6619 ctxt->disableSAX = 1;
6620 break;
6621 }
6622 GROW;
6623 }
6624
6625 /*
6626 * SAX: Start of Element !
6627 */
6628 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6629 (!ctxt->disableSAX))
6630 ctxt->sax->startElement(ctxt->userData, name, atts);
6631
6632 if (atts != NULL) {
6633 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6634 xmlFree((void *) atts);
6635 }
6636 return(name);
6637}
6638
6639/**
6640 * xmlParseEndTag:
6641 * @ctxt: an XML parser context
6642 *
6643 * parse an end of tag
6644 *
6645 * [42] ETag ::= '</' Name S? '>'
6646 *
6647 * With namespace
6648 *
6649 * [NS 9] ETag ::= '</' QName S? '>'
6650 */
6651
6652void
6653xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6654 xmlChar *name;
6655 xmlChar *oldname;
6656
6657 GROW;
6658 if ((RAW != '<') || (NXT(1) != '/')) {
6659 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6661 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6662 ctxt->wellFormed = 0;
6663 ctxt->disableSAX = 1;
6664 return;
6665 }
6666 SKIP(2);
6667
Daniel Veillard46de64e2002-05-29 08:21:33 +00006668 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006669
6670 /*
6671 * We should definitely be at the ending "S? '>'" part
6672 */
6673 GROW;
6674 SKIP_BLANKS;
6675 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6676 ctxt->errNo = XML_ERR_GT_REQUIRED;
6677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6678 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6679 ctxt->wellFormed = 0;
6680 ctxt->disableSAX = 1;
6681 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006682 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006683
6684 /*
6685 * [ WFC: Element Type Match ]
6686 * The Name in an element's end-tag must match the element type in the
6687 * start-tag.
6688 *
6689 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006690 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006691 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006693 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006694 ctxt->sax->error(ctxt->userData,
6695 "Opening and ending tag mismatch: %s and %s\n",
6696 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006697 xmlFree(name);
6698 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006699 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006700 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006701 }
6702
6703 }
6704 ctxt->wellFormed = 0;
6705 ctxt->disableSAX = 1;
6706 }
6707
6708 /*
6709 * SAX: End of Tag
6710 */
6711 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6712 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006713 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006714
Owen Taylor3473f882001-02-23 17:55:21 +00006715 oldname = namePop(ctxt);
6716 spacePop(ctxt);
6717 if (oldname != NULL) {
6718#ifdef DEBUG_STACK
6719 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6720#endif
6721 xmlFree(oldname);
6722 }
6723 return;
6724}
6725
6726/**
6727 * xmlParseCDSect:
6728 * @ctxt: an XML parser context
6729 *
6730 * Parse escaped pure raw content.
6731 *
6732 * [18] CDSect ::= CDStart CData CDEnd
6733 *
6734 * [19] CDStart ::= '<![CDATA['
6735 *
6736 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6737 *
6738 * [21] CDEnd ::= ']]>'
6739 */
6740void
6741xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6742 xmlChar *buf = NULL;
6743 int len = 0;
6744 int size = XML_PARSER_BUFFER_SIZE;
6745 int r, rl;
6746 int s, sl;
6747 int cur, l;
6748 int count = 0;
6749
6750 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6751 (NXT(2) == '[') && (NXT(3) == 'C') &&
6752 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6753 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6754 (NXT(8) == '[')) {
6755 SKIP(9);
6756 } else
6757 return;
6758
6759 ctxt->instate = XML_PARSER_CDATA_SECTION;
6760 r = CUR_CHAR(rl);
6761 if (!IS_CHAR(r)) {
6762 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6764 ctxt->sax->error(ctxt->userData,
6765 "CData section not finished\n");
6766 ctxt->wellFormed = 0;
6767 ctxt->disableSAX = 1;
6768 ctxt->instate = XML_PARSER_CONTENT;
6769 return;
6770 }
6771 NEXTL(rl);
6772 s = CUR_CHAR(sl);
6773 if (!IS_CHAR(s)) {
6774 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6776 ctxt->sax->error(ctxt->userData,
6777 "CData section not finished\n");
6778 ctxt->wellFormed = 0;
6779 ctxt->disableSAX = 1;
6780 ctxt->instate = XML_PARSER_CONTENT;
6781 return;
6782 }
6783 NEXTL(sl);
6784 cur = CUR_CHAR(l);
6785 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6786 if (buf == NULL) {
6787 xmlGenericError(xmlGenericErrorContext,
6788 "malloc of %d byte failed\n", size);
6789 return;
6790 }
6791 while (IS_CHAR(cur) &&
6792 ((r != ']') || (s != ']') || (cur != '>'))) {
6793 if (len + 5 >= size) {
6794 size *= 2;
6795 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6796 if (buf == NULL) {
6797 xmlGenericError(xmlGenericErrorContext,
6798 "realloc of %d byte failed\n", size);
6799 return;
6800 }
6801 }
6802 COPY_BUF(rl,buf,len,r);
6803 r = s;
6804 rl = sl;
6805 s = cur;
6806 sl = l;
6807 count++;
6808 if (count > 50) {
6809 GROW;
6810 count = 0;
6811 }
6812 NEXTL(l);
6813 cur = CUR_CHAR(l);
6814 }
6815 buf[len] = 0;
6816 ctxt->instate = XML_PARSER_CONTENT;
6817 if (cur != '>') {
6818 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6820 ctxt->sax->error(ctxt->userData,
6821 "CData section not finished\n%.50s\n", buf);
6822 ctxt->wellFormed = 0;
6823 ctxt->disableSAX = 1;
6824 xmlFree(buf);
6825 return;
6826 }
6827 NEXTL(l);
6828
6829 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006830 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006831 */
6832 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6833 if (ctxt->sax->cdataBlock != NULL)
6834 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006835 else if (ctxt->sax->characters != NULL)
6836 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006837 }
6838 xmlFree(buf);
6839}
6840
6841/**
6842 * xmlParseContent:
6843 * @ctxt: an XML parser context
6844 *
6845 * Parse a content:
6846 *
6847 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6848 */
6849
6850void
6851xmlParseContent(xmlParserCtxtPtr ctxt) {
6852 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006853 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006854 ((RAW != '<') || (NXT(1) != '/'))) {
6855 const xmlChar *test = CUR_PTR;
6856 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006857 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006858
6859 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006860 * First case : a Processing Instruction.
6861 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006862 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006863 xmlParsePI(ctxt);
6864 }
6865
6866 /*
6867 * Second case : a CDSection
6868 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006869 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006870 (NXT(2) == '[') && (NXT(3) == 'C') &&
6871 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6872 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6873 (NXT(8) == '[')) {
6874 xmlParseCDSect(ctxt);
6875 }
6876
6877 /*
6878 * Third case : a comment
6879 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006880 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006881 (NXT(2) == '-') && (NXT(3) == '-')) {
6882 xmlParseComment(ctxt);
6883 ctxt->instate = XML_PARSER_CONTENT;
6884 }
6885
6886 /*
6887 * Fourth case : a sub-element.
6888 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006889 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006890 xmlParseElement(ctxt);
6891 }
6892
6893 /*
6894 * Fifth case : a reference. If if has not been resolved,
6895 * parsing returns it's Name, create the node
6896 */
6897
Daniel Veillard21a0f912001-02-25 19:54:14 +00006898 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006899 xmlParseReference(ctxt);
6900 }
6901
6902 /*
6903 * Last case, text. Note that References are handled directly.
6904 */
6905 else {
6906 xmlParseCharData(ctxt, 0);
6907 }
6908
6909 GROW;
6910 /*
6911 * Pop-up of finished entities.
6912 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006913 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006914 xmlPopInput(ctxt);
6915 SHRINK;
6916
Daniel Veillardfdc91562002-07-01 21:52:03 +00006917 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006918 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6920 ctxt->sax->error(ctxt->userData,
6921 "detected an error in element content\n");
6922 ctxt->wellFormed = 0;
6923 ctxt->disableSAX = 1;
6924 ctxt->instate = XML_PARSER_EOF;
6925 break;
6926 }
6927 }
6928}
6929
6930/**
6931 * xmlParseElement:
6932 * @ctxt: an XML parser context
6933 *
6934 * parse an XML element, this is highly recursive
6935 *
6936 * [39] element ::= EmptyElemTag | STag content ETag
6937 *
6938 * [ WFC: Element Type Match ]
6939 * The Name in an element's end-tag must match the element type in the
6940 * start-tag.
6941 *
6942 * [ VC: Element Valid ]
6943 * An element is valid if there is a declaration matching elementdecl
6944 * where the Name matches the element type and one of the following holds:
6945 * - The declaration matches EMPTY and the element has no content.
6946 * - The declaration matches children and the sequence of child elements
6947 * belongs to the language generated by the regular expression in the
6948 * content model, with optional white space (characters matching the
6949 * nonterminal S) between each pair of child elements.
6950 * - The declaration matches Mixed and the content consists of character
6951 * data and child elements whose types match names in the content model.
6952 * - The declaration matches ANY, and the types of any child elements have
6953 * been declared.
6954 */
6955
6956void
6957xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00006958 xmlChar *name;
6959 xmlChar *oldname;
6960 xmlParserNodeInfo node_info;
6961 xmlNodePtr ret;
6962
6963 /* Capture start position */
6964 if (ctxt->record_info) {
6965 node_info.begin_pos = ctxt->input->consumed +
6966 (CUR_PTR - ctxt->input->base);
6967 node_info.begin_line = ctxt->input->line;
6968 }
6969
6970 if (ctxt->spaceNr == 0)
6971 spacePush(ctxt, -1);
6972 else
6973 spacePush(ctxt, *ctxt->space);
6974
6975 name = xmlParseStartTag(ctxt);
6976 if (name == NULL) {
6977 spacePop(ctxt);
6978 return;
6979 }
6980 namePush(ctxt, name);
6981 ret = ctxt->node;
6982
6983 /*
6984 * [ VC: Root Element Type ]
6985 * The Name in the document type declaration must match the element
6986 * type of the root element.
6987 */
6988 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6989 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6990 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6991
6992 /*
6993 * Check for an Empty Element.
6994 */
6995 if ((RAW == '/') && (NXT(1) == '>')) {
6996 SKIP(2);
6997 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6998 (!ctxt->disableSAX))
6999 ctxt->sax->endElement(ctxt->userData, name);
7000 oldname = namePop(ctxt);
7001 spacePop(ctxt);
7002 if (oldname != NULL) {
7003#ifdef DEBUG_STACK
7004 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7005#endif
7006 xmlFree(oldname);
7007 }
7008 if ( ret != NULL && ctxt->record_info ) {
7009 node_info.end_pos = ctxt->input->consumed +
7010 (CUR_PTR - ctxt->input->base);
7011 node_info.end_line = ctxt->input->line;
7012 node_info.node = ret;
7013 xmlParserAddNodeInfo(ctxt, &node_info);
7014 }
7015 return;
7016 }
7017 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007018 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007019 } else {
7020 ctxt->errNo = XML_ERR_GT_REQUIRED;
7021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7022 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007023 "Couldn't find end of Start Tag %s\n",
7024 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007025 ctxt->wellFormed = 0;
7026 ctxt->disableSAX = 1;
7027
7028 /*
7029 * end of parsing of this node.
7030 */
7031 nodePop(ctxt);
7032 oldname = namePop(ctxt);
7033 spacePop(ctxt);
7034 if (oldname != NULL) {
7035#ifdef DEBUG_STACK
7036 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7037#endif
7038 xmlFree(oldname);
7039 }
7040
7041 /*
7042 * Capture end position and add node
7043 */
7044 if ( ret != NULL && ctxt->record_info ) {
7045 node_info.end_pos = ctxt->input->consumed +
7046 (CUR_PTR - ctxt->input->base);
7047 node_info.end_line = ctxt->input->line;
7048 node_info.node = ret;
7049 xmlParserAddNodeInfo(ctxt, &node_info);
7050 }
7051 return;
7052 }
7053
7054 /*
7055 * Parse the content of the element:
7056 */
7057 xmlParseContent(ctxt);
7058 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007059 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7061 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007062 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007063 ctxt->wellFormed = 0;
7064 ctxt->disableSAX = 1;
7065
7066 /*
7067 * end of parsing of this node.
7068 */
7069 nodePop(ctxt);
7070 oldname = namePop(ctxt);
7071 spacePop(ctxt);
7072 if (oldname != NULL) {
7073#ifdef DEBUG_STACK
7074 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7075#endif
7076 xmlFree(oldname);
7077 }
7078 return;
7079 }
7080
7081 /*
7082 * parse the end of tag: '</' should be here.
7083 */
7084 xmlParseEndTag(ctxt);
7085
7086 /*
7087 * Capture end position and add node
7088 */
7089 if ( ret != NULL && ctxt->record_info ) {
7090 node_info.end_pos = ctxt->input->consumed +
7091 (CUR_PTR - ctxt->input->base);
7092 node_info.end_line = ctxt->input->line;
7093 node_info.node = ret;
7094 xmlParserAddNodeInfo(ctxt, &node_info);
7095 }
7096}
7097
7098/**
7099 * xmlParseVersionNum:
7100 * @ctxt: an XML parser context
7101 *
7102 * parse the XML version value.
7103 *
7104 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7105 *
7106 * Returns the string giving the XML version number, or NULL
7107 */
7108xmlChar *
7109xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7110 xmlChar *buf = NULL;
7111 int len = 0;
7112 int size = 10;
7113 xmlChar cur;
7114
7115 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7116 if (buf == NULL) {
7117 xmlGenericError(xmlGenericErrorContext,
7118 "malloc of %d byte failed\n", size);
7119 return(NULL);
7120 }
7121 cur = CUR;
7122 while (((cur >= 'a') && (cur <= 'z')) ||
7123 ((cur >= 'A') && (cur <= 'Z')) ||
7124 ((cur >= '0') && (cur <= '9')) ||
7125 (cur == '_') || (cur == '.') ||
7126 (cur == ':') || (cur == '-')) {
7127 if (len + 1 >= size) {
7128 size *= 2;
7129 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7130 if (buf == NULL) {
7131 xmlGenericError(xmlGenericErrorContext,
7132 "realloc of %d byte failed\n", size);
7133 return(NULL);
7134 }
7135 }
7136 buf[len++] = cur;
7137 NEXT;
7138 cur=CUR;
7139 }
7140 buf[len] = 0;
7141 return(buf);
7142}
7143
7144/**
7145 * xmlParseVersionInfo:
7146 * @ctxt: an XML parser context
7147 *
7148 * parse the XML version.
7149 *
7150 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7151 *
7152 * [25] Eq ::= S? '=' S?
7153 *
7154 * Returns the version string, e.g. "1.0"
7155 */
7156
7157xmlChar *
7158xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7159 xmlChar *version = NULL;
7160 const xmlChar *q;
7161
7162 if ((RAW == 'v') && (NXT(1) == 'e') &&
7163 (NXT(2) == 'r') && (NXT(3) == 's') &&
7164 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7165 (NXT(6) == 'n')) {
7166 SKIP(7);
7167 SKIP_BLANKS;
7168 if (RAW != '=') {
7169 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7171 ctxt->sax->error(ctxt->userData,
7172 "xmlParseVersionInfo : expected '='\n");
7173 ctxt->wellFormed = 0;
7174 ctxt->disableSAX = 1;
7175 return(NULL);
7176 }
7177 NEXT;
7178 SKIP_BLANKS;
7179 if (RAW == '"') {
7180 NEXT;
7181 q = CUR_PTR;
7182 version = xmlParseVersionNum(ctxt);
7183 if (RAW != '"') {
7184 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7186 ctxt->sax->error(ctxt->userData,
7187 "String not closed\n%.50s\n", q);
7188 ctxt->wellFormed = 0;
7189 ctxt->disableSAX = 1;
7190 } else
7191 NEXT;
7192 } else if (RAW == '\''){
7193 NEXT;
7194 q = CUR_PTR;
7195 version = xmlParseVersionNum(ctxt);
7196 if (RAW != '\'') {
7197 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7199 ctxt->sax->error(ctxt->userData,
7200 "String not closed\n%.50s\n", q);
7201 ctxt->wellFormed = 0;
7202 ctxt->disableSAX = 1;
7203 } else
7204 NEXT;
7205 } else {
7206 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7208 ctxt->sax->error(ctxt->userData,
7209 "xmlParseVersionInfo : expected ' or \"\n");
7210 ctxt->wellFormed = 0;
7211 ctxt->disableSAX = 1;
7212 }
7213 }
7214 return(version);
7215}
7216
7217/**
7218 * xmlParseEncName:
7219 * @ctxt: an XML parser context
7220 *
7221 * parse the XML encoding name
7222 *
7223 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7224 *
7225 * Returns the encoding name value or NULL
7226 */
7227xmlChar *
7228xmlParseEncName(xmlParserCtxtPtr ctxt) {
7229 xmlChar *buf = NULL;
7230 int len = 0;
7231 int size = 10;
7232 xmlChar cur;
7233
7234 cur = CUR;
7235 if (((cur >= 'a') && (cur <= 'z')) ||
7236 ((cur >= 'A') && (cur <= 'Z'))) {
7237 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7238 if (buf == NULL) {
7239 xmlGenericError(xmlGenericErrorContext,
7240 "malloc of %d byte failed\n", size);
7241 return(NULL);
7242 }
7243
7244 buf[len++] = cur;
7245 NEXT;
7246 cur = CUR;
7247 while (((cur >= 'a') && (cur <= 'z')) ||
7248 ((cur >= 'A') && (cur <= 'Z')) ||
7249 ((cur >= '0') && (cur <= '9')) ||
7250 (cur == '.') || (cur == '_') ||
7251 (cur == '-')) {
7252 if (len + 1 >= size) {
7253 size *= 2;
7254 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7255 if (buf == NULL) {
7256 xmlGenericError(xmlGenericErrorContext,
7257 "realloc of %d byte failed\n", size);
7258 return(NULL);
7259 }
7260 }
7261 buf[len++] = cur;
7262 NEXT;
7263 cur = CUR;
7264 if (cur == 0) {
7265 SHRINK;
7266 GROW;
7267 cur = CUR;
7268 }
7269 }
7270 buf[len] = 0;
7271 } else {
7272 ctxt->errNo = XML_ERR_ENCODING_NAME;
7273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7274 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7275 ctxt->wellFormed = 0;
7276 ctxt->disableSAX = 1;
7277 }
7278 return(buf);
7279}
7280
7281/**
7282 * xmlParseEncodingDecl:
7283 * @ctxt: an XML parser context
7284 *
7285 * parse the XML encoding declaration
7286 *
7287 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7288 *
7289 * this setups the conversion filters.
7290 *
7291 * Returns the encoding value or NULL
7292 */
7293
7294xmlChar *
7295xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7296 xmlChar *encoding = NULL;
7297 const xmlChar *q;
7298
7299 SKIP_BLANKS;
7300 if ((RAW == 'e') && (NXT(1) == 'n') &&
7301 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7302 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7303 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7304 SKIP(8);
7305 SKIP_BLANKS;
7306 if (RAW != '=') {
7307 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7309 ctxt->sax->error(ctxt->userData,
7310 "xmlParseEncodingDecl : expected '='\n");
7311 ctxt->wellFormed = 0;
7312 ctxt->disableSAX = 1;
7313 return(NULL);
7314 }
7315 NEXT;
7316 SKIP_BLANKS;
7317 if (RAW == '"') {
7318 NEXT;
7319 q = CUR_PTR;
7320 encoding = xmlParseEncName(ctxt);
7321 if (RAW != '"') {
7322 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7324 ctxt->sax->error(ctxt->userData,
7325 "String not closed\n%.50s\n", q);
7326 ctxt->wellFormed = 0;
7327 ctxt->disableSAX = 1;
7328 } else
7329 NEXT;
7330 } else if (RAW == '\''){
7331 NEXT;
7332 q = CUR_PTR;
7333 encoding = xmlParseEncName(ctxt);
7334 if (RAW != '\'') {
7335 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7337 ctxt->sax->error(ctxt->userData,
7338 "String not closed\n%.50s\n", q);
7339 ctxt->wellFormed = 0;
7340 ctxt->disableSAX = 1;
7341 } else
7342 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007343 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007344 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7346 ctxt->sax->error(ctxt->userData,
7347 "xmlParseEncodingDecl : expected ' or \"\n");
7348 ctxt->wellFormed = 0;
7349 ctxt->disableSAX = 1;
7350 }
7351 if (encoding != NULL) {
7352 xmlCharEncoding enc;
7353 xmlCharEncodingHandlerPtr handler;
7354
7355 if (ctxt->input->encoding != NULL)
7356 xmlFree((xmlChar *) ctxt->input->encoding);
7357 ctxt->input->encoding = encoding;
7358
7359 enc = xmlParseCharEncoding((const char *) encoding);
7360 /*
7361 * registered set of known encodings
7362 */
7363 if (enc != XML_CHAR_ENCODING_ERROR) {
7364 xmlSwitchEncoding(ctxt, enc);
7365 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007366 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007367 xmlFree(encoding);
7368 return(NULL);
7369 }
7370 } else {
7371 /*
7372 * fallback for unknown encodings
7373 */
7374 handler = xmlFindCharEncodingHandler((const char *) encoding);
7375 if (handler != NULL) {
7376 xmlSwitchToEncoding(ctxt, handler);
7377 } else {
7378 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7380 ctxt->sax->error(ctxt->userData,
7381 "Unsupported encoding %s\n", encoding);
7382 return(NULL);
7383 }
7384 }
7385 }
7386 }
7387 return(encoding);
7388}
7389
7390/**
7391 * xmlParseSDDecl:
7392 * @ctxt: an XML parser context
7393 *
7394 * parse the XML standalone declaration
7395 *
7396 * [32] SDDecl ::= S 'standalone' Eq
7397 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7398 *
7399 * [ VC: Standalone Document Declaration ]
7400 * TODO The standalone document declaration must have the value "no"
7401 * if any external markup declarations contain declarations of:
7402 * - attributes with default values, if elements to which these
7403 * attributes apply appear in the document without specifications
7404 * of values for these attributes, or
7405 * - entities (other than amp, lt, gt, apos, quot), if references
7406 * to those entities appear in the document, or
7407 * - attributes with values subject to normalization, where the
7408 * attribute appears in the document with a value which will change
7409 * as a result of normalization, or
7410 * - element types with element content, if white space occurs directly
7411 * within any instance of those types.
7412 *
7413 * Returns 1 if standalone, 0 otherwise
7414 */
7415
7416int
7417xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7418 int standalone = -1;
7419
7420 SKIP_BLANKS;
7421 if ((RAW == 's') && (NXT(1) == 't') &&
7422 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7423 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7424 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7425 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7426 SKIP(10);
7427 SKIP_BLANKS;
7428 if (RAW != '=') {
7429 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7431 ctxt->sax->error(ctxt->userData,
7432 "XML standalone declaration : expected '='\n");
7433 ctxt->wellFormed = 0;
7434 ctxt->disableSAX = 1;
7435 return(standalone);
7436 }
7437 NEXT;
7438 SKIP_BLANKS;
7439 if (RAW == '\''){
7440 NEXT;
7441 if ((RAW == 'n') && (NXT(1) == 'o')) {
7442 standalone = 0;
7443 SKIP(2);
7444 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7445 (NXT(2) == 's')) {
7446 standalone = 1;
7447 SKIP(3);
7448 } else {
7449 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7451 ctxt->sax->error(ctxt->userData,
7452 "standalone accepts only 'yes' or 'no'\n");
7453 ctxt->wellFormed = 0;
7454 ctxt->disableSAX = 1;
7455 }
7456 if (RAW != '\'') {
7457 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7459 ctxt->sax->error(ctxt->userData, "String not closed\n");
7460 ctxt->wellFormed = 0;
7461 ctxt->disableSAX = 1;
7462 } else
7463 NEXT;
7464 } else if (RAW == '"'){
7465 NEXT;
7466 if ((RAW == 'n') && (NXT(1) == 'o')) {
7467 standalone = 0;
7468 SKIP(2);
7469 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7470 (NXT(2) == 's')) {
7471 standalone = 1;
7472 SKIP(3);
7473 } else {
7474 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7476 ctxt->sax->error(ctxt->userData,
7477 "standalone accepts only 'yes' or 'no'\n");
7478 ctxt->wellFormed = 0;
7479 ctxt->disableSAX = 1;
7480 }
7481 if (RAW != '"') {
7482 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7484 ctxt->sax->error(ctxt->userData, "String not closed\n");
7485 ctxt->wellFormed = 0;
7486 ctxt->disableSAX = 1;
7487 } else
7488 NEXT;
7489 } else {
7490 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7492 ctxt->sax->error(ctxt->userData,
7493 "Standalone value not found\n");
7494 ctxt->wellFormed = 0;
7495 ctxt->disableSAX = 1;
7496 }
7497 }
7498 return(standalone);
7499}
7500
7501/**
7502 * xmlParseXMLDecl:
7503 * @ctxt: an XML parser context
7504 *
7505 * parse an XML declaration header
7506 *
7507 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7508 */
7509
7510void
7511xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7512 xmlChar *version;
7513
7514 /*
7515 * We know that '<?xml' is here.
7516 */
7517 SKIP(5);
7518
7519 if (!IS_BLANK(RAW)) {
7520 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7522 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7523 ctxt->wellFormed = 0;
7524 ctxt->disableSAX = 1;
7525 }
7526 SKIP_BLANKS;
7527
7528 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007529 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007530 */
7531 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007532 if (version == NULL) {
7533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7534 ctxt->sax->error(ctxt->userData,
7535 "Malformed declaration expecting version\n");
7536 ctxt->wellFormed = 0;
7537 ctxt->disableSAX = 1;
7538 } else {
7539 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7540 /*
7541 * TODO: Blueberry should be detected here
7542 */
7543 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7544 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7545 version);
7546 }
7547 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007548 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007549 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007550 }
Owen Taylor3473f882001-02-23 17:55:21 +00007551
7552 /*
7553 * We may have the encoding declaration
7554 */
7555 if (!IS_BLANK(RAW)) {
7556 if ((RAW == '?') && (NXT(1) == '>')) {
7557 SKIP(2);
7558 return;
7559 }
7560 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7562 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7563 ctxt->wellFormed = 0;
7564 ctxt->disableSAX = 1;
7565 }
7566 xmlParseEncodingDecl(ctxt);
7567 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7568 /*
7569 * The XML REC instructs us to stop parsing right here
7570 */
7571 return;
7572 }
7573
7574 /*
7575 * We may have the standalone status.
7576 */
7577 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7578 if ((RAW == '?') && (NXT(1) == '>')) {
7579 SKIP(2);
7580 return;
7581 }
7582 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7584 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7585 ctxt->wellFormed = 0;
7586 ctxt->disableSAX = 1;
7587 }
7588 SKIP_BLANKS;
7589 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7590
7591 SKIP_BLANKS;
7592 if ((RAW == '?') && (NXT(1) == '>')) {
7593 SKIP(2);
7594 } else if (RAW == '>') {
7595 /* Deprecated old WD ... */
7596 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7598 ctxt->sax->error(ctxt->userData,
7599 "XML declaration must end-up with '?>'\n");
7600 ctxt->wellFormed = 0;
7601 ctxt->disableSAX = 1;
7602 NEXT;
7603 } else {
7604 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7606 ctxt->sax->error(ctxt->userData,
7607 "parsing XML declaration: '?>' expected\n");
7608 ctxt->wellFormed = 0;
7609 ctxt->disableSAX = 1;
7610 MOVETO_ENDTAG(CUR_PTR);
7611 NEXT;
7612 }
7613}
7614
7615/**
7616 * xmlParseMisc:
7617 * @ctxt: an XML parser context
7618 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007619 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007620 *
7621 * [27] Misc ::= Comment | PI | S
7622 */
7623
7624void
7625xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007626 while (((RAW == '<') && (NXT(1) == '?')) ||
7627 ((RAW == '<') && (NXT(1) == '!') &&
7628 (NXT(2) == '-') && (NXT(3) == '-')) ||
7629 IS_BLANK(CUR)) {
7630 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007631 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007632 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007633 NEXT;
7634 } else
7635 xmlParseComment(ctxt);
7636 }
7637}
7638
7639/**
7640 * xmlParseDocument:
7641 * @ctxt: an XML parser context
7642 *
7643 * parse an XML document (and build a tree if using the standard SAX
7644 * interface).
7645 *
7646 * [1] document ::= prolog element Misc*
7647 *
7648 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7649 *
7650 * Returns 0, -1 in case of error. the parser context is augmented
7651 * as a result of the parsing.
7652 */
7653
7654int
7655xmlParseDocument(xmlParserCtxtPtr ctxt) {
7656 xmlChar start[4];
7657 xmlCharEncoding enc;
7658
7659 xmlInitParser();
7660
7661 GROW;
7662
7663 /*
7664 * SAX: beginning of the document processing.
7665 */
7666 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7667 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7668
Daniel Veillard50f34372001-08-03 12:06:36 +00007669 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007670 /*
7671 * Get the 4 first bytes and decode the charset
7672 * if enc != XML_CHAR_ENCODING_NONE
7673 * plug some encoding conversion routines.
7674 */
7675 start[0] = RAW;
7676 start[1] = NXT(1);
7677 start[2] = NXT(2);
7678 start[3] = NXT(3);
7679 enc = xmlDetectCharEncoding(start, 4);
7680 if (enc != XML_CHAR_ENCODING_NONE) {
7681 xmlSwitchEncoding(ctxt, enc);
7682 }
Owen Taylor3473f882001-02-23 17:55:21 +00007683 }
7684
7685
7686 if (CUR == 0) {
7687 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7689 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7690 ctxt->wellFormed = 0;
7691 ctxt->disableSAX = 1;
7692 }
7693
7694 /*
7695 * Check for the XMLDecl in the Prolog.
7696 */
7697 GROW;
7698 if ((RAW == '<') && (NXT(1) == '?') &&
7699 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7700 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7701
7702 /*
7703 * Note that we will switch encoding on the fly.
7704 */
7705 xmlParseXMLDecl(ctxt);
7706 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7707 /*
7708 * The XML REC instructs us to stop parsing right here
7709 */
7710 return(-1);
7711 }
7712 ctxt->standalone = ctxt->input->standalone;
7713 SKIP_BLANKS;
7714 } else {
7715 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7716 }
7717 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7718 ctxt->sax->startDocument(ctxt->userData);
7719
7720 /*
7721 * The Misc part of the Prolog
7722 */
7723 GROW;
7724 xmlParseMisc(ctxt);
7725
7726 /*
7727 * Then possibly doc type declaration(s) and more Misc
7728 * (doctypedecl Misc*)?
7729 */
7730 GROW;
7731 if ((RAW == '<') && (NXT(1) == '!') &&
7732 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7733 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7734 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7735 (NXT(8) == 'E')) {
7736
7737 ctxt->inSubset = 1;
7738 xmlParseDocTypeDecl(ctxt);
7739 if (RAW == '[') {
7740 ctxt->instate = XML_PARSER_DTD;
7741 xmlParseInternalSubset(ctxt);
7742 }
7743
7744 /*
7745 * Create and update the external subset.
7746 */
7747 ctxt->inSubset = 2;
7748 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7749 (!ctxt->disableSAX))
7750 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7751 ctxt->extSubSystem, ctxt->extSubURI);
7752 ctxt->inSubset = 0;
7753
7754
7755 ctxt->instate = XML_PARSER_PROLOG;
7756 xmlParseMisc(ctxt);
7757 }
7758
7759 /*
7760 * Time to start parsing the tree itself
7761 */
7762 GROW;
7763 if (RAW != '<') {
7764 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7766 ctxt->sax->error(ctxt->userData,
7767 "Start tag expected, '<' not found\n");
7768 ctxt->wellFormed = 0;
7769 ctxt->disableSAX = 1;
7770 ctxt->instate = XML_PARSER_EOF;
7771 } else {
7772 ctxt->instate = XML_PARSER_CONTENT;
7773 xmlParseElement(ctxt);
7774 ctxt->instate = XML_PARSER_EPILOG;
7775
7776
7777 /*
7778 * The Misc part at the end
7779 */
7780 xmlParseMisc(ctxt);
7781
Daniel Veillard561b7f82002-03-20 21:55:57 +00007782 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007783 ctxt->errNo = XML_ERR_DOCUMENT_END;
7784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7785 ctxt->sax->error(ctxt->userData,
7786 "Extra content at the end of the document\n");
7787 ctxt->wellFormed = 0;
7788 ctxt->disableSAX = 1;
7789 }
7790 ctxt->instate = XML_PARSER_EOF;
7791 }
7792
7793 /*
7794 * SAX: end of the document processing.
7795 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007796 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007797 ctxt->sax->endDocument(ctxt->userData);
7798
Daniel Veillard5997aca2002-03-18 18:36:20 +00007799 /*
7800 * Remove locally kept entity definitions if the tree was not built
7801 */
7802 if ((ctxt->myDoc != NULL) &&
7803 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7804 xmlFreeDoc(ctxt->myDoc);
7805 ctxt->myDoc = NULL;
7806 }
7807
Daniel Veillardc7612992002-02-17 22:47:37 +00007808 if (! ctxt->wellFormed) {
7809 ctxt->valid = 0;
7810 return(-1);
7811 }
Owen Taylor3473f882001-02-23 17:55:21 +00007812 return(0);
7813}
7814
7815/**
7816 * xmlParseExtParsedEnt:
7817 * @ctxt: an XML parser context
7818 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007819 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007820 * An external general parsed entity is well-formed if it matches the
7821 * production labeled extParsedEnt.
7822 *
7823 * [78] extParsedEnt ::= TextDecl? content
7824 *
7825 * Returns 0, -1 in case of error. the parser context is augmented
7826 * as a result of the parsing.
7827 */
7828
7829int
7830xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7831 xmlChar start[4];
7832 xmlCharEncoding enc;
7833
7834 xmlDefaultSAXHandlerInit();
7835
7836 GROW;
7837
7838 /*
7839 * SAX: beginning of the document processing.
7840 */
7841 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7842 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7843
7844 /*
7845 * Get the 4 first bytes and decode the charset
7846 * if enc != XML_CHAR_ENCODING_NONE
7847 * plug some encoding conversion routines.
7848 */
7849 start[0] = RAW;
7850 start[1] = NXT(1);
7851 start[2] = NXT(2);
7852 start[3] = NXT(3);
7853 enc = xmlDetectCharEncoding(start, 4);
7854 if (enc != XML_CHAR_ENCODING_NONE) {
7855 xmlSwitchEncoding(ctxt, enc);
7856 }
7857
7858
7859 if (CUR == 0) {
7860 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7862 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7863 ctxt->wellFormed = 0;
7864 ctxt->disableSAX = 1;
7865 }
7866
7867 /*
7868 * Check for the XMLDecl in the Prolog.
7869 */
7870 GROW;
7871 if ((RAW == '<') && (NXT(1) == '?') &&
7872 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7873 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7874
7875 /*
7876 * Note that we will switch encoding on the fly.
7877 */
7878 xmlParseXMLDecl(ctxt);
7879 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7880 /*
7881 * The XML REC instructs us to stop parsing right here
7882 */
7883 return(-1);
7884 }
7885 SKIP_BLANKS;
7886 } else {
7887 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7888 }
7889 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7890 ctxt->sax->startDocument(ctxt->userData);
7891
7892 /*
7893 * Doing validity checking on chunk doesn't make sense
7894 */
7895 ctxt->instate = XML_PARSER_CONTENT;
7896 ctxt->validate = 0;
7897 ctxt->loadsubset = 0;
7898 ctxt->depth = 0;
7899
7900 xmlParseContent(ctxt);
7901
7902 if ((RAW == '<') && (NXT(1) == '/')) {
7903 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7905 ctxt->sax->error(ctxt->userData,
7906 "chunk is not well balanced\n");
7907 ctxt->wellFormed = 0;
7908 ctxt->disableSAX = 1;
7909 } else if (RAW != 0) {
7910 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7912 ctxt->sax->error(ctxt->userData,
7913 "extra content at the end of well balanced chunk\n");
7914 ctxt->wellFormed = 0;
7915 ctxt->disableSAX = 1;
7916 }
7917
7918 /*
7919 * SAX: end of the document processing.
7920 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007921 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007922 ctxt->sax->endDocument(ctxt->userData);
7923
7924 if (! ctxt->wellFormed) return(-1);
7925 return(0);
7926}
7927
7928/************************************************************************
7929 * *
7930 * Progressive parsing interfaces *
7931 * *
7932 ************************************************************************/
7933
7934/**
7935 * xmlParseLookupSequence:
7936 * @ctxt: an XML parser context
7937 * @first: the first char to lookup
7938 * @next: the next char to lookup or zero
7939 * @third: the next char to lookup or zero
7940 *
7941 * Try to find if a sequence (first, next, third) or just (first next) or
7942 * (first) is available in the input stream.
7943 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7944 * to avoid rescanning sequences of bytes, it DOES change the state of the
7945 * parser, do not use liberally.
7946 *
7947 * Returns the index to the current parsing point if the full sequence
7948 * is available, -1 otherwise.
7949 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007950static int
Owen Taylor3473f882001-02-23 17:55:21 +00007951xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7952 xmlChar next, xmlChar third) {
7953 int base, len;
7954 xmlParserInputPtr in;
7955 const xmlChar *buf;
7956
7957 in = ctxt->input;
7958 if (in == NULL) return(-1);
7959 base = in->cur - in->base;
7960 if (base < 0) return(-1);
7961 if (ctxt->checkIndex > base)
7962 base = ctxt->checkIndex;
7963 if (in->buf == NULL) {
7964 buf = in->base;
7965 len = in->length;
7966 } else {
7967 buf = in->buf->buffer->content;
7968 len = in->buf->buffer->use;
7969 }
7970 /* take into account the sequence length */
7971 if (third) len -= 2;
7972 else if (next) len --;
7973 for (;base < len;base++) {
7974 if (buf[base] == first) {
7975 if (third != 0) {
7976 if ((buf[base + 1] != next) ||
7977 (buf[base + 2] != third)) continue;
7978 } else if (next != 0) {
7979 if (buf[base + 1] != next) continue;
7980 }
7981 ctxt->checkIndex = 0;
7982#ifdef DEBUG_PUSH
7983 if (next == 0)
7984 xmlGenericError(xmlGenericErrorContext,
7985 "PP: lookup '%c' found at %d\n",
7986 first, base);
7987 else if (third == 0)
7988 xmlGenericError(xmlGenericErrorContext,
7989 "PP: lookup '%c%c' found at %d\n",
7990 first, next, base);
7991 else
7992 xmlGenericError(xmlGenericErrorContext,
7993 "PP: lookup '%c%c%c' found at %d\n",
7994 first, next, third, base);
7995#endif
7996 return(base - (in->cur - in->base));
7997 }
7998 }
7999 ctxt->checkIndex = base;
8000#ifdef DEBUG_PUSH
8001 if (next == 0)
8002 xmlGenericError(xmlGenericErrorContext,
8003 "PP: lookup '%c' failed\n", first);
8004 else if (third == 0)
8005 xmlGenericError(xmlGenericErrorContext,
8006 "PP: lookup '%c%c' failed\n", first, next);
8007 else
8008 xmlGenericError(xmlGenericErrorContext,
8009 "PP: lookup '%c%c%c' failed\n", first, next, third);
8010#endif
8011 return(-1);
8012}
8013
8014/**
8015 * xmlParseTryOrFinish:
8016 * @ctxt: an XML parser context
8017 * @terminate: last chunk indicator
8018 *
8019 * Try to progress on parsing
8020 *
8021 * Returns zero if no parsing was possible
8022 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008023static int
Owen Taylor3473f882001-02-23 17:55:21 +00008024xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8025 int ret = 0;
8026 int avail;
8027 xmlChar cur, next;
8028
8029#ifdef DEBUG_PUSH
8030 switch (ctxt->instate) {
8031 case XML_PARSER_EOF:
8032 xmlGenericError(xmlGenericErrorContext,
8033 "PP: try EOF\n"); break;
8034 case XML_PARSER_START:
8035 xmlGenericError(xmlGenericErrorContext,
8036 "PP: try START\n"); break;
8037 case XML_PARSER_MISC:
8038 xmlGenericError(xmlGenericErrorContext,
8039 "PP: try MISC\n");break;
8040 case XML_PARSER_COMMENT:
8041 xmlGenericError(xmlGenericErrorContext,
8042 "PP: try COMMENT\n");break;
8043 case XML_PARSER_PROLOG:
8044 xmlGenericError(xmlGenericErrorContext,
8045 "PP: try PROLOG\n");break;
8046 case XML_PARSER_START_TAG:
8047 xmlGenericError(xmlGenericErrorContext,
8048 "PP: try START_TAG\n");break;
8049 case XML_PARSER_CONTENT:
8050 xmlGenericError(xmlGenericErrorContext,
8051 "PP: try CONTENT\n");break;
8052 case XML_PARSER_CDATA_SECTION:
8053 xmlGenericError(xmlGenericErrorContext,
8054 "PP: try CDATA_SECTION\n");break;
8055 case XML_PARSER_END_TAG:
8056 xmlGenericError(xmlGenericErrorContext,
8057 "PP: try END_TAG\n");break;
8058 case XML_PARSER_ENTITY_DECL:
8059 xmlGenericError(xmlGenericErrorContext,
8060 "PP: try ENTITY_DECL\n");break;
8061 case XML_PARSER_ENTITY_VALUE:
8062 xmlGenericError(xmlGenericErrorContext,
8063 "PP: try ENTITY_VALUE\n");break;
8064 case XML_PARSER_ATTRIBUTE_VALUE:
8065 xmlGenericError(xmlGenericErrorContext,
8066 "PP: try ATTRIBUTE_VALUE\n");break;
8067 case XML_PARSER_DTD:
8068 xmlGenericError(xmlGenericErrorContext,
8069 "PP: try DTD\n");break;
8070 case XML_PARSER_EPILOG:
8071 xmlGenericError(xmlGenericErrorContext,
8072 "PP: try EPILOG\n");break;
8073 case XML_PARSER_PI:
8074 xmlGenericError(xmlGenericErrorContext,
8075 "PP: try PI\n");break;
8076 case XML_PARSER_IGNORE:
8077 xmlGenericError(xmlGenericErrorContext,
8078 "PP: try IGNORE\n");break;
8079 }
8080#endif
8081
8082 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008083 SHRINK;
8084
Owen Taylor3473f882001-02-23 17:55:21 +00008085 /*
8086 * Pop-up of finished entities.
8087 */
8088 while ((RAW == 0) && (ctxt->inputNr > 1))
8089 xmlPopInput(ctxt);
8090
8091 if (ctxt->input ==NULL) break;
8092 if (ctxt->input->buf == NULL)
8093 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008094 else {
8095 /*
8096 * If we are operating on converted input, try to flush
8097 * remainng chars to avoid them stalling in the non-converted
8098 * buffer.
8099 */
8100 if ((ctxt->input->buf->raw != NULL) &&
8101 (ctxt->input->buf->raw->use > 0)) {
8102 int base = ctxt->input->base -
8103 ctxt->input->buf->buffer->content;
8104 int current = ctxt->input->cur - ctxt->input->base;
8105
8106 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8107 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8108 ctxt->input->cur = ctxt->input->base + current;
8109 ctxt->input->end =
8110 &ctxt->input->buf->buffer->content[
8111 ctxt->input->buf->buffer->use];
8112 }
8113 avail = ctxt->input->buf->buffer->use -
8114 (ctxt->input->cur - ctxt->input->base);
8115 }
Owen Taylor3473f882001-02-23 17:55:21 +00008116 if (avail < 1)
8117 goto done;
8118 switch (ctxt->instate) {
8119 case XML_PARSER_EOF:
8120 /*
8121 * Document parsing is done !
8122 */
8123 goto done;
8124 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008125 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8126 xmlChar start[4];
8127 xmlCharEncoding enc;
8128
8129 /*
8130 * Very first chars read from the document flow.
8131 */
8132 if (avail < 4)
8133 goto done;
8134
8135 /*
8136 * Get the 4 first bytes and decode the charset
8137 * if enc != XML_CHAR_ENCODING_NONE
8138 * plug some encoding conversion routines.
8139 */
8140 start[0] = RAW;
8141 start[1] = NXT(1);
8142 start[2] = NXT(2);
8143 start[3] = NXT(3);
8144 enc = xmlDetectCharEncoding(start, 4);
8145 if (enc != XML_CHAR_ENCODING_NONE) {
8146 xmlSwitchEncoding(ctxt, enc);
8147 }
8148 break;
8149 }
Owen Taylor3473f882001-02-23 17:55:21 +00008150
8151 cur = ctxt->input->cur[0];
8152 next = ctxt->input->cur[1];
8153 if (cur == 0) {
8154 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8155 ctxt->sax->setDocumentLocator(ctxt->userData,
8156 &xmlDefaultSAXLocator);
8157 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8159 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8160 ctxt->wellFormed = 0;
8161 ctxt->disableSAX = 1;
8162 ctxt->instate = XML_PARSER_EOF;
8163#ifdef DEBUG_PUSH
8164 xmlGenericError(xmlGenericErrorContext,
8165 "PP: entering EOF\n");
8166#endif
8167 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8168 ctxt->sax->endDocument(ctxt->userData);
8169 goto done;
8170 }
8171 if ((cur == '<') && (next == '?')) {
8172 /* PI or XML decl */
8173 if (avail < 5) return(ret);
8174 if ((!terminate) &&
8175 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8176 return(ret);
8177 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8178 ctxt->sax->setDocumentLocator(ctxt->userData,
8179 &xmlDefaultSAXLocator);
8180 if ((ctxt->input->cur[2] == 'x') &&
8181 (ctxt->input->cur[3] == 'm') &&
8182 (ctxt->input->cur[4] == 'l') &&
8183 (IS_BLANK(ctxt->input->cur[5]))) {
8184 ret += 5;
8185#ifdef DEBUG_PUSH
8186 xmlGenericError(xmlGenericErrorContext,
8187 "PP: Parsing XML Decl\n");
8188#endif
8189 xmlParseXMLDecl(ctxt);
8190 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8191 /*
8192 * The XML REC instructs us to stop parsing right
8193 * here
8194 */
8195 ctxt->instate = XML_PARSER_EOF;
8196 return(0);
8197 }
8198 ctxt->standalone = ctxt->input->standalone;
8199 if ((ctxt->encoding == NULL) &&
8200 (ctxt->input->encoding != NULL))
8201 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8202 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8203 (!ctxt->disableSAX))
8204 ctxt->sax->startDocument(ctxt->userData);
8205 ctxt->instate = XML_PARSER_MISC;
8206#ifdef DEBUG_PUSH
8207 xmlGenericError(xmlGenericErrorContext,
8208 "PP: entering MISC\n");
8209#endif
8210 } else {
8211 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8212 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8213 (!ctxt->disableSAX))
8214 ctxt->sax->startDocument(ctxt->userData);
8215 ctxt->instate = XML_PARSER_MISC;
8216#ifdef DEBUG_PUSH
8217 xmlGenericError(xmlGenericErrorContext,
8218 "PP: entering MISC\n");
8219#endif
8220 }
8221 } else {
8222 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8223 ctxt->sax->setDocumentLocator(ctxt->userData,
8224 &xmlDefaultSAXLocator);
8225 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8226 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8227 (!ctxt->disableSAX))
8228 ctxt->sax->startDocument(ctxt->userData);
8229 ctxt->instate = XML_PARSER_MISC;
8230#ifdef DEBUG_PUSH
8231 xmlGenericError(xmlGenericErrorContext,
8232 "PP: entering MISC\n");
8233#endif
8234 }
8235 break;
8236 case XML_PARSER_MISC:
8237 SKIP_BLANKS;
8238 if (ctxt->input->buf == NULL)
8239 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8240 else
8241 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8242 if (avail < 2)
8243 goto done;
8244 cur = ctxt->input->cur[0];
8245 next = ctxt->input->cur[1];
8246 if ((cur == '<') && (next == '?')) {
8247 if ((!terminate) &&
8248 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8249 goto done;
8250#ifdef DEBUG_PUSH
8251 xmlGenericError(xmlGenericErrorContext,
8252 "PP: Parsing PI\n");
8253#endif
8254 xmlParsePI(ctxt);
8255 } else if ((cur == '<') && (next == '!') &&
8256 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8257 if ((!terminate) &&
8258 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8259 goto done;
8260#ifdef DEBUG_PUSH
8261 xmlGenericError(xmlGenericErrorContext,
8262 "PP: Parsing Comment\n");
8263#endif
8264 xmlParseComment(ctxt);
8265 ctxt->instate = XML_PARSER_MISC;
8266 } else if ((cur == '<') && (next == '!') &&
8267 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8268 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8269 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8270 (ctxt->input->cur[8] == 'E')) {
8271 if ((!terminate) &&
8272 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8273 goto done;
8274#ifdef DEBUG_PUSH
8275 xmlGenericError(xmlGenericErrorContext,
8276 "PP: Parsing internal subset\n");
8277#endif
8278 ctxt->inSubset = 1;
8279 xmlParseDocTypeDecl(ctxt);
8280 if (RAW == '[') {
8281 ctxt->instate = XML_PARSER_DTD;
8282#ifdef DEBUG_PUSH
8283 xmlGenericError(xmlGenericErrorContext,
8284 "PP: entering DTD\n");
8285#endif
8286 } else {
8287 /*
8288 * Create and update the external subset.
8289 */
8290 ctxt->inSubset = 2;
8291 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8292 (ctxt->sax->externalSubset != NULL))
8293 ctxt->sax->externalSubset(ctxt->userData,
8294 ctxt->intSubName, ctxt->extSubSystem,
8295 ctxt->extSubURI);
8296 ctxt->inSubset = 0;
8297 ctxt->instate = XML_PARSER_PROLOG;
8298#ifdef DEBUG_PUSH
8299 xmlGenericError(xmlGenericErrorContext,
8300 "PP: entering PROLOG\n");
8301#endif
8302 }
8303 } else if ((cur == '<') && (next == '!') &&
8304 (avail < 9)) {
8305 goto done;
8306 } else {
8307 ctxt->instate = XML_PARSER_START_TAG;
8308#ifdef DEBUG_PUSH
8309 xmlGenericError(xmlGenericErrorContext,
8310 "PP: entering START_TAG\n");
8311#endif
8312 }
8313 break;
8314 case XML_PARSER_IGNORE:
8315 xmlGenericError(xmlGenericErrorContext,
8316 "PP: internal error, state == IGNORE");
8317 ctxt->instate = XML_PARSER_DTD;
8318#ifdef DEBUG_PUSH
8319 xmlGenericError(xmlGenericErrorContext,
8320 "PP: entering DTD\n");
8321#endif
8322 break;
8323 case XML_PARSER_PROLOG:
8324 SKIP_BLANKS;
8325 if (ctxt->input->buf == NULL)
8326 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8327 else
8328 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8329 if (avail < 2)
8330 goto done;
8331 cur = ctxt->input->cur[0];
8332 next = ctxt->input->cur[1];
8333 if ((cur == '<') && (next == '?')) {
8334 if ((!terminate) &&
8335 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8336 goto done;
8337#ifdef DEBUG_PUSH
8338 xmlGenericError(xmlGenericErrorContext,
8339 "PP: Parsing PI\n");
8340#endif
8341 xmlParsePI(ctxt);
8342 } else if ((cur == '<') && (next == '!') &&
8343 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8344 if ((!terminate) &&
8345 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8346 goto done;
8347#ifdef DEBUG_PUSH
8348 xmlGenericError(xmlGenericErrorContext,
8349 "PP: Parsing Comment\n");
8350#endif
8351 xmlParseComment(ctxt);
8352 ctxt->instate = XML_PARSER_PROLOG;
8353 } else if ((cur == '<') && (next == '!') &&
8354 (avail < 4)) {
8355 goto done;
8356 } else {
8357 ctxt->instate = XML_PARSER_START_TAG;
8358#ifdef DEBUG_PUSH
8359 xmlGenericError(xmlGenericErrorContext,
8360 "PP: entering START_TAG\n");
8361#endif
8362 }
8363 break;
8364 case XML_PARSER_EPILOG:
8365 SKIP_BLANKS;
8366 if (ctxt->input->buf == NULL)
8367 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8368 else
8369 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8370 if (avail < 2)
8371 goto done;
8372 cur = ctxt->input->cur[0];
8373 next = ctxt->input->cur[1];
8374 if ((cur == '<') && (next == '?')) {
8375 if ((!terminate) &&
8376 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8377 goto done;
8378#ifdef DEBUG_PUSH
8379 xmlGenericError(xmlGenericErrorContext,
8380 "PP: Parsing PI\n");
8381#endif
8382 xmlParsePI(ctxt);
8383 ctxt->instate = XML_PARSER_EPILOG;
8384 } else if ((cur == '<') && (next == '!') &&
8385 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8386 if ((!terminate) &&
8387 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8388 goto done;
8389#ifdef DEBUG_PUSH
8390 xmlGenericError(xmlGenericErrorContext,
8391 "PP: Parsing Comment\n");
8392#endif
8393 xmlParseComment(ctxt);
8394 ctxt->instate = XML_PARSER_EPILOG;
8395 } else if ((cur == '<') && (next == '!') &&
8396 (avail < 4)) {
8397 goto done;
8398 } else {
8399 ctxt->errNo = XML_ERR_DOCUMENT_END;
8400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8401 ctxt->sax->error(ctxt->userData,
8402 "Extra content at the end of the document\n");
8403 ctxt->wellFormed = 0;
8404 ctxt->disableSAX = 1;
8405 ctxt->instate = XML_PARSER_EOF;
8406#ifdef DEBUG_PUSH
8407 xmlGenericError(xmlGenericErrorContext,
8408 "PP: entering EOF\n");
8409#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008410 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008411 ctxt->sax->endDocument(ctxt->userData);
8412 goto done;
8413 }
8414 break;
8415 case XML_PARSER_START_TAG: {
8416 xmlChar *name, *oldname;
8417
8418 if ((avail < 2) && (ctxt->inputNr == 1))
8419 goto done;
8420 cur = ctxt->input->cur[0];
8421 if (cur != '<') {
8422 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8424 ctxt->sax->error(ctxt->userData,
8425 "Start tag expect, '<' not found\n");
8426 ctxt->wellFormed = 0;
8427 ctxt->disableSAX = 1;
8428 ctxt->instate = XML_PARSER_EOF;
8429#ifdef DEBUG_PUSH
8430 xmlGenericError(xmlGenericErrorContext,
8431 "PP: entering EOF\n");
8432#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008433 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008434 ctxt->sax->endDocument(ctxt->userData);
8435 goto done;
8436 }
8437 if ((!terminate) &&
8438 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8439 goto done;
8440 if (ctxt->spaceNr == 0)
8441 spacePush(ctxt, -1);
8442 else
8443 spacePush(ctxt, *ctxt->space);
8444 name = xmlParseStartTag(ctxt);
8445 if (name == NULL) {
8446 spacePop(ctxt);
8447 ctxt->instate = XML_PARSER_EOF;
8448#ifdef DEBUG_PUSH
8449 xmlGenericError(xmlGenericErrorContext,
8450 "PP: entering EOF\n");
8451#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008452 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008453 ctxt->sax->endDocument(ctxt->userData);
8454 goto done;
8455 }
8456 namePush(ctxt, xmlStrdup(name));
8457
8458 /*
8459 * [ VC: Root Element Type ]
8460 * The Name in the document type declaration must match
8461 * the element type of the root element.
8462 */
8463 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8464 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8465 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8466
8467 /*
8468 * Check for an Empty Element.
8469 */
8470 if ((RAW == '/') && (NXT(1) == '>')) {
8471 SKIP(2);
8472 if ((ctxt->sax != NULL) &&
8473 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8474 ctxt->sax->endElement(ctxt->userData, name);
8475 xmlFree(name);
8476 oldname = namePop(ctxt);
8477 spacePop(ctxt);
8478 if (oldname != NULL) {
8479#ifdef DEBUG_STACK
8480 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8481#endif
8482 xmlFree(oldname);
8483 }
8484 if (ctxt->name == NULL) {
8485 ctxt->instate = XML_PARSER_EPILOG;
8486#ifdef DEBUG_PUSH
8487 xmlGenericError(xmlGenericErrorContext,
8488 "PP: entering EPILOG\n");
8489#endif
8490 } else {
8491 ctxt->instate = XML_PARSER_CONTENT;
8492#ifdef DEBUG_PUSH
8493 xmlGenericError(xmlGenericErrorContext,
8494 "PP: entering CONTENT\n");
8495#endif
8496 }
8497 break;
8498 }
8499 if (RAW == '>') {
8500 NEXT;
8501 } else {
8502 ctxt->errNo = XML_ERR_GT_REQUIRED;
8503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8504 ctxt->sax->error(ctxt->userData,
8505 "Couldn't find end of Start Tag %s\n",
8506 name);
8507 ctxt->wellFormed = 0;
8508 ctxt->disableSAX = 1;
8509
8510 /*
8511 * end of parsing of this node.
8512 */
8513 nodePop(ctxt);
8514 oldname = namePop(ctxt);
8515 spacePop(ctxt);
8516 if (oldname != NULL) {
8517#ifdef DEBUG_STACK
8518 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8519#endif
8520 xmlFree(oldname);
8521 }
8522 }
8523 xmlFree(name);
8524 ctxt->instate = XML_PARSER_CONTENT;
8525#ifdef DEBUG_PUSH
8526 xmlGenericError(xmlGenericErrorContext,
8527 "PP: entering CONTENT\n");
8528#endif
8529 break;
8530 }
8531 case XML_PARSER_CONTENT: {
8532 const xmlChar *test;
8533 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008534 if ((avail < 2) && (ctxt->inputNr == 1))
8535 goto done;
8536 cur = ctxt->input->cur[0];
8537 next = ctxt->input->cur[1];
8538
8539 test = CUR_PTR;
8540 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008541 if ((cur == '<') && (next == '?')) {
8542 if ((!terminate) &&
8543 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8544 goto done;
8545#ifdef DEBUG_PUSH
8546 xmlGenericError(xmlGenericErrorContext,
8547 "PP: Parsing PI\n");
8548#endif
8549 xmlParsePI(ctxt);
8550 } else if ((cur == '<') && (next == '!') &&
8551 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8552 if ((!terminate) &&
8553 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8554 goto done;
8555#ifdef DEBUG_PUSH
8556 xmlGenericError(xmlGenericErrorContext,
8557 "PP: Parsing Comment\n");
8558#endif
8559 xmlParseComment(ctxt);
8560 ctxt->instate = XML_PARSER_CONTENT;
8561 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8562 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8563 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8564 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8565 (ctxt->input->cur[8] == '[')) {
8566 SKIP(9);
8567 ctxt->instate = XML_PARSER_CDATA_SECTION;
8568#ifdef DEBUG_PUSH
8569 xmlGenericError(xmlGenericErrorContext,
8570 "PP: entering CDATA_SECTION\n");
8571#endif
8572 break;
8573 } else if ((cur == '<') && (next == '!') &&
8574 (avail < 9)) {
8575 goto done;
8576 } else if ((cur == '<') && (next == '/')) {
8577 ctxt->instate = XML_PARSER_END_TAG;
8578#ifdef DEBUG_PUSH
8579 xmlGenericError(xmlGenericErrorContext,
8580 "PP: entering END_TAG\n");
8581#endif
8582 break;
8583 } else if (cur == '<') {
8584 ctxt->instate = XML_PARSER_START_TAG;
8585#ifdef DEBUG_PUSH
8586 xmlGenericError(xmlGenericErrorContext,
8587 "PP: entering START_TAG\n");
8588#endif
8589 break;
8590 } else if (cur == '&') {
8591 if ((!terminate) &&
8592 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8593 goto done;
8594#ifdef DEBUG_PUSH
8595 xmlGenericError(xmlGenericErrorContext,
8596 "PP: Parsing Reference\n");
8597#endif
8598 xmlParseReference(ctxt);
8599 } else {
8600 /* TODO Avoid the extra copy, handle directly !!! */
8601 /*
8602 * Goal of the following test is:
8603 * - minimize calls to the SAX 'character' callback
8604 * when they are mergeable
8605 * - handle an problem for isBlank when we only parse
8606 * a sequence of blank chars and the next one is
8607 * not available to check against '<' presence.
8608 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008609 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008610 * of the parser.
8611 */
8612 if ((ctxt->inputNr == 1) &&
8613 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8614 if ((!terminate) &&
8615 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8616 goto done;
8617 }
8618 ctxt->checkIndex = 0;
8619#ifdef DEBUG_PUSH
8620 xmlGenericError(xmlGenericErrorContext,
8621 "PP: Parsing char data\n");
8622#endif
8623 xmlParseCharData(ctxt, 0);
8624 }
8625 /*
8626 * Pop-up of finished entities.
8627 */
8628 while ((RAW == 0) && (ctxt->inputNr > 1))
8629 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008630 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008631 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8633 ctxt->sax->error(ctxt->userData,
8634 "detected an error in element content\n");
8635 ctxt->wellFormed = 0;
8636 ctxt->disableSAX = 1;
8637 ctxt->instate = XML_PARSER_EOF;
8638 break;
8639 }
8640 break;
8641 }
8642 case XML_PARSER_CDATA_SECTION: {
8643 /*
8644 * The Push mode need to have the SAX callback for
8645 * cdataBlock merge back contiguous callbacks.
8646 */
8647 int base;
8648
8649 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8650 if (base < 0) {
8651 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8652 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8653 if (ctxt->sax->cdataBlock != NULL)
8654 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8655 XML_PARSER_BIG_BUFFER_SIZE);
8656 }
8657 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8658 ctxt->checkIndex = 0;
8659 }
8660 goto done;
8661 } else {
8662 if ((ctxt->sax != NULL) && (base > 0) &&
8663 (!ctxt->disableSAX)) {
8664 if (ctxt->sax->cdataBlock != NULL)
8665 ctxt->sax->cdataBlock(ctxt->userData,
8666 ctxt->input->cur, base);
8667 }
8668 SKIP(base + 3);
8669 ctxt->checkIndex = 0;
8670 ctxt->instate = XML_PARSER_CONTENT;
8671#ifdef DEBUG_PUSH
8672 xmlGenericError(xmlGenericErrorContext,
8673 "PP: entering CONTENT\n");
8674#endif
8675 }
8676 break;
8677 }
8678 case XML_PARSER_END_TAG:
8679 if (avail < 2)
8680 goto done;
8681 if ((!terminate) &&
8682 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8683 goto done;
8684 xmlParseEndTag(ctxt);
8685 if (ctxt->name == NULL) {
8686 ctxt->instate = XML_PARSER_EPILOG;
8687#ifdef DEBUG_PUSH
8688 xmlGenericError(xmlGenericErrorContext,
8689 "PP: entering EPILOG\n");
8690#endif
8691 } else {
8692 ctxt->instate = XML_PARSER_CONTENT;
8693#ifdef DEBUG_PUSH
8694 xmlGenericError(xmlGenericErrorContext,
8695 "PP: entering CONTENT\n");
8696#endif
8697 }
8698 break;
8699 case XML_PARSER_DTD: {
8700 /*
8701 * Sorry but progressive parsing of the internal subset
8702 * is not expected to be supported. We first check that
8703 * the full content of the internal subset is available and
8704 * the parsing is launched only at that point.
8705 * Internal subset ends up with "']' S? '>'" in an unescaped
8706 * section and not in a ']]>' sequence which are conditional
8707 * sections (whoever argued to keep that crap in XML deserve
8708 * a place in hell !).
8709 */
8710 int base, i;
8711 xmlChar *buf;
8712 xmlChar quote = 0;
8713
8714 base = ctxt->input->cur - ctxt->input->base;
8715 if (base < 0) return(0);
8716 if (ctxt->checkIndex > base)
8717 base = ctxt->checkIndex;
8718 buf = ctxt->input->buf->buffer->content;
8719 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8720 base++) {
8721 if (quote != 0) {
8722 if (buf[base] == quote)
8723 quote = 0;
8724 continue;
8725 }
8726 if (buf[base] == '"') {
8727 quote = '"';
8728 continue;
8729 }
8730 if (buf[base] == '\'') {
8731 quote = '\'';
8732 continue;
8733 }
8734 if (buf[base] == ']') {
8735 if ((unsigned int) base +1 >=
8736 ctxt->input->buf->buffer->use)
8737 break;
8738 if (buf[base + 1] == ']') {
8739 /* conditional crap, skip both ']' ! */
8740 base++;
8741 continue;
8742 }
8743 for (i = 0;
8744 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8745 i++) {
8746 if (buf[base + i] == '>')
8747 goto found_end_int_subset;
8748 }
8749 break;
8750 }
8751 }
8752 /*
8753 * We didn't found the end of the Internal subset
8754 */
8755 if (quote == 0)
8756 ctxt->checkIndex = base;
8757#ifdef DEBUG_PUSH
8758 if (next == 0)
8759 xmlGenericError(xmlGenericErrorContext,
8760 "PP: lookup of int subset end filed\n");
8761#endif
8762 goto done;
8763
8764found_end_int_subset:
8765 xmlParseInternalSubset(ctxt);
8766 ctxt->inSubset = 2;
8767 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8768 (ctxt->sax->externalSubset != NULL))
8769 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8770 ctxt->extSubSystem, ctxt->extSubURI);
8771 ctxt->inSubset = 0;
8772 ctxt->instate = XML_PARSER_PROLOG;
8773 ctxt->checkIndex = 0;
8774#ifdef DEBUG_PUSH
8775 xmlGenericError(xmlGenericErrorContext,
8776 "PP: entering PROLOG\n");
8777#endif
8778 break;
8779 }
8780 case XML_PARSER_COMMENT:
8781 xmlGenericError(xmlGenericErrorContext,
8782 "PP: internal error, state == COMMENT\n");
8783 ctxt->instate = XML_PARSER_CONTENT;
8784#ifdef DEBUG_PUSH
8785 xmlGenericError(xmlGenericErrorContext,
8786 "PP: entering CONTENT\n");
8787#endif
8788 break;
8789 case XML_PARSER_PI:
8790 xmlGenericError(xmlGenericErrorContext,
8791 "PP: internal error, state == PI\n");
8792 ctxt->instate = XML_PARSER_CONTENT;
8793#ifdef DEBUG_PUSH
8794 xmlGenericError(xmlGenericErrorContext,
8795 "PP: entering CONTENT\n");
8796#endif
8797 break;
8798 case XML_PARSER_ENTITY_DECL:
8799 xmlGenericError(xmlGenericErrorContext,
8800 "PP: internal error, state == ENTITY_DECL\n");
8801 ctxt->instate = XML_PARSER_DTD;
8802#ifdef DEBUG_PUSH
8803 xmlGenericError(xmlGenericErrorContext,
8804 "PP: entering DTD\n");
8805#endif
8806 break;
8807 case XML_PARSER_ENTITY_VALUE:
8808 xmlGenericError(xmlGenericErrorContext,
8809 "PP: internal error, state == ENTITY_VALUE\n");
8810 ctxt->instate = XML_PARSER_CONTENT;
8811#ifdef DEBUG_PUSH
8812 xmlGenericError(xmlGenericErrorContext,
8813 "PP: entering DTD\n");
8814#endif
8815 break;
8816 case XML_PARSER_ATTRIBUTE_VALUE:
8817 xmlGenericError(xmlGenericErrorContext,
8818 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8819 ctxt->instate = XML_PARSER_START_TAG;
8820#ifdef DEBUG_PUSH
8821 xmlGenericError(xmlGenericErrorContext,
8822 "PP: entering START_TAG\n");
8823#endif
8824 break;
8825 case XML_PARSER_SYSTEM_LITERAL:
8826 xmlGenericError(xmlGenericErrorContext,
8827 "PP: internal error, state == SYSTEM_LITERAL\n");
8828 ctxt->instate = XML_PARSER_START_TAG;
8829#ifdef DEBUG_PUSH
8830 xmlGenericError(xmlGenericErrorContext,
8831 "PP: entering START_TAG\n");
8832#endif
8833 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008834 case XML_PARSER_PUBLIC_LITERAL:
8835 xmlGenericError(xmlGenericErrorContext,
8836 "PP: internal error, state == PUBLIC_LITERAL\n");
8837 ctxt->instate = XML_PARSER_START_TAG;
8838#ifdef DEBUG_PUSH
8839 xmlGenericError(xmlGenericErrorContext,
8840 "PP: entering START_TAG\n");
8841#endif
8842 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008843 }
8844 }
8845done:
8846#ifdef DEBUG_PUSH
8847 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8848#endif
8849 return(ret);
8850}
8851
8852/**
Owen Taylor3473f882001-02-23 17:55:21 +00008853 * xmlParseChunk:
8854 * @ctxt: an XML parser context
8855 * @chunk: an char array
8856 * @size: the size in byte of the chunk
8857 * @terminate: last chunk indicator
8858 *
8859 * Parse a Chunk of memory
8860 *
8861 * Returns zero if no error, the xmlParserErrors otherwise.
8862 */
8863int
8864xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8865 int terminate) {
8866 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8867 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8868 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8869 int cur = ctxt->input->cur - ctxt->input->base;
8870
8871 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8872 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8873 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008874 ctxt->input->end =
8875 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008876#ifdef DEBUG_PUSH
8877 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8878#endif
8879
8880 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8881 xmlParseTryOrFinish(ctxt, terminate);
8882 } else if (ctxt->instate != XML_PARSER_EOF) {
8883 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8884 xmlParserInputBufferPtr in = ctxt->input->buf;
8885 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8886 (in->raw != NULL)) {
8887 int nbchars;
8888
8889 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8890 if (nbchars < 0) {
8891 xmlGenericError(xmlGenericErrorContext,
8892 "xmlParseChunk: encoder error\n");
8893 return(XML_ERR_INVALID_ENCODING);
8894 }
8895 }
8896 }
8897 }
8898 xmlParseTryOrFinish(ctxt, terminate);
8899 if (terminate) {
8900 /*
8901 * Check for termination
8902 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008903 int avail = 0;
8904 if (ctxt->input->buf == NULL)
8905 avail = ctxt->input->length -
8906 (ctxt->input->cur - ctxt->input->base);
8907 else
8908 avail = ctxt->input->buf->buffer->use -
8909 (ctxt->input->cur - ctxt->input->base);
8910
Owen Taylor3473f882001-02-23 17:55:21 +00008911 if ((ctxt->instate != XML_PARSER_EOF) &&
8912 (ctxt->instate != XML_PARSER_EPILOG)) {
8913 ctxt->errNo = XML_ERR_DOCUMENT_END;
8914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8915 ctxt->sax->error(ctxt->userData,
8916 "Extra content at the end of the document\n");
8917 ctxt->wellFormed = 0;
8918 ctxt->disableSAX = 1;
8919 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008920 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
8921 ctxt->errNo = XML_ERR_DOCUMENT_END;
8922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8923 ctxt->sax->error(ctxt->userData,
8924 "Extra content at the end of the document\n");
8925 ctxt->wellFormed = 0;
8926 ctxt->disableSAX = 1;
8927
8928 }
Owen Taylor3473f882001-02-23 17:55:21 +00008929 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008930 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008931 ctxt->sax->endDocument(ctxt->userData);
8932 }
8933 ctxt->instate = XML_PARSER_EOF;
8934 }
8935 return((xmlParserErrors) ctxt->errNo);
8936}
8937
8938/************************************************************************
8939 * *
8940 * I/O front end functions to the parser *
8941 * *
8942 ************************************************************************/
8943
8944/**
8945 * xmlStopParser:
8946 * @ctxt: an XML parser context
8947 *
8948 * Blocks further parser processing
8949 */
8950void
8951xmlStopParser(xmlParserCtxtPtr ctxt) {
8952 ctxt->instate = XML_PARSER_EOF;
8953 if (ctxt->input != NULL)
8954 ctxt->input->cur = BAD_CAST"";
8955}
8956
8957/**
8958 * xmlCreatePushParserCtxt:
8959 * @sax: a SAX handler
8960 * @user_data: The user data returned on SAX callbacks
8961 * @chunk: a pointer to an array of chars
8962 * @size: number of chars in the array
8963 * @filename: an optional file name or URI
8964 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00008965 * Create a parser context for using the XML parser in push mode.
8966 * If @buffer and @size are non-NULL, the data is used to detect
8967 * the encoding. The remaining characters will be parsed so they
8968 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00008969 * To allow content encoding detection, @size should be >= 4
8970 * The value of @filename is used for fetching external entities
8971 * and error/warning reports.
8972 *
8973 * Returns the new parser context or NULL
8974 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00008975
Owen Taylor3473f882001-02-23 17:55:21 +00008976xmlParserCtxtPtr
8977xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8978 const char *chunk, int size, const char *filename) {
8979 xmlParserCtxtPtr ctxt;
8980 xmlParserInputPtr inputStream;
8981 xmlParserInputBufferPtr buf;
8982 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8983
8984 /*
8985 * plug some encoding conversion routines
8986 */
8987 if ((chunk != NULL) && (size >= 4))
8988 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8989
8990 buf = xmlAllocParserInputBuffer(enc);
8991 if (buf == NULL) return(NULL);
8992
8993 ctxt = xmlNewParserCtxt();
8994 if (ctxt == NULL) {
8995 xmlFree(buf);
8996 return(NULL);
8997 }
8998 if (sax != NULL) {
8999 if (ctxt->sax != &xmlDefaultSAXHandler)
9000 xmlFree(ctxt->sax);
9001 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9002 if (ctxt->sax == NULL) {
9003 xmlFree(buf);
9004 xmlFree(ctxt);
9005 return(NULL);
9006 }
9007 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9008 if (user_data != NULL)
9009 ctxt->userData = user_data;
9010 }
9011 if (filename == NULL) {
9012 ctxt->directory = NULL;
9013 } else {
9014 ctxt->directory = xmlParserGetDirectory(filename);
9015 }
9016
9017 inputStream = xmlNewInputStream(ctxt);
9018 if (inputStream == NULL) {
9019 xmlFreeParserCtxt(ctxt);
9020 return(NULL);
9021 }
9022
9023 if (filename == NULL)
9024 inputStream->filename = NULL;
9025 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009026 inputStream->filename = (char *)
9027 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009028 inputStream->buf = buf;
9029 inputStream->base = inputStream->buf->buffer->content;
9030 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009031 inputStream->end =
9032 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009033
9034 inputPush(ctxt, inputStream);
9035
9036 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9037 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009038 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9039 int cur = ctxt->input->cur - ctxt->input->base;
9040
Owen Taylor3473f882001-02-23 17:55:21 +00009041 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009042
9043 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9044 ctxt->input->cur = ctxt->input->base + cur;
9045 ctxt->input->end =
9046 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009047#ifdef DEBUG_PUSH
9048 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9049#endif
9050 }
9051
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009052 if (enc != XML_CHAR_ENCODING_NONE) {
9053 xmlSwitchEncoding(ctxt, enc);
9054 }
9055
Owen Taylor3473f882001-02-23 17:55:21 +00009056 return(ctxt);
9057}
9058
9059/**
9060 * xmlCreateIOParserCtxt:
9061 * @sax: a SAX handler
9062 * @user_data: The user data returned on SAX callbacks
9063 * @ioread: an I/O read function
9064 * @ioclose: an I/O close function
9065 * @ioctx: an I/O handler
9066 * @enc: the charset encoding if known
9067 *
9068 * Create a parser context for using the XML parser with an existing
9069 * I/O stream
9070 *
9071 * Returns the new parser context or NULL
9072 */
9073xmlParserCtxtPtr
9074xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9075 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9076 void *ioctx, xmlCharEncoding enc) {
9077 xmlParserCtxtPtr ctxt;
9078 xmlParserInputPtr inputStream;
9079 xmlParserInputBufferPtr buf;
9080
9081 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9082 if (buf == NULL) return(NULL);
9083
9084 ctxt = xmlNewParserCtxt();
9085 if (ctxt == NULL) {
9086 xmlFree(buf);
9087 return(NULL);
9088 }
9089 if (sax != NULL) {
9090 if (ctxt->sax != &xmlDefaultSAXHandler)
9091 xmlFree(ctxt->sax);
9092 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9093 if (ctxt->sax == NULL) {
9094 xmlFree(buf);
9095 xmlFree(ctxt);
9096 return(NULL);
9097 }
9098 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9099 if (user_data != NULL)
9100 ctxt->userData = user_data;
9101 }
9102
9103 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9104 if (inputStream == NULL) {
9105 xmlFreeParserCtxt(ctxt);
9106 return(NULL);
9107 }
9108 inputPush(ctxt, inputStream);
9109
9110 return(ctxt);
9111}
9112
9113/************************************************************************
9114 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009115 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009116 * *
9117 ************************************************************************/
9118
9119/**
9120 * xmlIOParseDTD:
9121 * @sax: the SAX handler block or NULL
9122 * @input: an Input Buffer
9123 * @enc: the charset encoding if known
9124 *
9125 * Load and parse a DTD
9126 *
9127 * Returns the resulting xmlDtdPtr or NULL in case of error.
9128 * @input will be freed at parsing end.
9129 */
9130
9131xmlDtdPtr
9132xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9133 xmlCharEncoding enc) {
9134 xmlDtdPtr ret = NULL;
9135 xmlParserCtxtPtr ctxt;
9136 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009137 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009138
9139 if (input == NULL)
9140 return(NULL);
9141
9142 ctxt = xmlNewParserCtxt();
9143 if (ctxt == NULL) {
9144 return(NULL);
9145 }
9146
9147 /*
9148 * Set-up the SAX context
9149 */
9150 if (sax != NULL) {
9151 if (ctxt->sax != NULL)
9152 xmlFree(ctxt->sax);
9153 ctxt->sax = sax;
9154 ctxt->userData = NULL;
9155 }
9156
9157 /*
9158 * generate a parser input from the I/O handler
9159 */
9160
9161 pinput = xmlNewIOInputStream(ctxt, input, enc);
9162 if (pinput == NULL) {
9163 if (sax != NULL) ctxt->sax = NULL;
9164 xmlFreeParserCtxt(ctxt);
9165 return(NULL);
9166 }
9167
9168 /*
9169 * plug some encoding conversion routines here.
9170 */
9171 xmlPushInput(ctxt, pinput);
9172
9173 pinput->filename = NULL;
9174 pinput->line = 1;
9175 pinput->col = 1;
9176 pinput->base = ctxt->input->cur;
9177 pinput->cur = ctxt->input->cur;
9178 pinput->free = NULL;
9179
9180 /*
9181 * let's parse that entity knowing it's an external subset.
9182 */
9183 ctxt->inSubset = 2;
9184 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9185 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9186 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009187
9188 if (enc == XML_CHAR_ENCODING_NONE) {
9189 /*
9190 * Get the 4 first bytes and decode the charset
9191 * if enc != XML_CHAR_ENCODING_NONE
9192 * plug some encoding conversion routines.
9193 */
9194 start[0] = RAW;
9195 start[1] = NXT(1);
9196 start[2] = NXT(2);
9197 start[3] = NXT(3);
9198 enc = xmlDetectCharEncoding(start, 4);
9199 if (enc != XML_CHAR_ENCODING_NONE) {
9200 xmlSwitchEncoding(ctxt, enc);
9201 }
9202 }
9203
Owen Taylor3473f882001-02-23 17:55:21 +00009204 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9205
9206 if (ctxt->myDoc != NULL) {
9207 if (ctxt->wellFormed) {
9208 ret = ctxt->myDoc->extSubset;
9209 ctxt->myDoc->extSubset = NULL;
9210 } else {
9211 ret = NULL;
9212 }
9213 xmlFreeDoc(ctxt->myDoc);
9214 ctxt->myDoc = NULL;
9215 }
9216 if (sax != NULL) ctxt->sax = NULL;
9217 xmlFreeParserCtxt(ctxt);
9218
9219 return(ret);
9220}
9221
9222/**
9223 * xmlSAXParseDTD:
9224 * @sax: the SAX handler block
9225 * @ExternalID: a NAME* containing the External ID of the DTD
9226 * @SystemID: a NAME* containing the URL to the DTD
9227 *
9228 * Load and parse an external subset.
9229 *
9230 * Returns the resulting xmlDtdPtr or NULL in case of error.
9231 */
9232
9233xmlDtdPtr
9234xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9235 const xmlChar *SystemID) {
9236 xmlDtdPtr ret = NULL;
9237 xmlParserCtxtPtr ctxt;
9238 xmlParserInputPtr input = NULL;
9239 xmlCharEncoding enc;
9240
9241 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9242
9243 ctxt = xmlNewParserCtxt();
9244 if (ctxt == NULL) {
9245 return(NULL);
9246 }
9247
9248 /*
9249 * Set-up the SAX context
9250 */
9251 if (sax != NULL) {
9252 if (ctxt->sax != NULL)
9253 xmlFree(ctxt->sax);
9254 ctxt->sax = sax;
9255 ctxt->userData = NULL;
9256 }
9257
9258 /*
9259 * Ask the Entity resolver to load the damn thing
9260 */
9261
9262 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9263 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9264 if (input == NULL) {
9265 if (sax != NULL) ctxt->sax = NULL;
9266 xmlFreeParserCtxt(ctxt);
9267 return(NULL);
9268 }
9269
9270 /*
9271 * plug some encoding conversion routines here.
9272 */
9273 xmlPushInput(ctxt, input);
9274 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9275 xmlSwitchEncoding(ctxt, enc);
9276
9277 if (input->filename == NULL)
9278 input->filename = (char *) xmlStrdup(SystemID);
9279 input->line = 1;
9280 input->col = 1;
9281 input->base = ctxt->input->cur;
9282 input->cur = ctxt->input->cur;
9283 input->free = NULL;
9284
9285 /*
9286 * let's parse that entity knowing it's an external subset.
9287 */
9288 ctxt->inSubset = 2;
9289 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9290 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9291 ExternalID, SystemID);
9292 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9293
9294 if (ctxt->myDoc != NULL) {
9295 if (ctxt->wellFormed) {
9296 ret = ctxt->myDoc->extSubset;
9297 ctxt->myDoc->extSubset = NULL;
9298 } else {
9299 ret = NULL;
9300 }
9301 xmlFreeDoc(ctxt->myDoc);
9302 ctxt->myDoc = NULL;
9303 }
9304 if (sax != NULL) ctxt->sax = NULL;
9305 xmlFreeParserCtxt(ctxt);
9306
9307 return(ret);
9308}
9309
9310/**
9311 * xmlParseDTD:
9312 * @ExternalID: a NAME* containing the External ID of the DTD
9313 * @SystemID: a NAME* containing the URL to the DTD
9314 *
9315 * Load and parse an external subset.
9316 *
9317 * Returns the resulting xmlDtdPtr or NULL in case of error.
9318 */
9319
9320xmlDtdPtr
9321xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9322 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9323}
9324
9325/************************************************************************
9326 * *
9327 * Front ends when parsing an Entity *
9328 * *
9329 ************************************************************************/
9330
9331/**
Owen Taylor3473f882001-02-23 17:55:21 +00009332 * xmlParseCtxtExternalEntity:
9333 * @ctx: the existing parsing context
9334 * @URL: the URL for the entity to load
9335 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009336 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009337 *
9338 * Parse an external general entity within an existing parsing context
9339 * An external general parsed entity is well-formed if it matches the
9340 * production labeled extParsedEnt.
9341 *
9342 * [78] extParsedEnt ::= TextDecl? content
9343 *
9344 * Returns 0 if the entity is well formed, -1 in case of args problem and
9345 * the parser error code otherwise
9346 */
9347
9348int
9349xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009350 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009351 xmlParserCtxtPtr ctxt;
9352 xmlDocPtr newDoc;
9353 xmlSAXHandlerPtr oldsax = NULL;
9354 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009355 xmlChar start[4];
9356 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009357
9358 if (ctx->depth > 40) {
9359 return(XML_ERR_ENTITY_LOOP);
9360 }
9361
Daniel Veillardcda96922001-08-21 10:56:31 +00009362 if (lst != NULL)
9363 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009364 if ((URL == NULL) && (ID == NULL))
9365 return(-1);
9366 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9367 return(-1);
9368
9369
9370 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9371 if (ctxt == NULL) return(-1);
9372 ctxt->userData = ctxt;
9373 oldsax = ctxt->sax;
9374 ctxt->sax = ctx->sax;
9375 newDoc = xmlNewDoc(BAD_CAST "1.0");
9376 if (newDoc == NULL) {
9377 xmlFreeParserCtxt(ctxt);
9378 return(-1);
9379 }
9380 if (ctx->myDoc != NULL) {
9381 newDoc->intSubset = ctx->myDoc->intSubset;
9382 newDoc->extSubset = ctx->myDoc->extSubset;
9383 }
9384 if (ctx->myDoc->URL != NULL) {
9385 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9386 }
9387 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9388 if (newDoc->children == NULL) {
9389 ctxt->sax = oldsax;
9390 xmlFreeParserCtxt(ctxt);
9391 newDoc->intSubset = NULL;
9392 newDoc->extSubset = NULL;
9393 xmlFreeDoc(newDoc);
9394 return(-1);
9395 }
9396 nodePush(ctxt, newDoc->children);
9397 if (ctx->myDoc == NULL) {
9398 ctxt->myDoc = newDoc;
9399 } else {
9400 ctxt->myDoc = ctx->myDoc;
9401 newDoc->children->doc = ctx->myDoc;
9402 }
9403
Daniel Veillard87a764e2001-06-20 17:41:10 +00009404 /*
9405 * Get the 4 first bytes and decode the charset
9406 * if enc != XML_CHAR_ENCODING_NONE
9407 * plug some encoding conversion routines.
9408 */
9409 GROW
9410 start[0] = RAW;
9411 start[1] = NXT(1);
9412 start[2] = NXT(2);
9413 start[3] = NXT(3);
9414 enc = xmlDetectCharEncoding(start, 4);
9415 if (enc != XML_CHAR_ENCODING_NONE) {
9416 xmlSwitchEncoding(ctxt, enc);
9417 }
9418
Owen Taylor3473f882001-02-23 17:55:21 +00009419 /*
9420 * Parse a possible text declaration first
9421 */
Owen Taylor3473f882001-02-23 17:55:21 +00009422 if ((RAW == '<') && (NXT(1) == '?') &&
9423 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9424 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9425 xmlParseTextDecl(ctxt);
9426 }
9427
9428 /*
9429 * Doing validity checking on chunk doesn't make sense
9430 */
9431 ctxt->instate = XML_PARSER_CONTENT;
9432 ctxt->validate = ctx->validate;
9433 ctxt->loadsubset = ctx->loadsubset;
9434 ctxt->depth = ctx->depth + 1;
9435 ctxt->replaceEntities = ctx->replaceEntities;
9436 if (ctxt->validate) {
9437 ctxt->vctxt.error = ctx->vctxt.error;
9438 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009439 } else {
9440 ctxt->vctxt.error = NULL;
9441 ctxt->vctxt.warning = NULL;
9442 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009443 ctxt->vctxt.nodeTab = NULL;
9444 ctxt->vctxt.nodeNr = 0;
9445 ctxt->vctxt.nodeMax = 0;
9446 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009447
9448 xmlParseContent(ctxt);
9449
9450 if ((RAW == '<') && (NXT(1) == '/')) {
9451 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9453 ctxt->sax->error(ctxt->userData,
9454 "chunk is not well balanced\n");
9455 ctxt->wellFormed = 0;
9456 ctxt->disableSAX = 1;
9457 } else if (RAW != 0) {
9458 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9460 ctxt->sax->error(ctxt->userData,
9461 "extra content at the end of well balanced chunk\n");
9462 ctxt->wellFormed = 0;
9463 ctxt->disableSAX = 1;
9464 }
9465 if (ctxt->node != newDoc->children) {
9466 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9468 ctxt->sax->error(ctxt->userData,
9469 "chunk is not well balanced\n");
9470 ctxt->wellFormed = 0;
9471 ctxt->disableSAX = 1;
9472 }
9473
9474 if (!ctxt->wellFormed) {
9475 if (ctxt->errNo == 0)
9476 ret = 1;
9477 else
9478 ret = ctxt->errNo;
9479 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009480 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009481 xmlNodePtr cur;
9482
9483 /*
9484 * Return the newly created nodeset after unlinking it from
9485 * they pseudo parent.
9486 */
9487 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009488 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009489 while (cur != NULL) {
9490 cur->parent = NULL;
9491 cur = cur->next;
9492 }
9493 newDoc->children->children = NULL;
9494 }
9495 ret = 0;
9496 }
9497 ctxt->sax = oldsax;
9498 xmlFreeParserCtxt(ctxt);
9499 newDoc->intSubset = NULL;
9500 newDoc->extSubset = NULL;
9501 xmlFreeDoc(newDoc);
9502
9503 return(ret);
9504}
9505
9506/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009507 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009508 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009509 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009510 * @sax: the SAX handler bloc (possibly NULL)
9511 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9512 * @depth: Used for loop detection, use 0
9513 * @URL: the URL for the entity to load
9514 * @ID: the System ID for the entity to load
9515 * @list: the return value for the set of parsed nodes
9516 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009517 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009518 *
9519 * Returns 0 if the entity is well formed, -1 in case of args problem and
9520 * the parser error code otherwise
9521 */
9522
Daniel Veillard257d9102001-05-08 10:41:44 +00009523static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009524xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9525 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009526 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009527 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009528 xmlParserCtxtPtr ctxt;
9529 xmlDocPtr newDoc;
9530 xmlSAXHandlerPtr oldsax = NULL;
9531 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009532 xmlChar start[4];
9533 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009534
9535 if (depth > 40) {
9536 return(XML_ERR_ENTITY_LOOP);
9537 }
9538
9539
9540
9541 if (list != NULL)
9542 *list = NULL;
9543 if ((URL == NULL) && (ID == NULL))
9544 return(-1);
9545 if (doc == NULL) /* @@ relax but check for dereferences */
9546 return(-1);
9547
9548
9549 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9550 if (ctxt == NULL) return(-1);
9551 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009552 if (oldctxt != NULL) {
9553 ctxt->_private = oldctxt->_private;
9554 ctxt->loadsubset = oldctxt->loadsubset;
9555 ctxt->validate = oldctxt->validate;
9556 ctxt->external = oldctxt->external;
9557 } else {
9558 /*
9559 * Doing validity checking on chunk without context
9560 * doesn't make sense
9561 */
9562 ctxt->_private = NULL;
9563 ctxt->validate = 0;
9564 ctxt->external = 2;
9565 ctxt->loadsubset = 0;
9566 }
Owen Taylor3473f882001-02-23 17:55:21 +00009567 if (sax != NULL) {
9568 oldsax = ctxt->sax;
9569 ctxt->sax = sax;
9570 if (user_data != NULL)
9571 ctxt->userData = user_data;
9572 }
9573 newDoc = xmlNewDoc(BAD_CAST "1.0");
9574 if (newDoc == NULL) {
9575 xmlFreeParserCtxt(ctxt);
9576 return(-1);
9577 }
9578 if (doc != NULL) {
9579 newDoc->intSubset = doc->intSubset;
9580 newDoc->extSubset = doc->extSubset;
9581 }
9582 if (doc->URL != NULL) {
9583 newDoc->URL = xmlStrdup(doc->URL);
9584 }
9585 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9586 if (newDoc->children == NULL) {
9587 if (sax != NULL)
9588 ctxt->sax = oldsax;
9589 xmlFreeParserCtxt(ctxt);
9590 newDoc->intSubset = NULL;
9591 newDoc->extSubset = NULL;
9592 xmlFreeDoc(newDoc);
9593 return(-1);
9594 }
9595 nodePush(ctxt, newDoc->children);
9596 if (doc == NULL) {
9597 ctxt->myDoc = newDoc;
9598 } else {
9599 ctxt->myDoc = doc;
9600 newDoc->children->doc = doc;
9601 }
9602
Daniel Veillard87a764e2001-06-20 17:41:10 +00009603 /*
9604 * Get the 4 first bytes and decode the charset
9605 * if enc != XML_CHAR_ENCODING_NONE
9606 * plug some encoding conversion routines.
9607 */
9608 GROW;
9609 start[0] = RAW;
9610 start[1] = NXT(1);
9611 start[2] = NXT(2);
9612 start[3] = NXT(3);
9613 enc = xmlDetectCharEncoding(start, 4);
9614 if (enc != XML_CHAR_ENCODING_NONE) {
9615 xmlSwitchEncoding(ctxt, enc);
9616 }
9617
Owen Taylor3473f882001-02-23 17:55:21 +00009618 /*
9619 * Parse a possible text declaration first
9620 */
Owen Taylor3473f882001-02-23 17:55:21 +00009621 if ((RAW == '<') && (NXT(1) == '?') &&
9622 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9623 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9624 xmlParseTextDecl(ctxt);
9625 }
9626
Owen Taylor3473f882001-02-23 17:55:21 +00009627 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009628 ctxt->depth = depth;
9629
9630 xmlParseContent(ctxt);
9631
Daniel Veillard561b7f82002-03-20 21:55:57 +00009632 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009633 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9635 ctxt->sax->error(ctxt->userData,
9636 "chunk is not well balanced\n");
9637 ctxt->wellFormed = 0;
9638 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009639 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009640 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9642 ctxt->sax->error(ctxt->userData,
9643 "extra content at the end of well balanced chunk\n");
9644 ctxt->wellFormed = 0;
9645 ctxt->disableSAX = 1;
9646 }
9647 if (ctxt->node != newDoc->children) {
9648 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9650 ctxt->sax->error(ctxt->userData,
9651 "chunk is not well balanced\n");
9652 ctxt->wellFormed = 0;
9653 ctxt->disableSAX = 1;
9654 }
9655
9656 if (!ctxt->wellFormed) {
9657 if (ctxt->errNo == 0)
9658 ret = 1;
9659 else
9660 ret = ctxt->errNo;
9661 } else {
9662 if (list != NULL) {
9663 xmlNodePtr cur;
9664
9665 /*
9666 * Return the newly created nodeset after unlinking it from
9667 * they pseudo parent.
9668 */
9669 cur = newDoc->children->children;
9670 *list = cur;
9671 while (cur != NULL) {
9672 cur->parent = NULL;
9673 cur = cur->next;
9674 }
9675 newDoc->children->children = NULL;
9676 }
9677 ret = 0;
9678 }
9679 if (sax != NULL)
9680 ctxt->sax = oldsax;
9681 xmlFreeParserCtxt(ctxt);
9682 newDoc->intSubset = NULL;
9683 newDoc->extSubset = NULL;
9684 xmlFreeDoc(newDoc);
9685
9686 return(ret);
9687}
9688
9689/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009690 * xmlParseExternalEntity:
9691 * @doc: the document the chunk pertains to
9692 * @sax: the SAX handler bloc (possibly NULL)
9693 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9694 * @depth: Used for loop detection, use 0
9695 * @URL: the URL for the entity to load
9696 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009697 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009698 *
9699 * Parse an external general entity
9700 * An external general parsed entity is well-formed if it matches the
9701 * production labeled extParsedEnt.
9702 *
9703 * [78] extParsedEnt ::= TextDecl? content
9704 *
9705 * Returns 0 if the entity is well formed, -1 in case of args problem and
9706 * the parser error code otherwise
9707 */
9708
9709int
9710xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009711 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009712 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009713 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009714}
9715
9716/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009717 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009718 * @doc: the document the chunk pertains to
9719 * @sax: the SAX handler bloc (possibly NULL)
9720 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9721 * @depth: Used for loop detection, use 0
9722 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009723 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009724 *
9725 * Parse a well-balanced chunk of an XML document
9726 * called by the parser
9727 * The allowed sequence for the Well Balanced Chunk is the one defined by
9728 * the content production in the XML grammar:
9729 *
9730 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9731 *
9732 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9733 * the parser error code otherwise
9734 */
9735
9736int
9737xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009738 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009739 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9740 depth, string, lst, 0 );
9741}
9742
9743/**
9744 * xmlParseBalancedChunkMemoryRecover:
9745 * @doc: the document the chunk pertains to
9746 * @sax: the SAX handler bloc (possibly NULL)
9747 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9748 * @depth: Used for loop detection, use 0
9749 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9750 * @lst: the return value for the set of parsed nodes
9751 * @recover: return nodes even if the data is broken (use 0)
9752 *
9753 *
9754 * Parse a well-balanced chunk of an XML document
9755 * called by the parser
9756 * The allowed sequence for the Well Balanced Chunk is the one defined by
9757 * the content production in the XML grammar:
9758 *
9759 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9760 *
9761 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9762 * the parser error code otherwise
9763 *
9764 * In case recover is set to 1, the nodelist will not be empty even if
9765 * the parsed chunk is not well balanced.
9766 */
9767int
9768xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9769 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
9770 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +00009771 xmlParserCtxtPtr ctxt;
9772 xmlDocPtr newDoc;
9773 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +00009774 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +00009775 int size;
9776 int ret = 0;
9777
9778 if (depth > 40) {
9779 return(XML_ERR_ENTITY_LOOP);
9780 }
9781
9782
Daniel Veillardcda96922001-08-21 10:56:31 +00009783 if (lst != NULL)
9784 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009785 if (string == NULL)
9786 return(-1);
9787
9788 size = xmlStrlen(string);
9789
9790 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9791 if (ctxt == NULL) return(-1);
9792 ctxt->userData = ctxt;
9793 if (sax != NULL) {
9794 oldsax = ctxt->sax;
9795 ctxt->sax = sax;
9796 if (user_data != NULL)
9797 ctxt->userData = user_data;
9798 }
9799 newDoc = xmlNewDoc(BAD_CAST "1.0");
9800 if (newDoc == NULL) {
9801 xmlFreeParserCtxt(ctxt);
9802 return(-1);
9803 }
9804 if (doc != NULL) {
9805 newDoc->intSubset = doc->intSubset;
9806 newDoc->extSubset = doc->extSubset;
9807 }
9808 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9809 if (newDoc->children == NULL) {
9810 if (sax != NULL)
9811 ctxt->sax = oldsax;
9812 xmlFreeParserCtxt(ctxt);
9813 newDoc->intSubset = NULL;
9814 newDoc->extSubset = NULL;
9815 xmlFreeDoc(newDoc);
9816 return(-1);
9817 }
9818 nodePush(ctxt, newDoc->children);
9819 if (doc == NULL) {
9820 ctxt->myDoc = newDoc;
9821 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +00009822 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +00009823 newDoc->children->doc = doc;
9824 }
9825 ctxt->instate = XML_PARSER_CONTENT;
9826 ctxt->depth = depth;
9827
9828 /*
9829 * Doing validity checking on chunk doesn't make sense
9830 */
9831 ctxt->validate = 0;
9832 ctxt->loadsubset = 0;
9833
Daniel Veillard935494a2002-10-22 14:22:46 +00009834 content = doc->children;
9835 doc->children = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009836 xmlParseContent(ctxt);
Daniel Veillard935494a2002-10-22 14:22:46 +00009837 doc->children = content;
Owen Taylor3473f882001-02-23 17:55:21 +00009838
9839 if ((RAW == '<') && (NXT(1) == '/')) {
9840 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9842 ctxt->sax->error(ctxt->userData,
9843 "chunk is not well balanced\n");
9844 ctxt->wellFormed = 0;
9845 ctxt->disableSAX = 1;
9846 } else if (RAW != 0) {
9847 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9849 ctxt->sax->error(ctxt->userData,
9850 "extra content at the end of well balanced chunk\n");
9851 ctxt->wellFormed = 0;
9852 ctxt->disableSAX = 1;
9853 }
9854 if (ctxt->node != newDoc->children) {
9855 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9857 ctxt->sax->error(ctxt->userData,
9858 "chunk is not well balanced\n");
9859 ctxt->wellFormed = 0;
9860 ctxt->disableSAX = 1;
9861 }
9862
9863 if (!ctxt->wellFormed) {
9864 if (ctxt->errNo == 0)
9865 ret = 1;
9866 else
9867 ret = ctxt->errNo;
9868 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009869 ret = 0;
9870 }
9871
9872 if (lst != NULL && (ret == 0 || recover == 1)) {
9873 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009874
9875 /*
9876 * Return the newly created nodeset after unlinking it from
9877 * they pseudo parent.
9878 */
9879 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009880 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009881 while (cur != NULL) {
9882 cur->parent = NULL;
9883 cur = cur->next;
9884 }
9885 newDoc->children->children = NULL;
9886 }
Daniel Veillard58e44c92002-08-02 22:19:49 +00009887
Owen Taylor3473f882001-02-23 17:55:21 +00009888 if (sax != NULL)
9889 ctxt->sax = oldsax;
9890 xmlFreeParserCtxt(ctxt);
9891 newDoc->intSubset = NULL;
9892 newDoc->extSubset = NULL;
9893 xmlFreeDoc(newDoc);
9894
9895 return(ret);
9896}
9897
9898/**
9899 * xmlSAXParseEntity:
9900 * @sax: the SAX handler block
9901 * @filename: the filename
9902 *
9903 * parse an XML external entity out of context and build a tree.
9904 * It use the given SAX function block to handle the parsing callback.
9905 * If sax is NULL, fallback to the default DOM tree building routines.
9906 *
9907 * [78] extParsedEnt ::= TextDecl? content
9908 *
9909 * This correspond to a "Well Balanced" chunk
9910 *
9911 * Returns the resulting document tree
9912 */
9913
9914xmlDocPtr
9915xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9916 xmlDocPtr ret;
9917 xmlParserCtxtPtr ctxt;
9918 char *directory = NULL;
9919
9920 ctxt = xmlCreateFileParserCtxt(filename);
9921 if (ctxt == NULL) {
9922 return(NULL);
9923 }
9924 if (sax != NULL) {
9925 if (ctxt->sax != NULL)
9926 xmlFree(ctxt->sax);
9927 ctxt->sax = sax;
9928 ctxt->userData = NULL;
9929 }
9930
9931 if ((ctxt->directory == NULL) && (directory == NULL))
9932 directory = xmlParserGetDirectory(filename);
9933
9934 xmlParseExtParsedEnt(ctxt);
9935
9936 if (ctxt->wellFormed)
9937 ret = ctxt->myDoc;
9938 else {
9939 ret = NULL;
9940 xmlFreeDoc(ctxt->myDoc);
9941 ctxt->myDoc = NULL;
9942 }
9943 if (sax != NULL)
9944 ctxt->sax = NULL;
9945 xmlFreeParserCtxt(ctxt);
9946
9947 return(ret);
9948}
9949
9950/**
9951 * xmlParseEntity:
9952 * @filename: the filename
9953 *
9954 * parse an XML external entity out of context and build a tree.
9955 *
9956 * [78] extParsedEnt ::= TextDecl? content
9957 *
9958 * This correspond to a "Well Balanced" chunk
9959 *
9960 * Returns the resulting document tree
9961 */
9962
9963xmlDocPtr
9964xmlParseEntity(const char *filename) {
9965 return(xmlSAXParseEntity(NULL, filename));
9966}
9967
9968/**
9969 * xmlCreateEntityParserCtxt:
9970 * @URL: the entity URL
9971 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009972 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009973 *
9974 * Create a parser context for an external entity
9975 * Automatic support for ZLIB/Compress compressed document is provided
9976 * by default if found at compile-time.
9977 *
9978 * Returns the new parser context or NULL
9979 */
9980xmlParserCtxtPtr
9981xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9982 const xmlChar *base) {
9983 xmlParserCtxtPtr ctxt;
9984 xmlParserInputPtr inputStream;
9985 char *directory = NULL;
9986 xmlChar *uri;
9987
9988 ctxt = xmlNewParserCtxt();
9989 if (ctxt == NULL) {
9990 return(NULL);
9991 }
9992
9993 uri = xmlBuildURI(URL, base);
9994
9995 if (uri == NULL) {
9996 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9997 if (inputStream == NULL) {
9998 xmlFreeParserCtxt(ctxt);
9999 return(NULL);
10000 }
10001
10002 inputPush(ctxt, inputStream);
10003
10004 if ((ctxt->directory == NULL) && (directory == NULL))
10005 directory = xmlParserGetDirectory((char *)URL);
10006 if ((ctxt->directory == NULL) && (directory != NULL))
10007 ctxt->directory = directory;
10008 } else {
10009 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10010 if (inputStream == NULL) {
10011 xmlFree(uri);
10012 xmlFreeParserCtxt(ctxt);
10013 return(NULL);
10014 }
10015
10016 inputPush(ctxt, inputStream);
10017
10018 if ((ctxt->directory == NULL) && (directory == NULL))
10019 directory = xmlParserGetDirectory((char *)uri);
10020 if ((ctxt->directory == NULL) && (directory != NULL))
10021 ctxt->directory = directory;
10022 xmlFree(uri);
10023 }
10024
10025 return(ctxt);
10026}
10027
10028/************************************************************************
10029 * *
10030 * Front ends when parsing from a file *
10031 * *
10032 ************************************************************************/
10033
10034/**
10035 * xmlCreateFileParserCtxt:
10036 * @filename: the filename
10037 *
10038 * Create a parser context for a file content.
10039 * Automatic support for ZLIB/Compress compressed document is provided
10040 * by default if found at compile-time.
10041 *
10042 * Returns the new parser context or NULL
10043 */
10044xmlParserCtxtPtr
10045xmlCreateFileParserCtxt(const char *filename)
10046{
10047 xmlParserCtxtPtr ctxt;
10048 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010049 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010050 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010051
Owen Taylor3473f882001-02-23 17:55:21 +000010052 ctxt = xmlNewParserCtxt();
10053 if (ctxt == NULL) {
10054 if (xmlDefaultSAXHandler.error != NULL) {
10055 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10056 }
10057 return(NULL);
10058 }
10059
Daniel Veillardf4862f02002-09-10 11:13:43 +000010060 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10061 if (normalized == NULL) {
10062 xmlFreeParserCtxt(ctxt);
10063 return(NULL);
10064 }
10065 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010066 if (inputStream == NULL) {
10067 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010068 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010069 return(NULL);
10070 }
10071
Owen Taylor3473f882001-02-23 17:55:21 +000010072 inputPush(ctxt, inputStream);
10073 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010074 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010075 if ((ctxt->directory == NULL) && (directory != NULL))
10076 ctxt->directory = directory;
10077
Daniel Veillardf4862f02002-09-10 11:13:43 +000010078 xmlFree(normalized);
10079
Owen Taylor3473f882001-02-23 17:55:21 +000010080 return(ctxt);
10081}
10082
10083/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010084 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010085 * @sax: the SAX handler block
10086 * @filename: the filename
10087 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10088 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010089 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010090 *
10091 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10092 * compressed document is provided by default if found at compile-time.
10093 * It use the given SAX function block to handle the parsing callback.
10094 * If sax is NULL, fallback to the default DOM tree building routines.
10095 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010096 * User data (void *) is stored within the parser context in the
10097 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010098 *
Owen Taylor3473f882001-02-23 17:55:21 +000010099 * Returns the resulting document tree
10100 */
10101
10102xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010103xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10104 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010105 xmlDocPtr ret;
10106 xmlParserCtxtPtr ctxt;
10107 char *directory = NULL;
10108
Daniel Veillard635ef722001-10-29 11:48:19 +000010109 xmlInitParser();
10110
Owen Taylor3473f882001-02-23 17:55:21 +000010111 ctxt = xmlCreateFileParserCtxt(filename);
10112 if (ctxt == NULL) {
10113 return(NULL);
10114 }
10115 if (sax != NULL) {
10116 if (ctxt->sax != NULL)
10117 xmlFree(ctxt->sax);
10118 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010119 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010120 if (data!=NULL) {
10121 ctxt->_private=data;
10122 }
Owen Taylor3473f882001-02-23 17:55:21 +000010123
10124 if ((ctxt->directory == NULL) && (directory == NULL))
10125 directory = xmlParserGetDirectory(filename);
10126 if ((ctxt->directory == NULL) && (directory != NULL))
10127 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10128
10129 xmlParseDocument(ctxt);
10130
10131 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10132 else {
10133 ret = NULL;
10134 xmlFreeDoc(ctxt->myDoc);
10135 ctxt->myDoc = NULL;
10136 }
10137 if (sax != NULL)
10138 ctxt->sax = NULL;
10139 xmlFreeParserCtxt(ctxt);
10140
10141 return(ret);
10142}
10143
10144/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010145 * xmlSAXParseFile:
10146 * @sax: the SAX handler block
10147 * @filename: the filename
10148 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10149 * documents
10150 *
10151 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10152 * compressed document is provided by default if found at compile-time.
10153 * It use the given SAX function block to handle the parsing callback.
10154 * If sax is NULL, fallback to the default DOM tree building routines.
10155 *
10156 * Returns the resulting document tree
10157 */
10158
10159xmlDocPtr
10160xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10161 int recovery) {
10162 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10163}
10164
10165/**
Owen Taylor3473f882001-02-23 17:55:21 +000010166 * xmlRecoverDoc:
10167 * @cur: a pointer to an array of xmlChar
10168 *
10169 * parse an XML in-memory document and build a tree.
10170 * In the case the document is not Well Formed, a tree is built anyway
10171 *
10172 * Returns the resulting document tree
10173 */
10174
10175xmlDocPtr
10176xmlRecoverDoc(xmlChar *cur) {
10177 return(xmlSAXParseDoc(NULL, cur, 1));
10178}
10179
10180/**
10181 * xmlParseFile:
10182 * @filename: the filename
10183 *
10184 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10185 * compressed document is provided by default if found at compile-time.
10186 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010187 * Returns the resulting document tree if the file was wellformed,
10188 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010189 */
10190
10191xmlDocPtr
10192xmlParseFile(const char *filename) {
10193 return(xmlSAXParseFile(NULL, filename, 0));
10194}
10195
10196/**
10197 * xmlRecoverFile:
10198 * @filename: the filename
10199 *
10200 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10201 * compressed document is provided by default if found at compile-time.
10202 * In the case the document is not Well Formed, a tree is built anyway
10203 *
10204 * Returns the resulting document tree
10205 */
10206
10207xmlDocPtr
10208xmlRecoverFile(const char *filename) {
10209 return(xmlSAXParseFile(NULL, filename, 1));
10210}
10211
10212
10213/**
10214 * xmlSetupParserForBuffer:
10215 * @ctxt: an XML parser context
10216 * @buffer: a xmlChar * buffer
10217 * @filename: a file name
10218 *
10219 * Setup the parser context to parse a new buffer; Clears any prior
10220 * contents from the parser context. The buffer parameter must not be
10221 * NULL, but the filename parameter can be
10222 */
10223void
10224xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10225 const char* filename)
10226{
10227 xmlParserInputPtr input;
10228
10229 input = xmlNewInputStream(ctxt);
10230 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010231 xmlGenericError(xmlGenericErrorContext,
10232 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010233 xmlFree(ctxt);
10234 return;
10235 }
10236
10237 xmlClearParserCtxt(ctxt);
10238 if (filename != NULL)
10239 input->filename = xmlMemStrdup(filename);
10240 input->base = buffer;
10241 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010242 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010243 inputPush(ctxt, input);
10244}
10245
10246/**
10247 * xmlSAXUserParseFile:
10248 * @sax: a SAX handler
10249 * @user_data: The user data returned on SAX callbacks
10250 * @filename: a file name
10251 *
10252 * parse an XML file and call the given SAX handler routines.
10253 * Automatic support for ZLIB/Compress compressed document is provided
10254 *
10255 * Returns 0 in case of success or a error number otherwise
10256 */
10257int
10258xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10259 const char *filename) {
10260 int ret = 0;
10261 xmlParserCtxtPtr ctxt;
10262
10263 ctxt = xmlCreateFileParserCtxt(filename);
10264 if (ctxt == NULL) return -1;
10265 if (ctxt->sax != &xmlDefaultSAXHandler)
10266 xmlFree(ctxt->sax);
10267 ctxt->sax = sax;
10268 if (user_data != NULL)
10269 ctxt->userData = user_data;
10270
10271 xmlParseDocument(ctxt);
10272
10273 if (ctxt->wellFormed)
10274 ret = 0;
10275 else {
10276 if (ctxt->errNo != 0)
10277 ret = ctxt->errNo;
10278 else
10279 ret = -1;
10280 }
10281 if (sax != NULL)
10282 ctxt->sax = NULL;
10283 xmlFreeParserCtxt(ctxt);
10284
10285 return ret;
10286}
10287
10288/************************************************************************
10289 * *
10290 * Front ends when parsing from memory *
10291 * *
10292 ************************************************************************/
10293
10294/**
10295 * xmlCreateMemoryParserCtxt:
10296 * @buffer: a pointer to a char array
10297 * @size: the size of the array
10298 *
10299 * Create a parser context for an XML in-memory document.
10300 *
10301 * Returns the new parser context or NULL
10302 */
10303xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010304xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010305 xmlParserCtxtPtr ctxt;
10306 xmlParserInputPtr input;
10307 xmlParserInputBufferPtr buf;
10308
10309 if (buffer == NULL)
10310 return(NULL);
10311 if (size <= 0)
10312 return(NULL);
10313
10314 ctxt = xmlNewParserCtxt();
10315 if (ctxt == NULL)
10316 return(NULL);
10317
10318 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10319 if (buf == NULL) return(NULL);
10320
10321 input = xmlNewInputStream(ctxt);
10322 if (input == NULL) {
10323 xmlFreeParserCtxt(ctxt);
10324 return(NULL);
10325 }
10326
10327 input->filename = NULL;
10328 input->buf = buf;
10329 input->base = input->buf->buffer->content;
10330 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010331 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010332
10333 inputPush(ctxt, input);
10334 return(ctxt);
10335}
10336
10337/**
10338 * xmlSAXParseMemory:
10339 * @sax: the SAX handler block
10340 * @buffer: an pointer to a char array
10341 * @size: the size of the array
10342 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10343 * documents
10344 *
10345 * parse an XML in-memory block and use the given SAX function block
10346 * to handle the parsing callback. If sax is NULL, fallback to the default
10347 * DOM tree building routines.
10348 *
10349 * Returns the resulting document tree
10350 */
10351xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010352xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10353 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010354 xmlDocPtr ret;
10355 xmlParserCtxtPtr ctxt;
10356
10357 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10358 if (ctxt == NULL) return(NULL);
10359 if (sax != NULL) {
10360 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010361 }
10362
10363 xmlParseDocument(ctxt);
10364
10365 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10366 else {
10367 ret = NULL;
10368 xmlFreeDoc(ctxt->myDoc);
10369 ctxt->myDoc = NULL;
10370 }
10371 if (sax != NULL)
10372 ctxt->sax = NULL;
10373 xmlFreeParserCtxt(ctxt);
10374
10375 return(ret);
10376}
10377
10378/**
10379 * xmlParseMemory:
10380 * @buffer: an pointer to a char array
10381 * @size: the size of the array
10382 *
10383 * parse an XML in-memory block and build a tree.
10384 *
10385 * Returns the resulting document tree
10386 */
10387
Daniel Veillard50822cb2001-07-26 20:05:51 +000010388xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010389 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10390}
10391
10392/**
10393 * xmlRecoverMemory:
10394 * @buffer: an pointer to a char array
10395 * @size: the size of the array
10396 *
10397 * parse an XML in-memory block and build a tree.
10398 * In the case the document is not Well Formed, a tree is built anyway
10399 *
10400 * Returns the resulting document tree
10401 */
10402
Daniel Veillard50822cb2001-07-26 20:05:51 +000010403xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010404 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10405}
10406
10407/**
10408 * xmlSAXUserParseMemory:
10409 * @sax: a SAX handler
10410 * @user_data: The user data returned on SAX callbacks
10411 * @buffer: an in-memory XML document input
10412 * @size: the length of the XML document in bytes
10413 *
10414 * A better SAX parsing routine.
10415 * parse an XML in-memory buffer and call the given SAX handler routines.
10416 *
10417 * Returns 0 in case of success or a error number otherwise
10418 */
10419int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010420 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010421 int ret = 0;
10422 xmlParserCtxtPtr ctxt;
10423 xmlSAXHandlerPtr oldsax = NULL;
10424
Daniel Veillard9e923512002-08-14 08:48:52 +000010425 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010426 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10427 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010428 oldsax = ctxt->sax;
10429 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010430 if (user_data != NULL)
10431 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010432
10433 xmlParseDocument(ctxt);
10434
10435 if (ctxt->wellFormed)
10436 ret = 0;
10437 else {
10438 if (ctxt->errNo != 0)
10439 ret = ctxt->errNo;
10440 else
10441 ret = -1;
10442 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010443 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010444 xmlFreeParserCtxt(ctxt);
10445
10446 return ret;
10447}
10448
10449/**
10450 * xmlCreateDocParserCtxt:
10451 * @cur: a pointer to an array of xmlChar
10452 *
10453 * Creates a parser context for an XML in-memory document.
10454 *
10455 * Returns the new parser context or NULL
10456 */
10457xmlParserCtxtPtr
10458xmlCreateDocParserCtxt(xmlChar *cur) {
10459 int len;
10460
10461 if (cur == NULL)
10462 return(NULL);
10463 len = xmlStrlen(cur);
10464 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10465}
10466
10467/**
10468 * xmlSAXParseDoc:
10469 * @sax: the SAX handler block
10470 * @cur: a pointer to an array of xmlChar
10471 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10472 * documents
10473 *
10474 * parse an XML in-memory document and build a tree.
10475 * It use the given SAX function block to handle the parsing callback.
10476 * If sax is NULL, fallback to the default DOM tree building routines.
10477 *
10478 * Returns the resulting document tree
10479 */
10480
10481xmlDocPtr
10482xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10483 xmlDocPtr ret;
10484 xmlParserCtxtPtr ctxt;
10485
10486 if (cur == NULL) return(NULL);
10487
10488
10489 ctxt = xmlCreateDocParserCtxt(cur);
10490 if (ctxt == NULL) return(NULL);
10491 if (sax != NULL) {
10492 ctxt->sax = sax;
10493 ctxt->userData = NULL;
10494 }
10495
10496 xmlParseDocument(ctxt);
10497 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10498 else {
10499 ret = NULL;
10500 xmlFreeDoc(ctxt->myDoc);
10501 ctxt->myDoc = NULL;
10502 }
10503 if (sax != NULL)
10504 ctxt->sax = NULL;
10505 xmlFreeParserCtxt(ctxt);
10506
10507 return(ret);
10508}
10509
10510/**
10511 * xmlParseDoc:
10512 * @cur: a pointer to an array of xmlChar
10513 *
10514 * parse an XML in-memory document and build a tree.
10515 *
10516 * Returns the resulting document tree
10517 */
10518
10519xmlDocPtr
10520xmlParseDoc(xmlChar *cur) {
10521 return(xmlSAXParseDoc(NULL, cur, 0));
10522}
10523
Daniel Veillard8107a222002-01-13 14:10:10 +000010524/************************************************************************
10525 * *
10526 * Specific function to keep track of entities references *
10527 * and used by the XSLT debugger *
10528 * *
10529 ************************************************************************/
10530
10531static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10532
10533/**
10534 * xmlAddEntityReference:
10535 * @ent : A valid entity
10536 * @firstNode : A valid first node for children of entity
10537 * @lastNode : A valid last node of children entity
10538 *
10539 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10540 */
10541static void
10542xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10543 xmlNodePtr lastNode)
10544{
10545 if (xmlEntityRefFunc != NULL) {
10546 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10547 }
10548}
10549
10550
10551/**
10552 * xmlSetEntityReferenceFunc:
10553 * @func : A valid function
10554 *
10555 * Set the function to call call back when a xml reference has been made
10556 */
10557void
10558xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10559{
10560 xmlEntityRefFunc = func;
10561}
Owen Taylor3473f882001-02-23 17:55:21 +000010562
10563/************************************************************************
10564 * *
10565 * Miscellaneous *
10566 * *
10567 ************************************************************************/
10568
10569#ifdef LIBXML_XPATH_ENABLED
10570#include <libxml/xpath.h>
10571#endif
10572
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010573extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010574static int xmlParserInitialized = 0;
10575
10576/**
10577 * xmlInitParser:
10578 *
10579 * Initialization function for the XML parser.
10580 * This is not reentrant. Call once before processing in case of
10581 * use in multithreaded programs.
10582 */
10583
10584void
10585xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010586 if (xmlParserInitialized != 0)
10587 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010588
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010589 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10590 (xmlGenericError == NULL))
10591 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010592 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010593 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010594 xmlInitCharEncodingHandlers();
10595 xmlInitializePredefinedEntities();
10596 xmlDefaultSAXHandlerInit();
10597 xmlRegisterDefaultInputCallbacks();
10598 xmlRegisterDefaultOutputCallbacks();
10599#ifdef LIBXML_HTML_ENABLED
10600 htmlInitAutoClose();
10601 htmlDefaultSAXHandlerInit();
10602#endif
10603#ifdef LIBXML_XPATH_ENABLED
10604 xmlXPathInit();
10605#endif
10606 xmlParserInitialized = 1;
10607}
10608
10609/**
10610 * xmlCleanupParser:
10611 *
10612 * Cleanup function for the XML parser. It tries to reclaim all
10613 * parsing related global memory allocated for the parser processing.
10614 * It doesn't deallocate any document related memory. Calling this
10615 * function should not prevent reusing the parser.
10616 */
10617
10618void
10619xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010620 xmlCleanupCharEncodingHandlers();
10621 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010622#ifdef LIBXML_CATALOG_ENABLED
10623 xmlCatalogCleanup();
10624#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010625 xmlCleanupThreads();
10626 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010627}