blob: a10b3e3a585a45d23b30825c51d4192d80d113da [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * Processing-instruction targets starting with "xml" that the W3C
 * specifications allow. The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Owen Taylor3473f882001-02-23 17:55:21 +0000108/************************************************************************
109 * *
110 * Parser stacks related functions and macros *
111 * *
112 ************************************************************************/
113
114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
115 const xmlChar ** str);
116
/*
 * Generic generator for the stacks kept in the parser context.
 *
 * PUSH_AND_POP(scope, type, name) expands to two functions:
 *
 *   scope int  name##Push(xmlParserCtxtPtr ctxt, type value)
 *   scope type name##Pop(xmlParserCtxtPtr ctxt)
 *
 * They operate on ctxt->name##Tab (the storage), ctxt->name##Nr (number
 * of entries in use), ctxt->name##Max (number of entries allocated) and
 * ctxt->name (copy of the top entry, NULL when the stack is empty).
 *
 * Push doubles the storage when it is full and returns the index of the
 * new entry, or 0 if the storage could not be grown. The grown table is
 * only committed once xmlRealloc succeeded: on failure the previous
 * table, its size and its content are left untouched, so the stack stays
 * usable and nothing is leaked.
 *
 * Pop returns the removed entry, or 0 if the stack was already empty;
 * the vacated slot is reset to 0.
 */
#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        int newMax = ctxt->name##Max * 2;				\
        type *newTab = (type *) xmlRealloc(ctxt->name##Tab,		\
                     newMax * sizeof(ctxt->name##Tab[0]));		\
        if (newTab == NULL) {						\
            xmlGenericError(xmlGenericErrorContext,			\
                    "realloc failed !\n");				\
            return(0);							\
        }								\
        ctxt->name##Tab = newTab;					\
        ctxt->name##Max = newMax;					\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
149
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000150/**
151 * inputPop:
152 * @ctxt: an XML parser context
153 *
154 * Pops the top parser input from the input stack
155 *
156 * Returns the input just removed
157 */
158/**
159 * inputPush:
160 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 *
163 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 *
165 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166 */
167/**
168 * namePop:
169 * @ctxt: an XML parser context
170 *
171 * Pops the top element name from the name stack
172 *
173 * Returns the name just removed
174 */
175/**
176 * namePush:
177 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 *
180 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 *
182 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 */
184/**
185 * nodePop:
186 * @ctxt: an XML parser context
187 *
188 * Pops the top element node from the node stack
189 *
190 * Returns the node just removed
191 */
192/**
193 * nodePush:
194 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000196 *
197 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 *
199 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000200 */
Owen Taylor3473f882001-02-23 17:55:21 +0000201/*
202 * Those macros actually generate the functions
203 */
204PUSH_AND_POP(extern, xmlParserInputPtr, input)
205PUSH_AND_POP(extern, xmlNodePtr, node)
206PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
270
/* Current byte of the current input; RAW and CUR expand identically. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current read pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the read pointer (and the character counter) by val bytes,
 * then give a '%' found at the new position to the PE reference handler
 * and, when the new position holds a 0 byte and the input can not be
 * grown, pop the input.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
283
Daniel Veillard46de64e2002-05-29 08:21:33 +0000284#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
285 xmlSHRINK (ctxt);
286
287static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
288 xmlParserInputShrink(ctxt->input);
289 if ((*ctxt->input->cur == 0) &&
290 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
291 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000292 }
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard46de64e2002-05-29 08:21:33 +0000294#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
295 xmlGROW (ctxt);
296
297static void xmlGROW (xmlParserCtxtPtr ctxt) {
298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
299 if ((*ctxt->input->cur == 0) &&
300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
301 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Move one byte forward, counting it, and try to load more data when
 * the new position holds a 0 byte.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Move l bytes forward. Line and column are updated from the byte that
 * is being left, and a '%' found at the new position is given to the
 * PE reference handler.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    } else								\
	ctxt->input->col++;						\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%')					\
	xmlParserHandlePEReference(ctxt);				\
  } while (0)

/* Current character of the input / of the string s; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v to buffer b at index i: directly when its length l
 * is 1, through xmlCopyCharMultiByte() otherwise. i is advanced.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000329
330/**
331 * xmlSkipBlankChars:
332 * @ctxt: the XML parser context
333 *
334 * skip all blanks character found at that point in the input streams.
335 * It pops up finished entities in the process if allowable at that point.
336 *
337 * Returns the number of space chars skipped
338 */
339
340int
341xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000343
344 /*
345 * It's Okay to use CUR/NEXT here since all the blanks are on
346 * the ASCII range.
347 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
349 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000352 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000353 cur = ctxt->input->cur;
354 while (IS_BLANK(*cur)) {
355 if (*cur == '\n') {
356 ctxt->input->line++; ctxt->input->col = 1;
357 }
358 cur++;
359 res++;
360 if (*cur == 0) {
361 ctxt->input->cur = cur;
362 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
363 cur = ctxt->input->cur;
364 }
365 }
366 ctxt->input->cur = cur;
367 } else {
368 int cur;
369 do {
370 cur = CUR;
371 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
372 NEXT;
373 cur = CUR;
374 res++;
375 }
376 while ((cur == 0) && (ctxt->inputNr > 1) &&
377 (ctxt->instate != XML_PARSER_COMMENT)) {
378 xmlPopInput(ctxt);
379 cur = CUR;
380 }
381 /*
382 * Need to handle support of entities branching here
383 */
384 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
385 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
386 }
Owen Taylor3473f882001-02-23 17:55:21 +0000387 return(res);
388}
389
390/************************************************************************
391 * *
392 * Commodity functions to handle entities *
393 * *
394 ************************************************************************/
395
396/**
397 * xmlPopInput:
398 * @ctxt: an XML parser context
399 *
400 * xmlPopInput: the current input pointed by ctxt->input came to an end
401 * pop it and return the next char.
402 *
403 * Returns the current xmlChar in the parser context
404 */
405xmlChar
406xmlPopInput(xmlParserCtxtPtr ctxt) {
407 if (ctxt->inputNr == 1) return(0); /* End of main Input */
408 if (xmlParserDebugEntities)
409 xmlGenericError(xmlGenericErrorContext,
410 "Popping input %d\n", ctxt->inputNr);
411 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000412 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000413 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
414 return(xmlPopInput(ctxt));
415 return(CUR);
416}
417
418/**
419 * xmlPushInput:
420 * @ctxt: an XML parser context
421 * @input: an XML parser input fragment (entity, XML fragment ...).
422 *
423 * xmlPushInput: switch to a new input stream which is stacked on top
424 * of the previous one(s).
425 */
426void
427xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
428 if (input == NULL) return;
429
430 if (xmlParserDebugEntities) {
431 if ((ctxt->input != NULL) && (ctxt->input->filename))
432 xmlGenericError(xmlGenericErrorContext,
433 "%s(%d): ", ctxt->input->filename,
434 ctxt->input->line);
435 xmlGenericError(xmlGenericErrorContext,
436 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
437 }
438 inputPush(ctxt, input);
439 GROW;
440}
441
442/**
443 * xmlParseCharRef:
444 * @ctxt: an XML parser context
445 *
446 * parse Reference declarations
447 *
448 * [66] CharRef ::= '&#' [0-9]+ ';' |
449 * '&#x' [0-9a-fA-F]+ ';'
450 *
451 * [ WFC: Legal Character ]
452 * Characters referred to using character references must match the
453 * production for Char.
454 *
455 * Returns the value parsed (as an int), 0 in case of error
456 */
457int
458xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000459 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000460 int count = 0;
461
Owen Taylor3473f882001-02-23 17:55:21 +0000462 /*
463 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
464 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000465 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000466 (NXT(2) == 'x')) {
467 SKIP(3);
468 GROW;
469 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000470 if (count++ > 20) {
471 count = 0;
472 GROW;
473 }
474 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000475 val = val * 16 + (CUR - '0');
476 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
477 val = val * 16 + (CUR - 'a') + 10;
478 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
479 val = val * 16 + (CUR - 'A') + 10;
480 else {
481 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
483 ctxt->sax->error(ctxt->userData,
484 "xmlParseCharRef: invalid hexadecimal value\n");
485 ctxt->wellFormed = 0;
486 ctxt->disableSAX = 1;
487 val = 0;
488 break;
489 }
490 NEXT;
491 count++;
492 }
493 if (RAW == ';') {
494 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
495 ctxt->nbChars ++;
496 ctxt->input->cur++;
497 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000498 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000499 SKIP(2);
500 GROW;
501 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000502 if (count++ > 20) {
503 count = 0;
504 GROW;
505 }
506 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000507 val = val * 10 + (CUR - '0');
508 else {
509 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
511 ctxt->sax->error(ctxt->userData,
512 "xmlParseCharRef: invalid decimal value\n");
513 ctxt->wellFormed = 0;
514 ctxt->disableSAX = 1;
515 val = 0;
516 break;
517 }
518 NEXT;
519 count++;
520 }
521 if (RAW == ';') {
522 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
523 ctxt->nbChars ++;
524 ctxt->input->cur++;
525 }
526 } else {
527 ctxt->errNo = XML_ERR_INVALID_CHARREF;
528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
529 ctxt->sax->error(ctxt->userData,
530 "xmlParseCharRef: invalid value\n");
531 ctxt->wellFormed = 0;
532 ctxt->disableSAX = 1;
533 }
534
535 /*
536 * [ WFC: Legal Character ]
537 * Characters referred to using character references must match the
538 * production for Char.
539 */
540 if (IS_CHAR(val)) {
541 return(val);
542 } else {
543 ctxt->errNo = XML_ERR_INVALID_CHAR;
544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000545 ctxt->sax->error(ctxt->userData,
546 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000547 val);
548 ctxt->wellFormed = 0;
549 ctxt->disableSAX = 1;
550 }
551 return(0);
552}
553
554/**
555 * xmlParseStringCharRef:
556 * @ctxt: an XML parser context
557 * @str: a pointer to an index in the string
558 *
559 * parse Reference declarations, variant parsing from a string rather
560 * than an an input flow.
561 *
562 * [66] CharRef ::= '&#' [0-9]+ ';' |
563 * '&#x' [0-9a-fA-F]+ ';'
564 *
565 * [ WFC: Legal Character ]
566 * Characters referred to using character references must match the
567 * production for Char.
568 *
569 * Returns the value parsed (as an int), 0 in case of error, str will be
570 * updated to the current value of the index
571 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000572static int
Owen Taylor3473f882001-02-23 17:55:21 +0000573xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
574 const xmlChar *ptr;
575 xmlChar cur;
576 int val = 0;
577
578 if ((str == NULL) || (*str == NULL)) return(0);
579 ptr = *str;
580 cur = *ptr;
581 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
582 ptr += 3;
583 cur = *ptr;
584 while (cur != ';') { /* Non input consuming loop */
585 if ((cur >= '0') && (cur <= '9'))
586 val = val * 16 + (cur - '0');
587 else if ((cur >= 'a') && (cur <= 'f'))
588 val = val * 16 + (cur - 'a') + 10;
589 else if ((cur >= 'A') && (cur <= 'F'))
590 val = val * 16 + (cur - 'A') + 10;
591 else {
592 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
594 ctxt->sax->error(ctxt->userData,
595 "xmlParseStringCharRef: invalid hexadecimal value\n");
596 ctxt->wellFormed = 0;
597 ctxt->disableSAX = 1;
598 val = 0;
599 break;
600 }
601 ptr++;
602 cur = *ptr;
603 }
604 if (cur == ';')
605 ptr++;
606 } else if ((cur == '&') && (ptr[1] == '#')){
607 ptr += 2;
608 cur = *ptr;
609 while (cur != ';') { /* Non input consuming loops */
610 if ((cur >= '0') && (cur <= '9'))
611 val = val * 10 + (cur - '0');
612 else {
613 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
616 "xmlParseStringCharRef: invalid decimal value\n");
617 ctxt->wellFormed = 0;
618 ctxt->disableSAX = 1;
619 val = 0;
620 break;
621 }
622 ptr++;
623 cur = *ptr;
624 }
625 if (cur == ';')
626 ptr++;
627 } else {
628 ctxt->errNo = XML_ERR_INVALID_CHARREF;
629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
630 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000631 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000632 ctxt->wellFormed = 0;
633 ctxt->disableSAX = 1;
634 return(0);
635 }
636 *str = ptr;
637
638 /*
639 * [ WFC: Legal Character ]
640 * Characters referred to using character references must match the
641 * production for Char.
642 */
643 if (IS_CHAR(val)) {
644 return(val);
645 } else {
646 ctxt->errNo = XML_ERR_INVALID_CHAR;
647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
648 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000649 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000650 ctxt->wellFormed = 0;
651 ctxt->disableSAX = 1;
652 }
653 return(0);
654}
655
656/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000657 * xmlNewBlanksWrapperInputStream:
658 * @ctxt: an XML parser context
659 * @entity: an Entity pointer
660 *
661 * Create a new input stream for wrapping
662 * blanks around a PEReference
663 *
664 * Returns the new input stream or NULL
665 */
666
667static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
668
Daniel Veillardf4862f02002-09-10 11:13:43 +0000669static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000670xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
671 xmlParserInputPtr input;
672 xmlChar *buffer;
673 size_t length;
674 if (entity == NULL) {
675 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
677 ctxt->sax->error(ctxt->userData,
678 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
679 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
680 return(NULL);
681 }
682 if (xmlParserDebugEntities)
683 xmlGenericError(xmlGenericErrorContext,
684 "new blanks wrapper for entity: %s\n", entity->name);
685 input = xmlNewInputStream(ctxt);
686 if (input == NULL) {
687 return(NULL);
688 }
689 length = xmlStrlen(entity->name) + 5;
690 buffer = xmlMalloc(length);
691 if (buffer == NULL) {
692 return(NULL);
693 }
694 buffer [0] = ' ';
695 buffer [1] = '%';
696 buffer [length-3] = ';';
697 buffer [length-2] = ' ';
698 buffer [length-1] = 0;
699 memcpy(buffer + 2, entity->name, length - 5);
700 input->free = deallocblankswrapper;
701 input->base = buffer;
702 input->cur = buffer;
703 input->length = length;
704 input->end = &buffer[length];
705 return(input);
706}
707
708/**
Owen Taylor3473f882001-02-23 17:55:21 +0000709 * xmlParserHandlePEReference:
710 * @ctxt: the parser context
711 *
712 * [69] PEReference ::= '%' Name ';'
713 *
714 * [ WFC: No Recursion ]
715 * A parsed entity must not contain a recursive
716 * reference to itself, either directly or indirectly.
717 *
718 * [ WFC: Entity Declared ]
719 * In a document without any DTD, a document with only an internal DTD
720 * subset which contains no parameter entity references, or a document
721 * with "standalone='yes'", ... ... The declaration of a parameter
722 * entity must precede any reference to it...
723 *
724 * [ VC: Entity Declared ]
725 * In a document with an external subset or external parameter entities
726 * with "standalone='no'", ... ... The declaration of a parameter entity
727 * must precede any reference to it...
728 *
729 * [ WFC: In DTD ]
730 * Parameter-entity references may only appear in the DTD.
731 * NOTE: misleading but this is handled.
732 *
733 * A PEReference may have been detected in the current input stream
734 * the handling is done accordingly to
735 * http://www.w3.org/TR/REC-xml#entproc
736 * i.e.
737 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000738 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000739 */
740void
741xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
742 xmlChar *name;
743 xmlEntityPtr entity = NULL;
744 xmlParserInputPtr input;
745
Owen Taylor3473f882001-02-23 17:55:21 +0000746 if (RAW != '%') return;
747 switch(ctxt->instate) {
748 case XML_PARSER_CDATA_SECTION:
749 return;
750 case XML_PARSER_COMMENT:
751 return;
752 case XML_PARSER_START_TAG:
753 return;
754 case XML_PARSER_END_TAG:
755 return;
756 case XML_PARSER_EOF:
757 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
759 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 return;
763 case XML_PARSER_PROLOG:
764 case XML_PARSER_START:
765 case XML_PARSER_MISC:
766 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
768 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 return;
772 case XML_PARSER_ENTITY_DECL:
773 case XML_PARSER_CONTENT:
774 case XML_PARSER_ATTRIBUTE_VALUE:
775 case XML_PARSER_PI:
776 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000777 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000778 /* we just ignore it there */
779 return;
780 case XML_PARSER_EPILOG:
781 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
783 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
784 ctxt->wellFormed = 0;
785 ctxt->disableSAX = 1;
786 return;
787 case XML_PARSER_ENTITY_VALUE:
788 /*
789 * NOTE: in the case of entity values, we don't do the
790 * substitution here since we need the literal
791 * entity value to be able to save the internal
792 * subset of the document.
793 * This will be handled by xmlStringDecodeEntities
794 */
795 return;
796 case XML_PARSER_DTD:
797 /*
798 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
799 * In the internal DTD subset, parameter-entity references
800 * can occur only where markup declarations can occur, not
801 * within markup declarations.
802 * In that case this is handled in xmlParseMarkupDecl
803 */
804 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
805 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000806 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
807 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000808 break;
809 case XML_PARSER_IGNORE:
810 return;
811 }
812
813 NEXT;
814 name = xmlParseName(ctxt);
815 if (xmlParserDebugEntities)
816 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000817 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000818 if (name == NULL) {
819 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000821 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000822 ctxt->wellFormed = 0;
823 ctxt->disableSAX = 1;
824 } else {
825 if (RAW == ';') {
826 NEXT;
827 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
828 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
829 if (entity == NULL) {
830
831 /*
832 * [ WFC: Entity Declared ]
833 * In a document without any DTD, a document with only an
834 * internal DTD subset which contains no parameter entity
835 * references, or a document with "standalone='yes'", ...
836 * ... The declaration of a parameter entity must precede
837 * any reference to it...
838 */
839 if ((ctxt->standalone == 1) ||
840 ((ctxt->hasExternalSubset == 0) &&
841 (ctxt->hasPErefs == 0))) {
842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
843 ctxt->sax->error(ctxt->userData,
844 "PEReference: %%%s; not found\n", name);
845 ctxt->wellFormed = 0;
846 ctxt->disableSAX = 1;
847 } else {
848 /*
849 * [ VC: Entity Declared ]
850 * In a document with an external subset or external
851 * parameter entities with "standalone='no'", ...
852 * ... The declaration of a parameter entity must precede
853 * any reference to it...
854 */
855 if ((!ctxt->disableSAX) &&
856 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
857 ctxt->vctxt.error(ctxt->vctxt.userData,
858 "PEReference: %%%s; not found\n", name);
859 } else if ((!ctxt->disableSAX) &&
860 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
861 ctxt->sax->warning(ctxt->userData,
862 "PEReference: %%%s; not found\n", name);
863 ctxt->valid = 0;
864 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000865 } else if (ctxt->input->free != deallocblankswrapper) {
866 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
867 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000868 } else {
869 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
870 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000871 xmlChar start[4];
872 xmlCharEncoding enc;
873
Owen Taylor3473f882001-02-23 17:55:21 +0000874 /*
875 * handle the extra spaces added before and after
876 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000877 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000878 */
879 input = xmlNewEntityInputStream(ctxt, entity);
880 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000881
882 /*
883 * Get the 4 first bytes and decode the charset
884 * if enc != XML_CHAR_ENCODING_NONE
885 * plug some encoding conversion routines.
886 */
887 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000888 if (entity->length >= 4) {
889 start[0] = RAW;
890 start[1] = NXT(1);
891 start[2] = NXT(2);
892 start[3] = NXT(3);
893 enc = xmlDetectCharEncoding(start, 4);
894 if (enc != XML_CHAR_ENCODING_NONE) {
895 xmlSwitchEncoding(ctxt, enc);
896 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000897 }
898
Owen Taylor3473f882001-02-23 17:55:21 +0000899 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
900 (RAW == '<') && (NXT(1) == '?') &&
901 (NXT(2) == 'x') && (NXT(3) == 'm') &&
902 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
903 xmlParseTextDecl(ctxt);
904 }
Owen Taylor3473f882001-02-23 17:55:21 +0000905 } else {
906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
907 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000908 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000909 name);
910 ctxt->wellFormed = 0;
911 ctxt->disableSAX = 1;
912 }
913 }
914 } else {
915 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
917 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000918 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000919 ctxt->wellFormed = 0;
920 ctxt->disableSAX = 1;
921 }
922 xmlFree(name);
923 }
924}
925
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the companion variable <buffer>_size and reallocates <buffer>
 * to that many xmlChar. The reallocation result is kept in a temporary
 * so that the old block is not lost when xmlRealloc() fails: in that case
 * an error is reported, the old block is released and the ENCLOSING
 * function returns NULL.
 *
 * NOTE(review): releasing the old block assumes <buffer> is owned by the
 * enclosing function (it becomes unreachable once that function returns);
 * confirm for every user of this macro.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
938
939/**
940 * xmlStringDecodeEntities:
941 * @ctxt: the parser context
942 * @str: the input string
943 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
944 * @end: an end marker xmlChar, 0 if none
945 * @end2: an end marker xmlChar, 0 if none
946 * @end3: an end marker xmlChar, 0 if none
947 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000948 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000949 *
950 * [67] Reference ::= EntityRef | CharRef
951 *
952 * [69] PEReference ::= '%' Name ';'
953 *
954 * Returns A newly allocated string with the substitution done. The caller
955 * must deallocate it !
956 */
957xmlChar *
958xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
959 xmlChar end, xmlChar end2, xmlChar end3) {
960 xmlChar *buffer = NULL;
961 int buffer_size = 0;
962
963 xmlChar *current = NULL;
964 xmlEntityPtr ent;
965 int c,l;
966 int nbchars = 0;
967
968 if (str == NULL)
969 return(NULL);
970
971 if (ctxt->depth > 40) {
972 ctxt->errNo = XML_ERR_ENTITY_LOOP;
973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
974 ctxt->sax->error(ctxt->userData,
975 "Detected entity reference loop\n");
976 ctxt->wellFormed = 0;
977 ctxt->disableSAX = 1;
978 return(NULL);
979 }
980
981 /*
982 * allocate a translation buffer.
983 */
984 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
985 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
986 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000987 xmlGenericError(xmlGenericErrorContext,
988 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000989 return(NULL);
990 }
991
992 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000993 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000994 * we are operating on already parsed values.
995 */
996 c = CUR_SCHAR(str, l);
997 while ((c != 0) && (c != end) && /* non input consuming loop */
998 (c != end2) && (c != end3)) {
999
1000 if (c == 0) break;
1001 if ((c == '&') && (str[1] == '#')) {
1002 int val = xmlParseStringCharRef(ctxt, &str);
1003 if (val != 0) {
1004 COPY_BUF(0,buffer,nbchars,val);
1005 }
1006 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1007 if (xmlParserDebugEntities)
1008 xmlGenericError(xmlGenericErrorContext,
1009 "String decoding Entity Reference: %.30s\n",
1010 str);
1011 ent = xmlParseStringEntityRef(ctxt, &str);
1012 if ((ent != NULL) &&
1013 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1014 if (ent->content != NULL) {
1015 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1016 } else {
1017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1018 ctxt->sax->error(ctxt->userData,
1019 "internal error entity has no content\n");
1020 }
1021 } else if ((ent != NULL) && (ent->content != NULL)) {
1022 xmlChar *rep;
1023
1024 ctxt->depth++;
1025 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1026 0, 0, 0);
1027 ctxt->depth--;
1028 if (rep != NULL) {
1029 current = rep;
1030 while (*current != 0) { /* non input consuming loop */
1031 buffer[nbchars++] = *current++;
1032 if (nbchars >
1033 buffer_size - XML_PARSER_BUFFER_SIZE) {
1034 growBuffer(buffer);
1035 }
1036 }
1037 xmlFree(rep);
1038 }
1039 } else if (ent != NULL) {
1040 int i = xmlStrlen(ent->name);
1041 const xmlChar *cur = ent->name;
1042
1043 buffer[nbchars++] = '&';
1044 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1045 growBuffer(buffer);
1046 }
1047 for (;i > 0;i--)
1048 buffer[nbchars++] = *cur++;
1049 buffer[nbchars++] = ';';
1050 }
1051 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1052 if (xmlParserDebugEntities)
1053 xmlGenericError(xmlGenericErrorContext,
1054 "String decoding PE Reference: %.30s\n", str);
1055 ent = xmlParseStringPEReference(ctxt, &str);
1056 if (ent != NULL) {
1057 xmlChar *rep;
1058
1059 ctxt->depth++;
1060 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1061 0, 0, 0);
1062 ctxt->depth--;
1063 if (rep != NULL) {
1064 current = rep;
1065 while (*current != 0) { /* non input consuming loop */
1066 buffer[nbchars++] = *current++;
1067 if (nbchars >
1068 buffer_size - XML_PARSER_BUFFER_SIZE) {
1069 growBuffer(buffer);
1070 }
1071 }
1072 xmlFree(rep);
1073 }
1074 }
1075 } else {
1076 COPY_BUF(l,buffer,nbchars,c);
1077 str += l;
1078 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1079 growBuffer(buffer);
1080 }
1081 }
1082 c = CUR_SCHAR(str, l);
1083 }
1084 buffer[nbchars++] = 0;
1085 return(buffer);
1086}
1087
1088
1089/************************************************************************
1090 * *
1091 * Commodity functions to handle xmlChars *
1092 * *
1093 ************************************************************************/
1094
1095/**
1096 * xmlStrndup:
1097 * @cur: the input xmlChar *
1098 * @len: the len of @cur
1099 *
1100 * a strndup for array of xmlChar's
1101 *
1102 * Returns a new xmlChar * or NULL
1103 */
1104xmlChar *
1105xmlStrndup(const xmlChar *cur, int len) {
1106 xmlChar *ret;
1107
1108 if ((cur == NULL) || (len < 0)) return(NULL);
1109 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1110 if (ret == NULL) {
1111 xmlGenericError(xmlGenericErrorContext,
1112 "malloc of %ld byte failed\n",
1113 (len + 1) * (long)sizeof(xmlChar));
1114 return(NULL);
1115 }
1116 memcpy(ret, cur, len * sizeof(xmlChar));
1117 ret[len] = 0;
1118 return(ret);
1119}
1120
1121/**
1122 * xmlStrdup:
1123 * @cur: the input xmlChar *
1124 *
1125 * a strdup for array of xmlChar's. Since they are supposed to be
1126 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1127 * a termination mark of '0'.
1128 *
1129 * Returns a new xmlChar * or NULL
1130 */
1131xmlChar *
1132xmlStrdup(const xmlChar *cur) {
1133 const xmlChar *p = cur;
1134
1135 if (cur == NULL) return(NULL);
1136 while (*p != 0) p++; /* non input consuming */
1137 return(xmlStrndup(cur, p - cur));
1138}
1139
1140/**
1141 * xmlCharStrndup:
1142 * @cur: the input char *
1143 * @len: the len of @cur
1144 *
1145 * a strndup for char's to xmlChar's
1146 *
1147 * Returns a new xmlChar * or NULL
1148 */
1149
1150xmlChar *
1151xmlCharStrndup(const char *cur, int len) {
1152 int i;
1153 xmlChar *ret;
1154
1155 if ((cur == NULL) || (len < 0)) return(NULL);
1156 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1157 if (ret == NULL) {
1158 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1159 (len + 1) * (long)sizeof(xmlChar));
1160 return(NULL);
1161 }
1162 for (i = 0;i < len;i++)
1163 ret[i] = (xmlChar) cur[i];
1164 ret[len] = 0;
1165 return(ret);
1166}
1167
1168/**
1169 * xmlCharStrdup:
1170 * @cur: the input char *
1171 * @len: the len of @cur
1172 *
1173 * a strdup for char's to xmlChar's
1174 *
1175 * Returns a new xmlChar * or NULL
1176 */
1177
1178xmlChar *
1179xmlCharStrdup(const char *cur) {
1180 const char *p = cur;
1181
1182 if (cur == NULL) return(NULL);
1183 while (*p != '\0') p++; /* non input consuming */
1184 return(xmlCharStrndup(cur, p - cur));
1185}
1186
1187/**
1188 * xmlStrcmp:
1189 * @str1: the first xmlChar *
1190 * @str2: the second xmlChar *
1191 *
1192 * a strcmp for xmlChar's
1193 *
1194 * Returns the integer result of the comparison
1195 */
1196
1197int
1198xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1199 register int tmp;
1200
1201 if (str1 == str2) return(0);
1202 if (str1 == NULL) return(-1);
1203 if (str2 == NULL) return(1);
1204 do {
1205 tmp = *str1++ - *str2;
1206 if (tmp != 0) return(tmp);
1207 } while (*str2++ != 0);
1208 return 0;
1209}
1210
1211/**
1212 * xmlStrEqual:
1213 * @str1: the first xmlChar *
1214 * @str2: the second xmlChar *
1215 *
1216 * Check if both string are equal of have same content
1217 * Should be a bit more readable and faster than xmlStrEqual()
1218 *
1219 * Returns 1 if they are equal, 0 if they are different
1220 */
1221
1222int
1223xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1224 if (str1 == str2) return(1);
1225 if (str1 == NULL) return(0);
1226 if (str2 == NULL) return(0);
1227 do {
1228 if (*str1++ != *str2) return(0);
1229 } while (*str2++);
1230 return(1);
1231}
1232
1233/**
1234 * xmlStrncmp:
1235 * @str1: the first xmlChar *
1236 * @str2: the second xmlChar *
1237 * @len: the max comparison length
1238 *
1239 * a strncmp for xmlChar's
1240 *
1241 * Returns the integer result of the comparison
1242 */
1243
1244int
1245xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1246 register int tmp;
1247
1248 if (len <= 0) return(0);
1249 if (str1 == str2) return(0);
1250 if (str1 == NULL) return(-1);
1251 if (str2 == NULL) return(1);
1252 do {
1253 tmp = *str1++ - *str2;
1254 if (tmp != 0 || --len == 0) return(tmp);
1255 } while (*str2++ != 0);
1256 return 0;
1257}
1258
Daniel Veillardb44025c2001-10-11 22:55:55 +00001259static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001260 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1261 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1262 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1263 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1264 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1265 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1266 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1267 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1268 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1269 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1270 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1271 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1272 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1273 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1274 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1275 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1276 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1277 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1278 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1279 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1280 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1281 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1282 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1283 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1284 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1285 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1286 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1287 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1288 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1289 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1290 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1291 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1292};
1293
1294/**
1295 * xmlStrcasecmp:
1296 * @str1: the first xmlChar *
1297 * @str2: the second xmlChar *
1298 *
1299 * a strcasecmp for xmlChar's
1300 *
1301 * Returns the integer result of the comparison
1302 */
1303
1304int
1305xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1306 register int tmp;
1307
1308 if (str1 == str2) return(0);
1309 if (str1 == NULL) return(-1);
1310 if (str2 == NULL) return(1);
1311 do {
1312 tmp = casemap[*str1++] - casemap[*str2];
1313 if (tmp != 0) return(tmp);
1314 } while (*str2++ != 0);
1315 return 0;
1316}
1317
1318/**
1319 * xmlStrncasecmp:
1320 * @str1: the first xmlChar *
1321 * @str2: the second xmlChar *
1322 * @len: the max comparison length
1323 *
1324 * a strncasecmp for xmlChar's
1325 *
1326 * Returns the integer result of the comparison
1327 */
1328
1329int
1330xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1331 register int tmp;
1332
1333 if (len <= 0) return(0);
1334 if (str1 == str2) return(0);
1335 if (str1 == NULL) return(-1);
1336 if (str2 == NULL) return(1);
1337 do {
1338 tmp = casemap[*str1++] - casemap[*str2];
1339 if (tmp != 0 || --len == 0) return(tmp);
1340 } while (*str2++ != 0);
1341 return 0;
1342}
1343
1344/**
1345 * xmlStrchr:
1346 * @str: the xmlChar * array
1347 * @val: the xmlChar to search
1348 *
1349 * a strchr for xmlChar's
1350 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001351 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001352 */
1353
1354const xmlChar *
1355xmlStrchr(const xmlChar *str, xmlChar val) {
1356 if (str == NULL) return(NULL);
1357 while (*str != 0) { /* non input consuming */
1358 if (*str == val) return((xmlChar *) str);
1359 str++;
1360 }
1361 return(NULL);
1362}
1363
1364/**
1365 * xmlStrstr:
1366 * @str: the xmlChar * array (haystack)
1367 * @val: the xmlChar to search (needle)
1368 *
1369 * a strstr for xmlChar's
1370 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001371 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001372 */
1373
1374const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001375xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001376 int n;
1377
1378 if (str == NULL) return(NULL);
1379 if (val == NULL) return(NULL);
1380 n = xmlStrlen(val);
1381
1382 if (n == 0) return(str);
1383 while (*str != 0) { /* non input consuming */
1384 if (*str == *val) {
1385 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1386 }
1387 str++;
1388 }
1389 return(NULL);
1390}
1391
1392/**
1393 * xmlStrcasestr:
1394 * @str: the xmlChar * array (haystack)
1395 * @val: the xmlChar to search (needle)
1396 *
1397 * a case-ignoring strstr for xmlChar's
1398 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001399 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001400 */
1401
1402const xmlChar *
1403xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1404 int n;
1405
1406 if (str == NULL) return(NULL);
1407 if (val == NULL) return(NULL);
1408 n = xmlStrlen(val);
1409
1410 if (n == 0) return(str);
1411 while (*str != 0) { /* non input consuming */
1412 if (casemap[*str] == casemap[*val])
1413 if (!xmlStrncasecmp(str, val, n)) return(str);
1414 str++;
1415 }
1416 return(NULL);
1417}
1418
1419/**
1420 * xmlStrsub:
1421 * @str: the xmlChar * array (haystack)
1422 * @start: the index of the first char (zero based)
1423 * @len: the length of the substring
1424 *
1425 * Extract a substring of a given string
1426 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001427 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001428 */
1429
1430xmlChar *
1431xmlStrsub(const xmlChar *str, int start, int len) {
1432 int i;
1433
1434 if (str == NULL) return(NULL);
1435 if (start < 0) return(NULL);
1436 if (len < 0) return(NULL);
1437
1438 for (i = 0;i < start;i++) {
1439 if (*str == 0) return(NULL);
1440 str++;
1441 }
1442 if (*str == 0) return(NULL);
1443 return(xmlStrndup(str, len));
1444}
1445
1446/**
1447 * xmlStrlen:
1448 * @str: the xmlChar * array
1449 *
1450 * length of a xmlChar's string
1451 *
1452 * Returns the number of xmlChar contained in the ARRAY.
1453 */
1454
1455int
1456xmlStrlen(const xmlChar *str) {
1457 int len = 0;
1458
1459 if (str == NULL) return(0);
1460 while (*str != 0) { /* non input consuming */
1461 str++;
1462 len++;
1463 }
1464 return(len);
1465}
1466
1467/**
1468 * xmlStrncat:
1469 * @cur: the original xmlChar * array
1470 * @add: the xmlChar * array added
1471 * @len: the length of @add
1472 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001473 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001474 * first bytes of @add.
1475 *
1476 * Returns a new xmlChar *, the original @cur is reallocated if needed
1477 * and should not be freed
1478 */
1479
1480xmlChar *
1481xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1482 int size;
1483 xmlChar *ret;
1484
1485 if ((add == NULL) || (len == 0))
1486 return(cur);
1487 if (cur == NULL)
1488 return(xmlStrndup(add, len));
1489
1490 size = xmlStrlen(cur);
1491 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1492 if (ret == NULL) {
1493 xmlGenericError(xmlGenericErrorContext,
1494 "xmlStrncat: realloc of %ld byte failed\n",
1495 (size + len + 1) * (long)sizeof(xmlChar));
1496 return(cur);
1497 }
1498 memcpy(&ret[size], add, len * sizeof(xmlChar));
1499 ret[size + len] = 0;
1500 return(ret);
1501}
1502
1503/**
1504 * xmlStrcat:
1505 * @cur: the original xmlChar * array
1506 * @add: the xmlChar * array added
1507 *
1508 * a strcat for array of xmlChar's. Since they are supposed to be
1509 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1510 * a termination mark of '0'.
1511 *
1512 * Returns a new xmlChar * containing the concatenated string.
1513 */
1514xmlChar *
1515xmlStrcat(xmlChar *cur, const xmlChar *add) {
1516 const xmlChar *p = add;
1517
1518 if (add == NULL) return(cur);
1519 if (cur == NULL)
1520 return(xmlStrdup(add));
1521
1522 while (*p != 0) p++; /* non input consuming */
1523 return(xmlStrncat(cur, add, p - add));
1524}
1525
1526/************************************************************************
1527 * *
1528 * Commodity functions, cleanup needed ? *
1529 * *
1530 ************************************************************************/
1531
1532/**
1533 * areBlanks:
1534 * @ctxt: an XML parser context
1535 * @str: a xmlChar *
1536 * @len: the size of @str
1537 *
1538 * Is this a sequence of blank chars that one can ignore ?
1539 *
1540 * Returns 1 if ignorable 0 otherwise.
1541 */
1542
1543static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1544 int i, ret;
1545 xmlNodePtr lastChild;
1546
Daniel Veillard05c13a22001-09-09 08:38:09 +00001547 /*
1548 * Don't spend time trying to differentiate them, the same callback is
1549 * used !
1550 */
1551 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001552 return(0);
1553
Owen Taylor3473f882001-02-23 17:55:21 +00001554 /*
1555 * Check for xml:space value.
1556 */
1557 if (*(ctxt->space) == 1)
1558 return(0);
1559
1560 /*
1561 * Check that the string is made of blanks
1562 */
1563 for (i = 0;i < len;i++)
1564 if (!(IS_BLANK(str[i]))) return(0);
1565
1566 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001567 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001568 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001569 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001570 if (ctxt->myDoc != NULL) {
1571 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1572 if (ret == 0) return(1);
1573 if (ret == 1) return(0);
1574 }
1575
1576 /*
1577 * Otherwise, heuristic :-\
1578 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001579 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001580 if ((ctxt->node->children == NULL) &&
1581 (RAW == '<') && (NXT(1) == '/')) return(0);
1582
1583 lastChild = xmlGetLastChild(ctxt->node);
1584 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001585 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1586 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001587 } else if (xmlNodeIsText(lastChild))
1588 return(0);
1589 else if ((ctxt->node->children != NULL) &&
1590 (xmlNodeIsText(ctxt->node->children)))
1591 return(0);
1592 return(1);
1593}
1594
Owen Taylor3473f882001-02-23 17:55:21 +00001595/************************************************************************
1596 * *
1597 * Extra stuff for namespace support *
1598 * Relates to http://www.w3.org/TR/WD-xml-names *
1599 * *
1600 ************************************************************************/
1601
1602/**
1603 * xmlSplitQName:
1604 * @ctxt: an XML parser context
1605 * @name: an XML parser context
1606 * @prefix: a xmlChar **
1607 *
1608 * parse an UTF8 encoded XML qualified name string
1609 *
1610 * [NS 5] QName ::= (Prefix ':')? LocalPart
1611 *
1612 * [NS 6] Prefix ::= NCName
1613 *
1614 * [NS 7] LocalPart ::= NCName
1615 *
1616 * Returns the local part, and prefix is updated
1617 * to get the Prefix if any.
1618 */
1619
1620xmlChar *
1621xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1622 xmlChar buf[XML_MAX_NAMELEN + 5];
1623 xmlChar *buffer = NULL;
1624 int len = 0;
1625 int max = XML_MAX_NAMELEN;
1626 xmlChar *ret = NULL;
1627 const xmlChar *cur = name;
1628 int c;
1629
1630 *prefix = NULL;
1631
1632#ifndef XML_XML_NAMESPACE
1633 /* xml: prefix is not really a namespace */
1634 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1635 (cur[2] == 'l') && (cur[3] == ':'))
1636 return(xmlStrdup(name));
1637#endif
1638
1639 /* nasty but valid */
1640 if (cur[0] == ':')
1641 return(xmlStrdup(name));
1642
1643 c = *cur++;
1644 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1645 buf[len++] = c;
1646 c = *cur++;
1647 }
1648 if (len >= max) {
1649 /*
1650 * Okay someone managed to make a huge name, so he's ready to pay
1651 * for the processing speed.
1652 */
1653 max = len * 2;
1654
1655 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1656 if (buffer == NULL) {
1657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1658 ctxt->sax->error(ctxt->userData,
1659 "xmlSplitQName: out of memory\n");
1660 return(NULL);
1661 }
1662 memcpy(buffer, buf, len);
1663 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1664 if (len + 10 > max) {
1665 max *= 2;
1666 buffer = (xmlChar *) xmlRealloc(buffer,
1667 max * sizeof(xmlChar));
1668 if (buffer == NULL) {
1669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1670 ctxt->sax->error(ctxt->userData,
1671 "xmlSplitQName: out of memory\n");
1672 return(NULL);
1673 }
1674 }
1675 buffer[len++] = c;
1676 c = *cur++;
1677 }
1678 buffer[len] = 0;
1679 }
1680
1681 if (buffer == NULL)
1682 ret = xmlStrndup(buf, len);
1683 else {
1684 ret = buffer;
1685 buffer = NULL;
1686 max = XML_MAX_NAMELEN;
1687 }
1688
1689
1690 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001691 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001692 if (c == 0) return(ret);
1693 *prefix = ret;
1694 len = 0;
1695
Daniel Veillardbb284f42002-10-16 18:02:47 +00001696 /*
1697 * Check that the first character is proper to start
1698 * a new name
1699 */
1700 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1701 ((c >= 0x41) && (c <= 0x5A)) ||
1702 (c == '_') || (c == ':'))) {
1703 int l;
1704 int first = CUR_SCHAR(cur, l);
1705
1706 if (!IS_LETTER(first) && (first != '_')) {
1707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1708 ctxt->sax->error(ctxt->userData,
1709 "Name %s is not XML Namespace compliant\n",
1710 name);
1711 }
1712 }
1713 cur++;
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1716 buf[len++] = c;
1717 c = *cur++;
1718 }
1719 if (len >= max) {
1720 /*
1721 * Okay someone managed to make a huge name, so he's ready to pay
1722 * for the processing speed.
1723 */
1724 max = len * 2;
1725
1726 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1727 if (buffer == NULL) {
1728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1729 ctxt->sax->error(ctxt->userData,
1730 "xmlSplitQName: out of memory\n");
1731 return(NULL);
1732 }
1733 memcpy(buffer, buf, len);
1734 while (c != 0) { /* tested bigname2.xml */
1735 if (len + 10 > max) {
1736 max *= 2;
1737 buffer = (xmlChar *) xmlRealloc(buffer,
1738 max * sizeof(xmlChar));
1739 if (buffer == NULL) {
1740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1741 ctxt->sax->error(ctxt->userData,
1742 "xmlSplitQName: out of memory\n");
1743 return(NULL);
1744 }
1745 }
1746 buffer[len++] = c;
1747 c = *cur++;
1748 }
1749 buffer[len] = 0;
1750 }
1751
1752 if (buffer == NULL)
1753 ret = xmlStrndup(buf, len);
1754 else {
1755 ret = buffer;
1756 }
1757 }
1758
1759 return(ret);
1760}
1761
1762/************************************************************************
1763 * *
1764 * The parser itself *
1765 * Relates to http://www.w3.org/TR/REC-xml *
1766 * *
1767 ************************************************************************/
1768
Daniel Veillard76d66f42001-05-16 21:05:17 +00001769static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001770/**
1771 * xmlParseName:
1772 * @ctxt: an XML parser context
1773 *
1774 * parse an XML name.
1775 *
1776 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1777 * CombiningChar | Extender
1778 *
1779 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1780 *
1781 * [6] Names ::= Name (S Name)*
1782 *
1783 * Returns the Name parsed or NULL
1784 */
1785
1786xmlChar *
1787xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001788 const xmlChar *in;
1789 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001790 int count = 0;
1791
1792 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001793
1794 /*
1795 * Accelerator for simple ASCII names
1796 */
1797 in = ctxt->input->cur;
1798 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1799 ((*in >= 0x41) && (*in <= 0x5A)) ||
1800 (*in == '_') || (*in == ':')) {
1801 in++;
1802 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1803 ((*in >= 0x41) && (*in <= 0x5A)) ||
1804 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001805 (*in == '_') || (*in == '-') ||
1806 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001807 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001808 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001809 count = in - ctxt->input->cur;
1810 ret = xmlStrndup(ctxt->input->cur, count);
1811 ctxt->input->cur = in;
1812 return(ret);
1813 }
1814 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001815 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001816}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001817
Daniel Veillard46de64e2002-05-29 08:21:33 +00001818/**
1819 * xmlParseNameAndCompare:
1820 * @ctxt: an XML parser context
1821 *
1822 * parse an XML name and compares for match
1823 * (specialized for endtag parsing)
1824 *
1825 *
1826 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1827 * and the name for mismatch
1828 */
1829
Daniel Veillardf4862f02002-09-10 11:13:43 +00001830static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001831xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1832 const xmlChar *cmp = other;
1833 const xmlChar *in;
1834 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001835
1836 GROW;
1837
1838 in = ctxt->input->cur;
1839 while (*in != 0 && *in == *cmp) {
1840 ++in;
1841 ++cmp;
1842 }
1843 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1844 /* success */
1845 ctxt->input->cur = in;
1846 return (xmlChar*) 1;
1847 }
1848 /* failure (or end of input buffer), check with full function */
1849 ret = xmlParseName (ctxt);
1850 if (ret != 0 && xmlStrEqual (ret, other)) {
1851 xmlFree (ret);
1852 return (xmlChar*) 1;
1853 }
1854 return ret;
1855}
1856
Daniel Veillard76d66f42001-05-16 21:05:17 +00001857static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001858xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1859 xmlChar buf[XML_MAX_NAMELEN + 5];
1860 int len = 0, l;
1861 int c;
1862 int count = 0;
1863
1864 /*
1865 * Handler for more complex cases
1866 */
1867 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001868 c = CUR_CHAR(l);
1869 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1870 (!IS_LETTER(c) && (c != '_') &&
1871 (c != ':'))) {
1872 return(NULL);
1873 }
1874
1875 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1876 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1877 (c == '.') || (c == '-') ||
1878 (c == '_') || (c == ':') ||
1879 (IS_COMBINING(c)) ||
1880 (IS_EXTENDER(c)))) {
1881 if (count++ > 100) {
1882 count = 0;
1883 GROW;
1884 }
1885 COPY_BUF(l,buf,len,c);
1886 NEXTL(l);
1887 c = CUR_CHAR(l);
1888 if (len >= XML_MAX_NAMELEN) {
1889 /*
1890 * Okay someone managed to make a huge name, so he's ready to pay
1891 * for the processing speed.
1892 */
1893 xmlChar *buffer;
1894 int max = len * 2;
1895
1896 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1897 if (buffer == NULL) {
1898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1899 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001900 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001901 return(NULL);
1902 }
1903 memcpy(buffer, buf, len);
1904 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1905 (c == '.') || (c == '-') ||
1906 (c == '_') || (c == ':') ||
1907 (IS_COMBINING(c)) ||
1908 (IS_EXTENDER(c))) {
1909 if (count++ > 100) {
1910 count = 0;
1911 GROW;
1912 }
1913 if (len + 10 > max) {
1914 max *= 2;
1915 buffer = (xmlChar *) xmlRealloc(buffer,
1916 max * sizeof(xmlChar));
1917 if (buffer == NULL) {
1918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1919 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001920 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001921 return(NULL);
1922 }
1923 }
1924 COPY_BUF(l,buffer,len,c);
1925 NEXTL(l);
1926 c = CUR_CHAR(l);
1927 }
1928 buffer[len] = 0;
1929 return(buffer);
1930 }
1931 }
1932 return(xmlStrndup(buf, len));
1933}
1934
1935/**
1936 * xmlParseStringName:
1937 * @ctxt: an XML parser context
1938 * @str: a pointer to the string pointer (IN/OUT)
1939 *
1940 * parse an XML name.
1941 *
1942 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1943 * CombiningChar | Extender
1944 *
1945 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1946 *
1947 * [6] Names ::= Name (S Name)*
1948 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001949 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001950 * is updated to the current location in the string.
1951 */
1952
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001953static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001954xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1955 xmlChar buf[XML_MAX_NAMELEN + 5];
1956 const xmlChar *cur = *str;
1957 int len = 0, l;
1958 int c;
1959
1960 c = CUR_SCHAR(cur, l);
1961 if (!IS_LETTER(c) && (c != '_') &&
1962 (c != ':')) {
1963 return(NULL);
1964 }
1965
1966 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1967 (c == '.') || (c == '-') ||
1968 (c == '_') || (c == ':') ||
1969 (IS_COMBINING(c)) ||
1970 (IS_EXTENDER(c))) {
1971 COPY_BUF(l,buf,len,c);
1972 cur += l;
1973 c = CUR_SCHAR(cur, l);
1974 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1975 /*
1976 * Okay someone managed to make a huge name, so he's ready to pay
1977 * for the processing speed.
1978 */
1979 xmlChar *buffer;
1980 int max = len * 2;
1981
1982 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1983 if (buffer == NULL) {
1984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1985 ctxt->sax->error(ctxt->userData,
1986 "xmlParseStringName: out of memory\n");
1987 return(NULL);
1988 }
1989 memcpy(buffer, buf, len);
1990 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1991 (c == '.') || (c == '-') ||
1992 (c == '_') || (c == ':') ||
1993 (IS_COMBINING(c)) ||
1994 (IS_EXTENDER(c))) {
1995 if (len + 10 > max) {
1996 max *= 2;
1997 buffer = (xmlChar *) xmlRealloc(buffer,
1998 max * sizeof(xmlChar));
1999 if (buffer == NULL) {
2000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2001 ctxt->sax->error(ctxt->userData,
2002 "xmlParseStringName: out of memory\n");
2003 return(NULL);
2004 }
2005 }
2006 COPY_BUF(l,buffer,len,c);
2007 cur += l;
2008 c = CUR_SCHAR(cur, l);
2009 }
2010 buffer[len] = 0;
2011 *str = cur;
2012 return(buffer);
2013 }
2014 }
2015 *str = cur;
2016 return(xmlStrndup(buf, len));
2017}
2018
2019/**
2020 * xmlParseNmtoken:
2021 * @ctxt: an XML parser context
2022 *
2023 * parse an XML Nmtoken.
2024 *
2025 * [7] Nmtoken ::= (NameChar)+
2026 *
2027 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2028 *
2029 * Returns the Nmtoken parsed or NULL
2030 */
2031
2032xmlChar *
2033xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2034 xmlChar buf[XML_MAX_NAMELEN + 5];
2035 int len = 0, l;
2036 int c;
2037 int count = 0;
2038
2039 GROW;
2040 c = CUR_CHAR(l);
2041
2042 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2043 (c == '.') || (c == '-') ||
2044 (c == '_') || (c == ':') ||
2045 (IS_COMBINING(c)) ||
2046 (IS_EXTENDER(c))) {
2047 if (count++ > 100) {
2048 count = 0;
2049 GROW;
2050 }
2051 COPY_BUF(l,buf,len,c);
2052 NEXTL(l);
2053 c = CUR_CHAR(l);
2054 if (len >= XML_MAX_NAMELEN) {
2055 /*
2056 * Okay someone managed to make a huge token, so he's ready to pay
2057 * for the processing speed.
2058 */
2059 xmlChar *buffer;
2060 int max = len * 2;
2061
2062 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2063 if (buffer == NULL) {
2064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2065 ctxt->sax->error(ctxt->userData,
2066 "xmlParseNmtoken: out of memory\n");
2067 return(NULL);
2068 }
2069 memcpy(buffer, buf, len);
2070 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2071 (c == '.') || (c == '-') ||
2072 (c == '_') || (c == ':') ||
2073 (IS_COMBINING(c)) ||
2074 (IS_EXTENDER(c))) {
2075 if (count++ > 100) {
2076 count = 0;
2077 GROW;
2078 }
2079 if (len + 10 > max) {
2080 max *= 2;
2081 buffer = (xmlChar *) xmlRealloc(buffer,
2082 max * sizeof(xmlChar));
2083 if (buffer == NULL) {
2084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2085 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002086 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002087 return(NULL);
2088 }
2089 }
2090 COPY_BUF(l,buffer,len,c);
2091 NEXTL(l);
2092 c = CUR_CHAR(l);
2093 }
2094 buffer[len] = 0;
2095 return(buffer);
2096 }
2097 }
2098 if (len == 0)
2099 return(NULL);
2100 return(xmlStrndup(buf, len));
2101}
2102
2103/**
2104 * xmlParseEntityValue:
2105 * @ctxt: an XML parser context
2106 * @orig: if non-NULL store a copy of the original entity value
2107 *
2108 * parse a value for ENTITY declarations
2109 *
2110 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2111 * "'" ([^%&'] | PEReference | Reference)* "'"
2112 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002113 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002114 */
2115
2116xmlChar *
2117xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2118 xmlChar *buf = NULL;
2119 int len = 0;
2120 int size = XML_PARSER_BUFFER_SIZE;
2121 int c, l;
2122 xmlChar stop;
2123 xmlChar *ret = NULL;
2124 const xmlChar *cur = NULL;
2125 xmlParserInputPtr input;
2126
2127 if (RAW == '"') stop = '"';
2128 else if (RAW == '\'') stop = '\'';
2129 else {
2130 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2132 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2133 ctxt->wellFormed = 0;
2134 ctxt->disableSAX = 1;
2135 return(NULL);
2136 }
2137 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2138 if (buf == NULL) {
2139 xmlGenericError(xmlGenericErrorContext,
2140 "malloc of %d byte failed\n", size);
2141 return(NULL);
2142 }
2143
2144 /*
2145 * The content of the entity definition is copied in a buffer.
2146 */
2147
2148 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2149 input = ctxt->input;
2150 GROW;
2151 NEXT;
2152 c = CUR_CHAR(l);
2153 /*
2154 * NOTE: 4.4.5 Included in Literal
2155 * When a parameter entity reference appears in a literal entity
2156 * value, ... a single or double quote character in the replacement
2157 * text is always treated as a normal data character and will not
2158 * terminate the literal.
2159 * In practice it means we stop the loop only when back at parsing
2160 * the initial entity and the quote is found
2161 */
2162 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2163 (ctxt->input != input))) {
2164 if (len + 5 >= size) {
2165 size *= 2;
2166 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2167 if (buf == NULL) {
2168 xmlGenericError(xmlGenericErrorContext,
2169 "realloc of %d byte failed\n", size);
2170 return(NULL);
2171 }
2172 }
2173 COPY_BUF(l,buf,len,c);
2174 NEXTL(l);
2175 /*
2176 * Pop-up of finished entities.
2177 */
2178 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2179 xmlPopInput(ctxt);
2180
2181 GROW;
2182 c = CUR_CHAR(l);
2183 if (c == 0) {
2184 GROW;
2185 c = CUR_CHAR(l);
2186 }
2187 }
2188 buf[len] = 0;
2189
2190 /*
2191 * Raise problem w.r.t. '&' and '%' being used in non-entities
2192 * reference constructs. Note Charref will be handled in
2193 * xmlStringDecodeEntities()
2194 */
2195 cur = buf;
2196 while (*cur != 0) { /* non input consuming */
2197 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2198 xmlChar *name;
2199 xmlChar tmp = *cur;
2200
2201 cur++;
2202 name = xmlParseStringName(ctxt, &cur);
2203 if ((name == NULL) || (*cur != ';')) {
2204 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2206 ctxt->sax->error(ctxt->userData,
2207 "EntityValue: '%c' forbidden except for entities references\n",
2208 tmp);
2209 ctxt->wellFormed = 0;
2210 ctxt->disableSAX = 1;
2211 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002212 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2213 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002214 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2216 ctxt->sax->error(ctxt->userData,
2217 "EntityValue: PEReferences forbidden in internal subset\n",
2218 tmp);
2219 ctxt->wellFormed = 0;
2220 ctxt->disableSAX = 1;
2221 }
2222 if (name != NULL)
2223 xmlFree(name);
2224 }
2225 cur++;
2226 }
2227
2228 /*
2229 * Then PEReference entities are substituted.
2230 */
2231 if (c != stop) {
2232 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2234 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2235 ctxt->wellFormed = 0;
2236 ctxt->disableSAX = 1;
2237 xmlFree(buf);
2238 } else {
2239 NEXT;
2240 /*
2241 * NOTE: 4.4.7 Bypassed
2242 * When a general entity reference appears in the EntityValue in
2243 * an entity declaration, it is bypassed and left as is.
2244 * so XML_SUBSTITUTE_REF is not set here.
2245 */
2246 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2247 0, 0, 0);
2248 if (orig != NULL)
2249 *orig = buf;
2250 else
2251 xmlFree(buf);
2252 }
2253
2254 return(ret);
2255}
2256
2257/**
2258 * xmlParseAttValue:
2259 * @ctxt: an XML parser context
2260 *
2261 * parse a value for an attribute
2262 * Note: the parser won't do substitution of entities here, this
2263 * will be handled later in xmlStringGetNodeList
2264 *
2265 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2266 * "'" ([^<&'] | Reference)* "'"
2267 *
2268 * 3.3.3 Attribute-Value Normalization:
2269 * Before the value of an attribute is passed to the application or
2270 * checked for validity, the XML processor must normalize it as follows:
2271 * - a character reference is processed by appending the referenced
2272 * character to the attribute value
2273 * - an entity reference is processed by recursively processing the
2274 * replacement text of the entity
2275 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2276 * appending #x20 to the normalized value, except that only a single
2277 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2278 * parsed entity or the literal entity value of an internal parsed entity
2279 * - other characters are processed by appending them to the normalized value
2280 * If the declared value is not CDATA, then the XML processor must further
2281 * process the normalized attribute value by discarding any leading and
2282 * trailing space (#x20) characters, and by replacing sequences of space
2283 * (#x20) characters by a single space (#x20) character.
2284 * All attributes for which no declaration has been read should be treated
2285 * by a non-validating parser as if declared CDATA.
2286 *
2287 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2288 */
2289
2290xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002291xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2292
2293xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002294xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2295 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002296 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002297 xmlChar *ret = NULL;
2298 SHRINK;
2299 GROW;
2300 in = CUR_PTR;
2301 if (*in != '"' && *in != '\'') {
2302 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2304 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2305 ctxt->wellFormed = 0;
2306 ctxt->disableSAX = 1;
2307 return(NULL);
2308 }
2309 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2310 limit = *in;
2311 ++in;
2312
2313 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2314 *in != '&' && *in != '<'
2315 ) {
2316 ++in;
2317 }
2318 if (*in != limit) {
2319 return xmlParseAttValueComplex(ctxt);
2320 }
2321 ++in;
2322 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2323 CUR_PTR = in;
2324 return ret;
2325}
2326
2327xmlChar *
2328xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2329 xmlChar limit = 0;
2330 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 int len = 0;
2332 int buf_size = 0;
2333 int c, l;
2334 xmlChar *current = NULL;
2335 xmlEntityPtr ent;
2336
2337
2338 SHRINK;
2339 if (NXT(0) == '"') {
2340 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2341 limit = '"';
2342 NEXT;
2343 } else if (NXT(0) == '\'') {
2344 limit = '\'';
2345 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2346 NEXT;
2347 } else {
2348 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2350 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2351 ctxt->wellFormed = 0;
2352 ctxt->disableSAX = 1;
2353 return(NULL);
2354 }
2355
2356 /*
2357 * allocate a translation buffer.
2358 */
2359 buf_size = XML_PARSER_BUFFER_SIZE;
2360 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2361 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002362 xmlGenericError(xmlGenericErrorContext,
2363 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002364 return(NULL);
2365 }
2366
2367 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002368 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002369 */
2370 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002371 while ((NXT(0) != limit) && /* checked */
2372 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002373 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002374 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002375 if (NXT(1) == '#') {
2376 int val = xmlParseCharRef(ctxt);
2377 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002378 if (ctxt->replaceEntities) {
2379 if (len > buf_size - 10) {
2380 growBuffer(buf);
2381 }
2382 buf[len++] = '&';
2383 } else {
2384 /*
2385 * The reparsing will be done in xmlStringGetNodeList()
2386 * called by the attribute() function in SAX.c
2387 */
2388 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002389
Daniel Veillard319a7422001-09-11 09:27:09 +00002390 if (len > buf_size - 10) {
2391 growBuffer(buf);
2392 }
2393 current = &buffer[0];
2394 while (*current != 0) { /* non input consuming */
2395 buf[len++] = *current++;
2396 }
Owen Taylor3473f882001-02-23 17:55:21 +00002397 }
2398 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002399 if (len > buf_size - 10) {
2400 growBuffer(buf);
2401 }
Owen Taylor3473f882001-02-23 17:55:21 +00002402 len += xmlCopyChar(0, &buf[len], val);
2403 }
2404 } else {
2405 ent = xmlParseEntityRef(ctxt);
2406 if ((ent != NULL) &&
2407 (ctxt->replaceEntities != 0)) {
2408 xmlChar *rep;
2409
2410 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2411 rep = xmlStringDecodeEntities(ctxt, ent->content,
2412 XML_SUBSTITUTE_REF, 0, 0, 0);
2413 if (rep != NULL) {
2414 current = rep;
2415 while (*current != 0) { /* non input consuming */
2416 buf[len++] = *current++;
2417 if (len > buf_size - 10) {
2418 growBuffer(buf);
2419 }
2420 }
2421 xmlFree(rep);
2422 }
2423 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002424 if (len > buf_size - 10) {
2425 growBuffer(buf);
2426 }
Owen Taylor3473f882001-02-23 17:55:21 +00002427 if (ent->content != NULL)
2428 buf[len++] = ent->content[0];
2429 }
2430 } else if (ent != NULL) {
2431 int i = xmlStrlen(ent->name);
2432 const xmlChar *cur = ent->name;
2433
2434 /*
2435 * This may look absurd but is needed to detect
2436 * entities problems
2437 */
2438 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2439 (ent->content != NULL)) {
2440 xmlChar *rep;
2441 rep = xmlStringDecodeEntities(ctxt, ent->content,
2442 XML_SUBSTITUTE_REF, 0, 0, 0);
2443 if (rep != NULL)
2444 xmlFree(rep);
2445 }
2446
2447 /*
2448 * Just output the reference
2449 */
2450 buf[len++] = '&';
2451 if (len > buf_size - i - 10) {
2452 growBuffer(buf);
2453 }
2454 for (;i > 0;i--)
2455 buf[len++] = *cur++;
2456 buf[len++] = ';';
2457 }
2458 }
2459 } else {
2460 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2461 COPY_BUF(l,buf,len,0x20);
2462 if (len > buf_size - 10) {
2463 growBuffer(buf);
2464 }
2465 } else {
2466 COPY_BUF(l,buf,len,c);
2467 if (len > buf_size - 10) {
2468 growBuffer(buf);
2469 }
2470 }
2471 NEXTL(l);
2472 }
2473 GROW;
2474 c = CUR_CHAR(l);
2475 }
2476 buf[len++] = 0;
2477 if (RAW == '<') {
2478 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2480 ctxt->sax->error(ctxt->userData,
2481 "Unescaped '<' not allowed in attributes values\n");
2482 ctxt->wellFormed = 0;
2483 ctxt->disableSAX = 1;
2484 } else if (RAW != limit) {
2485 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2487 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2488 ctxt->wellFormed = 0;
2489 ctxt->disableSAX = 1;
2490 } else
2491 NEXT;
2492 return(buf);
2493}
2494
2495/**
2496 * xmlParseSystemLiteral:
2497 * @ctxt: an XML parser context
2498 *
2499 * parse an XML Literal
2500 *
2501 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2502 *
2503 * Returns the SystemLiteral parsed or NULL
2504 */
2505
2506xmlChar *
2507xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2508 xmlChar *buf = NULL;
2509 int len = 0;
2510 int size = XML_PARSER_BUFFER_SIZE;
2511 int cur, l;
2512 xmlChar stop;
2513 int state = ctxt->instate;
2514 int count = 0;
2515
2516 SHRINK;
2517 if (RAW == '"') {
2518 NEXT;
2519 stop = '"';
2520 } else if (RAW == '\'') {
2521 NEXT;
2522 stop = '\'';
2523 } else {
2524 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2526 ctxt->sax->error(ctxt->userData,
2527 "SystemLiteral \" or ' expected\n");
2528 ctxt->wellFormed = 0;
2529 ctxt->disableSAX = 1;
2530 return(NULL);
2531 }
2532
2533 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2534 if (buf == NULL) {
2535 xmlGenericError(xmlGenericErrorContext,
2536 "malloc of %d byte failed\n", size);
2537 return(NULL);
2538 }
2539 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2540 cur = CUR_CHAR(l);
2541 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2542 if (len + 5 >= size) {
2543 size *= 2;
2544 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2545 if (buf == NULL) {
2546 xmlGenericError(xmlGenericErrorContext,
2547 "realloc of %d byte failed\n", size);
2548 ctxt->instate = (xmlParserInputState) state;
2549 return(NULL);
2550 }
2551 }
2552 count++;
2553 if (count > 50) {
2554 GROW;
2555 count = 0;
2556 }
2557 COPY_BUF(l,buf,len,cur);
2558 NEXTL(l);
2559 cur = CUR_CHAR(l);
2560 if (cur == 0) {
2561 GROW;
2562 SHRINK;
2563 cur = CUR_CHAR(l);
2564 }
2565 }
2566 buf[len] = 0;
2567 ctxt->instate = (xmlParserInputState) state;
2568 if (!IS_CHAR(cur)) {
2569 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2571 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2572 ctxt->wellFormed = 0;
2573 ctxt->disableSAX = 1;
2574 } else {
2575 NEXT;
2576 }
2577 return(buf);
2578}
2579
2580/**
2581 * xmlParsePubidLiteral:
2582 * @ctxt: an XML parser context
2583 *
2584 * parse an XML public literal
2585 *
2586 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2587 *
2588 * Returns the PubidLiteral parsed or NULL.
2589 */
2590
2591xmlChar *
2592xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2593 xmlChar *buf = NULL;
2594 int len = 0;
2595 int size = XML_PARSER_BUFFER_SIZE;
2596 xmlChar cur;
2597 xmlChar stop;
2598 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002599 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002600
2601 SHRINK;
2602 if (RAW == '"') {
2603 NEXT;
2604 stop = '"';
2605 } else if (RAW == '\'') {
2606 NEXT;
2607 stop = '\'';
2608 } else {
2609 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2611 ctxt->sax->error(ctxt->userData,
2612 "SystemLiteral \" or ' expected\n");
2613 ctxt->wellFormed = 0;
2614 ctxt->disableSAX = 1;
2615 return(NULL);
2616 }
2617 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2618 if (buf == NULL) {
2619 xmlGenericError(xmlGenericErrorContext,
2620 "malloc of %d byte failed\n", size);
2621 return(NULL);
2622 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002623 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002624 cur = CUR;
2625 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2626 if (len + 1 >= size) {
2627 size *= 2;
2628 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2629 if (buf == NULL) {
2630 xmlGenericError(xmlGenericErrorContext,
2631 "realloc of %d byte failed\n", size);
2632 return(NULL);
2633 }
2634 }
2635 buf[len++] = cur;
2636 count++;
2637 if (count > 50) {
2638 GROW;
2639 count = 0;
2640 }
2641 NEXT;
2642 cur = CUR;
2643 if (cur == 0) {
2644 GROW;
2645 SHRINK;
2646 cur = CUR;
2647 }
2648 }
2649 buf[len] = 0;
2650 if (cur != stop) {
2651 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2653 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2654 ctxt->wellFormed = 0;
2655 ctxt->disableSAX = 1;
2656 } else {
2657 NEXT;
2658 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002659 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002660 return(buf);
2661}
2662
Daniel Veillard48b2f892001-02-25 16:11:03 +00002663void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002664/**
2665 * xmlParseCharData:
2666 * @ctxt: an XML parser context
2667 * @cdata: int indicating whether we are within a CDATA section
2668 *
2669 * parse a CharData section.
2670 * if we are within a CDATA section ']]>' marks an end of section.
2671 *
2672 * The right angle bracket (>) may be represented using the string "&gt;",
2673 * and must, for compatibility, be escaped using "&gt;" or a character
2674 * reference when it appears in the string "]]>" in content, when that
2675 * string is not marking the end of a CDATA section.
2676 *
2677 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2678 */
2679
2680void
2681xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002682 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002683 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002684 int line = ctxt->input->line;
2685 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002686
2687 SHRINK;
2688 GROW;
2689 /*
2690 * Accelerated common case where input don't need to be
2691 * modified before passing it to the handler.
2692 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002693 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002694 in = ctxt->input->cur;
2695 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002696get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002697 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2698 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002699 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002700 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002701 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002702 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002703 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002704 ctxt->input->line++;
2705 in++;
2706 }
2707 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002708 }
2709 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002710 if ((in[1] == ']') && (in[2] == '>')) {
2711 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2713 ctxt->sax->error(ctxt->userData,
2714 "Sequence ']]>' not allowed in content\n");
2715 ctxt->input->cur = in;
2716 ctxt->wellFormed = 0;
2717 ctxt->disableSAX = 1;
2718 return;
2719 }
2720 in++;
2721 goto get_more;
2722 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002723 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002724 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002725 if (IS_BLANK(*ctxt->input->cur)) {
2726 const xmlChar *tmp = ctxt->input->cur;
2727 ctxt->input->cur = in;
2728 if (areBlanks(ctxt, tmp, nbchar)) {
2729 if (ctxt->sax->ignorableWhitespace != NULL)
2730 ctxt->sax->ignorableWhitespace(ctxt->userData,
2731 tmp, nbchar);
2732 } else {
2733 if (ctxt->sax->characters != NULL)
2734 ctxt->sax->characters(ctxt->userData,
2735 tmp, nbchar);
2736 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002737 line = ctxt->input->line;
2738 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002739 } else {
2740 if (ctxt->sax->characters != NULL)
2741 ctxt->sax->characters(ctxt->userData,
2742 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002743 line = ctxt->input->line;
2744 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002745 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002746 }
2747 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002748 if (*in == 0xD) {
2749 in++;
2750 if (*in == 0xA) {
2751 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002752 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002753 ctxt->input->line++;
2754 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002756 in--;
2757 }
2758 if (*in == '<') {
2759 return;
2760 }
2761 if (*in == '&') {
2762 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002763 }
2764 SHRINK;
2765 GROW;
2766 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002767 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002768 nbchar = 0;
2769 }
Daniel Veillard50582112001-03-26 22:52:16 +00002770 ctxt->input->line = line;
2771 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772 xmlParseCharDataComplex(ctxt, cdata);
2773}
2774
2775void
2776xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002777 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2778 int nbchar = 0;
2779 int cur, l;
2780 int count = 0;
2781
2782 SHRINK;
2783 GROW;
2784 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002785 while ((cur != '<') && /* checked */
2786 (cur != '&') &&
2787 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002788 if ((cur == ']') && (NXT(1) == ']') &&
2789 (NXT(2) == '>')) {
2790 if (cdata) break;
2791 else {
2792 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2794 ctxt->sax->error(ctxt->userData,
2795 "Sequence ']]>' not allowed in content\n");
2796 /* Should this be relaxed ??? I see a "must here */
2797 ctxt->wellFormed = 0;
2798 ctxt->disableSAX = 1;
2799 }
2800 }
2801 COPY_BUF(l,buf,nbchar,cur);
2802 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2803 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002804 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002805 */
2806 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2807 if (areBlanks(ctxt, buf, nbchar)) {
2808 if (ctxt->sax->ignorableWhitespace != NULL)
2809 ctxt->sax->ignorableWhitespace(ctxt->userData,
2810 buf, nbchar);
2811 } else {
2812 if (ctxt->sax->characters != NULL)
2813 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2814 }
2815 }
2816 nbchar = 0;
2817 }
2818 count++;
2819 if (count > 50) {
2820 GROW;
2821 count = 0;
2822 }
2823 NEXTL(l);
2824 cur = CUR_CHAR(l);
2825 }
2826 if (nbchar != 0) {
2827 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002828 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002829 */
2830 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2831 if (areBlanks(ctxt, buf, nbchar)) {
2832 if (ctxt->sax->ignorableWhitespace != NULL)
2833 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2834 } else {
2835 if (ctxt->sax->characters != NULL)
2836 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2837 }
2838 }
2839 }
2840}
2841
2842/**
2843 * xmlParseExternalID:
2844 * @ctxt: an XML parser context
2845 * @publicID: a xmlChar** receiving PubidLiteral
2846 * @strict: indicate whether we should restrict parsing to only
2847 * production [75], see NOTE below
2848 *
2849 * Parse an External ID or a Public ID
2850 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002851 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002852 * 'PUBLIC' S PubidLiteral S SystemLiteral
2853 *
2854 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2855 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2856 *
2857 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2858 *
2859 * Returns the function returns SystemLiteral and in the second
2860 * case publicID receives PubidLiteral, is strict is off
2861 * it is possible to return NULL and have publicID set.
2862 */
2863
2864xmlChar *
2865xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2866 xmlChar *URI = NULL;
2867
2868 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002869
2870 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002871 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2872 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2873 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2874 SKIP(6);
2875 if (!IS_BLANK(CUR)) {
2876 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2878 ctxt->sax->error(ctxt->userData,
2879 "Space required after 'SYSTEM'\n");
2880 ctxt->wellFormed = 0;
2881 ctxt->disableSAX = 1;
2882 }
2883 SKIP_BLANKS;
2884 URI = xmlParseSystemLiteral(ctxt);
2885 if (URI == NULL) {
2886 ctxt->errNo = XML_ERR_URI_REQUIRED;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2888 ctxt->sax->error(ctxt->userData,
2889 "xmlParseExternalID: SYSTEM, no URI\n");
2890 ctxt->wellFormed = 0;
2891 ctxt->disableSAX = 1;
2892 }
2893 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2894 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2895 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2896 SKIP(6);
2897 if (!IS_BLANK(CUR)) {
2898 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2900 ctxt->sax->error(ctxt->userData,
2901 "Space required after 'PUBLIC'\n");
2902 ctxt->wellFormed = 0;
2903 ctxt->disableSAX = 1;
2904 }
2905 SKIP_BLANKS;
2906 *publicID = xmlParsePubidLiteral(ctxt);
2907 if (*publicID == NULL) {
2908 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2910 ctxt->sax->error(ctxt->userData,
2911 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2912 ctxt->wellFormed = 0;
2913 ctxt->disableSAX = 1;
2914 }
2915 if (strict) {
2916 /*
2917 * We don't handle [83] so "S SystemLiteral" is required.
2918 */
2919 if (!IS_BLANK(CUR)) {
2920 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2922 ctxt->sax->error(ctxt->userData,
2923 "Space required after the Public Identifier\n");
2924 ctxt->wellFormed = 0;
2925 ctxt->disableSAX = 1;
2926 }
2927 } else {
2928 /*
2929 * We handle [83] so we return immediately, if
2930 * "S SystemLiteral" is not detected. From a purely parsing
2931 * point of view that's a nice mess.
2932 */
2933 const xmlChar *ptr;
2934 GROW;
2935
2936 ptr = CUR_PTR;
2937 if (!IS_BLANK(*ptr)) return(NULL);
2938
2939 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2940 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2941 }
2942 SKIP_BLANKS;
2943 URI = xmlParseSystemLiteral(ctxt);
2944 if (URI == NULL) {
2945 ctxt->errNo = XML_ERR_URI_REQUIRED;
2946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2947 ctxt->sax->error(ctxt->userData,
2948 "xmlParseExternalID: PUBLIC, no URI\n");
2949 ctxt->wellFormed = 0;
2950 ctxt->disableSAX = 1;
2951 }
2952 }
2953 return(URI);
2954}
2955
2956/**
2957 * xmlParseComment:
2958 * @ctxt: an XML parser context
2959 *
2960 * Skip an XML (SGML) comment <!-- .... -->
2961 * The spec says that "For compatibility, the string "--" (double-hyphen)
2962 * must not occur within comments. "
2963 *
2964 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2965 */
2966void
2967xmlParseComment(xmlParserCtxtPtr ctxt) {
2968 xmlChar *buf = NULL;
2969 int len;
2970 int size = XML_PARSER_BUFFER_SIZE;
2971 int q, ql;
2972 int r, rl;
2973 int cur, l;
2974 xmlParserInputState state;
2975 xmlParserInputPtr input = ctxt->input;
2976 int count = 0;
2977
2978 /*
2979 * Check that there is a comment right here.
2980 */
2981 if ((RAW != '<') || (NXT(1) != '!') ||
2982 (NXT(2) != '-') || (NXT(3) != '-')) return;
2983
2984 state = ctxt->instate;
2985 ctxt->instate = XML_PARSER_COMMENT;
2986 SHRINK;
2987 SKIP(4);
2988 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2989 if (buf == NULL) {
2990 xmlGenericError(xmlGenericErrorContext,
2991 "malloc of %d byte failed\n", size);
2992 ctxt->instate = state;
2993 return;
2994 }
2995 q = CUR_CHAR(ql);
2996 NEXTL(ql);
2997 r = CUR_CHAR(rl);
2998 NEXTL(rl);
2999 cur = CUR_CHAR(l);
3000 len = 0;
3001 while (IS_CHAR(cur) && /* checked */
3002 ((cur != '>') ||
3003 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003004 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003005 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3007 ctxt->sax->error(ctxt->userData,
3008 "Comment must not contain '--' (double-hyphen)`\n");
3009 ctxt->wellFormed = 0;
3010 ctxt->disableSAX = 1;
3011 }
3012 if (len + 5 >= size) {
3013 size *= 2;
3014 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3015 if (buf == NULL) {
3016 xmlGenericError(xmlGenericErrorContext,
3017 "realloc of %d byte failed\n", size);
3018 ctxt->instate = state;
3019 return;
3020 }
3021 }
3022 COPY_BUF(ql,buf,len,q);
3023 q = r;
3024 ql = rl;
3025 r = cur;
3026 rl = l;
3027
3028 count++;
3029 if (count > 50) {
3030 GROW;
3031 count = 0;
3032 }
3033 NEXTL(l);
3034 cur = CUR_CHAR(l);
3035 if (cur == 0) {
3036 SHRINK;
3037 GROW;
3038 cur = CUR_CHAR(l);
3039 }
3040 }
3041 buf[len] = 0;
3042 if (!IS_CHAR(cur)) {
3043 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3045 ctxt->sax->error(ctxt->userData,
3046 "Comment not terminated \n<!--%.50s\n", buf);
3047 ctxt->wellFormed = 0;
3048 ctxt->disableSAX = 1;
3049 xmlFree(buf);
3050 } else {
3051 if (input != ctxt->input) {
3052 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055"Comment doesn't start and stop in the same entity\n");
3056 ctxt->wellFormed = 0;
3057 ctxt->disableSAX = 1;
3058 }
3059 NEXT;
3060 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3061 (!ctxt->disableSAX))
3062 ctxt->sax->comment(ctxt->userData, buf);
3063 xmlFree(buf);
3064 }
3065 ctxt->instate = state;
3066}
3067
3068/**
3069 * xmlParsePITarget:
3070 * @ctxt: an XML parser context
3071 *
3072 * parse the name of a PI
3073 *
3074 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3075 *
3076 * Returns the PITarget name or NULL
3077 */
3078
3079xmlChar *
3080xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3081 xmlChar *name;
3082
3083 name = xmlParseName(ctxt);
3084 if ((name != NULL) &&
3085 ((name[0] == 'x') || (name[0] == 'X')) &&
3086 ((name[1] == 'm') || (name[1] == 'M')) &&
3087 ((name[2] == 'l') || (name[2] == 'L'))) {
3088 int i;
3089 if ((name[0] == 'x') && (name[1] == 'm') &&
3090 (name[2] == 'l') && (name[3] == 0)) {
3091 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3093 ctxt->sax->error(ctxt->userData,
3094 "XML declaration allowed only at the start of the document\n");
3095 ctxt->wellFormed = 0;
3096 ctxt->disableSAX = 1;
3097 return(name);
3098 } else if (name[3] == 0) {
3099 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3101 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3102 ctxt->wellFormed = 0;
3103 ctxt->disableSAX = 1;
3104 return(name);
3105 }
3106 for (i = 0;;i++) {
3107 if (xmlW3CPIs[i] == NULL) break;
3108 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3109 return(name);
3110 }
3111 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3112 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3113 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003114 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003115 }
3116 }
3117 return(name);
3118}
3119
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The catalog URL found is added to the catalogs of the parsing context
 * so that it can be used for resolutions further down in the document.
 * A syntax error is reported through the SAX warning callback.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* Expect the pseudo-attribute name "catalog". */
    while (IS_BLANK(*cursor))
        cursor++;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto error;
    cursor += 7;

    /* A missing '=' is ignored silently, without a warning. */
    while (IS_BLANK(*cursor))
        cursor++;
    if (*cursor != '=')
        return;
    cursor++;

    /* The value must be enclosed in matching single or double quotes. */
    while (IS_BLANK(*cursor))
        cursor++;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cursor++;
    start = cursor;
    while ((*cursor != 0) && (*cursor != quote))
        cursor++;
    if (*cursor == 0)
        goto error;
    URL = xmlStrndup(start, cursor - start);
    cursor++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK(*cursor))
        cursor++;
    if (*cursor != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3182
Owen Taylor3473f882001-02-23 17:55:21 +00003183/**
3184 * xmlParsePI:
3185 * @ctxt: an XML parser context
3186 *
3187 * parse an XML Processing Instruction.
3188 *
3189 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3190 *
3191 * The processing is transfered to SAX once parsed.
3192 */
3193
3194void
3195xmlParsePI(xmlParserCtxtPtr ctxt) {
3196 xmlChar *buf = NULL;
3197 int len = 0;
3198 int size = XML_PARSER_BUFFER_SIZE;
3199 int cur, l;
3200 xmlChar *target;
3201 xmlParserInputState state;
3202 int count = 0;
3203
3204 if ((RAW == '<') && (NXT(1) == '?')) {
3205 xmlParserInputPtr input = ctxt->input;
3206 state = ctxt->instate;
3207 ctxt->instate = XML_PARSER_PI;
3208 /*
3209 * this is a Processing Instruction.
3210 */
3211 SKIP(2);
3212 SHRINK;
3213
3214 /*
3215 * Parse the target name and check for special support like
3216 * namespace.
3217 */
3218 target = xmlParsePITarget(ctxt);
3219 if (target != NULL) {
3220 if ((RAW == '?') && (NXT(1) == '>')) {
3221 if (input != ctxt->input) {
3222 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3224 ctxt->sax->error(ctxt->userData,
3225 "PI declaration doesn't start and stop in the same entity\n");
3226 ctxt->wellFormed = 0;
3227 ctxt->disableSAX = 1;
3228 }
3229 SKIP(2);
3230
3231 /*
3232 * SAX: PI detected.
3233 */
3234 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3235 (ctxt->sax->processingInstruction != NULL))
3236 ctxt->sax->processingInstruction(ctxt->userData,
3237 target, NULL);
3238 ctxt->instate = state;
3239 xmlFree(target);
3240 return;
3241 }
3242 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3243 if (buf == NULL) {
3244 xmlGenericError(xmlGenericErrorContext,
3245 "malloc of %d byte failed\n", size);
3246 ctxt->instate = state;
3247 return;
3248 }
3249 cur = CUR;
3250 if (!IS_BLANK(cur)) {
3251 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3253 ctxt->sax->error(ctxt->userData,
3254 "xmlParsePI: PI %s space expected\n", target);
3255 ctxt->wellFormed = 0;
3256 ctxt->disableSAX = 1;
3257 }
3258 SKIP_BLANKS;
3259 cur = CUR_CHAR(l);
3260 while (IS_CHAR(cur) && /* checked */
3261 ((cur != '?') || (NXT(1) != '>'))) {
3262 if (len + 5 >= size) {
3263 size *= 2;
3264 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3265 if (buf == NULL) {
3266 xmlGenericError(xmlGenericErrorContext,
3267 "realloc of %d byte failed\n", size);
3268 ctxt->instate = state;
3269 return;
3270 }
3271 }
3272 count++;
3273 if (count > 50) {
3274 GROW;
3275 count = 0;
3276 }
3277 COPY_BUF(l,buf,len,cur);
3278 NEXTL(l);
3279 cur = CUR_CHAR(l);
3280 if (cur == 0) {
3281 SHRINK;
3282 GROW;
3283 cur = CUR_CHAR(l);
3284 }
3285 }
3286 buf[len] = 0;
3287 if (cur != '?') {
3288 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3290 ctxt->sax->error(ctxt->userData,
3291 "xmlParsePI: PI %s never end ...\n", target);
3292 ctxt->wellFormed = 0;
3293 ctxt->disableSAX = 1;
3294 } else {
3295 if (input != ctxt->input) {
3296 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3298 ctxt->sax->error(ctxt->userData,
3299 "PI declaration doesn't start and stop in the same entity\n");
3300 ctxt->wellFormed = 0;
3301 ctxt->disableSAX = 1;
3302 }
3303 SKIP(2);
3304
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003305#ifdef LIBXML_CATALOG_ENABLED
3306 if (((state == XML_PARSER_MISC) ||
3307 (state == XML_PARSER_START)) &&
3308 (xmlStrEqual(target, XML_CATALOG_PI))) {
3309 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3310 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3311 (allow == XML_CATA_ALLOW_ALL))
3312 xmlParseCatalogPI(ctxt, buf);
3313 }
3314#endif
3315
3316
Owen Taylor3473f882001-02-23 17:55:21 +00003317 /*
3318 * SAX: PI detected.
3319 */
3320 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3321 (ctxt->sax->processingInstruction != NULL))
3322 ctxt->sax->processingInstruction(ctxt->userData,
3323 target, buf);
3324 }
3325 xmlFree(buf);
3326 xmlFree(target);
3327 } else {
3328 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3330 ctxt->sax->error(ctxt->userData,
3331 "xmlParsePI : no target name\n");
3332 ctxt->wellFormed = 0;
3333 ctxt->disableSAX = 1;
3334 }
3335 ctxt->instate = state;
3336 }
3337}
3338
3339/**
3340 * xmlParseNotationDecl:
3341 * @ctxt: an XML parser context
3342 *
3343 * parse a notation declaration
3344 *
3345 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3346 *
3347 * Hence there is actually 3 choices:
3348 * 'PUBLIC' S PubidLiteral
3349 * 'PUBLIC' S PubidLiteral S SystemLiteral
3350 * and 'SYSTEM' S SystemLiteral
3351 *
3352 * See the NOTE on xmlParseExternalID().
3353 */
3354
3355void
3356xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3357 xmlChar *name;
3358 xmlChar *Pubid;
3359 xmlChar *Systemid;
3360
3361 if ((RAW == '<') && (NXT(1) == '!') &&
3362 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3363 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3364 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3365 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3366 xmlParserInputPtr input = ctxt->input;
3367 SHRINK;
3368 SKIP(10);
3369 if (!IS_BLANK(CUR)) {
3370 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3372 ctxt->sax->error(ctxt->userData,
3373 "Space required after '<!NOTATION'\n");
3374 ctxt->wellFormed = 0;
3375 ctxt->disableSAX = 1;
3376 return;
3377 }
3378 SKIP_BLANKS;
3379
Daniel Veillard76d66f42001-05-16 21:05:17 +00003380 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003381 if (name == NULL) {
3382 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData,
3385 "NOTATION: Name expected here\n");
3386 ctxt->wellFormed = 0;
3387 ctxt->disableSAX = 1;
3388 return;
3389 }
3390 if (!IS_BLANK(CUR)) {
3391 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3393 ctxt->sax->error(ctxt->userData,
3394 "Space required after the NOTATION name'\n");
3395 ctxt->wellFormed = 0;
3396 ctxt->disableSAX = 1;
3397 return;
3398 }
3399 SKIP_BLANKS;
3400
3401 /*
3402 * Parse the IDs.
3403 */
3404 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3405 SKIP_BLANKS;
3406
3407 if (RAW == '>') {
3408 if (input != ctxt->input) {
3409 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412"Notation declaration doesn't start and stop in the same entity\n");
3413 ctxt->wellFormed = 0;
3414 ctxt->disableSAX = 1;
3415 }
3416 NEXT;
3417 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3418 (ctxt->sax->notationDecl != NULL))
3419 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3420 } else {
3421 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3423 ctxt->sax->error(ctxt->userData,
3424 "'>' required to close NOTATION declaration\n");
3425 ctxt->wellFormed = 0;
3426 ctxt->disableSAX = 1;
3427 }
3428 xmlFree(name);
3429 if (Systemid != NULL) xmlFree(Systemid);
3430 if (Pubid != NULL) xmlFree(Pubid);
3431 }
3432}
3433
3434/**
3435 * xmlParseEntityDecl:
3436 * @ctxt: an XML parser context
3437 *
3438 * parse <!ENTITY declarations
3439 *
3440 * [70] EntityDecl ::= GEDecl | PEDecl
3441 *
3442 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3443 *
3444 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3445 *
3446 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3447 *
3448 * [74] PEDef ::= EntityValue | ExternalID
3449 *
3450 * [76] NDataDecl ::= S 'NDATA' S Name
3451 *
3452 * [ VC: Notation Declared ]
3453 * The Name must match the declared name of a notation.
3454 */
3455
3456void
3457xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3458 xmlChar *name = NULL;
3459 xmlChar *value = NULL;
3460 xmlChar *URI = NULL, *literal = NULL;
3461 xmlChar *ndata = NULL;
3462 int isParameter = 0;
3463 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003464 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003465
3466 GROW;
3467 if ((RAW == '<') && (NXT(1) == '!') &&
3468 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3469 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3470 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3471 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003472 SHRINK;
3473 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003474 skipped = SKIP_BLANKS;
3475 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003476 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3478 ctxt->sax->error(ctxt->userData,
3479 "Space required after '<!ENTITY'\n");
3480 ctxt->wellFormed = 0;
3481 ctxt->disableSAX = 1;
3482 }
Owen Taylor3473f882001-02-23 17:55:21 +00003483
3484 if (RAW == '%') {
3485 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003486 skipped = SKIP_BLANKS;
3487 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003488 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3490 ctxt->sax->error(ctxt->userData,
3491 "Space required after '%'\n");
3492 ctxt->wellFormed = 0;
3493 ctxt->disableSAX = 1;
3494 }
Owen Taylor3473f882001-02-23 17:55:21 +00003495 isParameter = 1;
3496 }
3497
Daniel Veillard76d66f42001-05-16 21:05:17 +00003498 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003499 if (name == NULL) {
3500 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3502 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3503 ctxt->wellFormed = 0;
3504 ctxt->disableSAX = 1;
3505 return;
3506 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003507 skipped = SKIP_BLANKS;
3508 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003509 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3511 ctxt->sax->error(ctxt->userData,
3512 "Space required after the entity name\n");
3513 ctxt->wellFormed = 0;
3514 ctxt->disableSAX = 1;
3515 }
Owen Taylor3473f882001-02-23 17:55:21 +00003516
Daniel Veillardf5582f12002-06-11 10:08:16 +00003517 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003518 /*
3519 * handle the various case of definitions...
3520 */
3521 if (isParameter) {
3522 if ((RAW == '"') || (RAW == '\'')) {
3523 value = xmlParseEntityValue(ctxt, &orig);
3524 if (value) {
3525 if ((ctxt->sax != NULL) &&
3526 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3527 ctxt->sax->entityDecl(ctxt->userData, name,
3528 XML_INTERNAL_PARAMETER_ENTITY,
3529 NULL, NULL, value);
3530 }
3531 } else {
3532 URI = xmlParseExternalID(ctxt, &literal, 1);
3533 if ((URI == NULL) && (literal == NULL)) {
3534 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3536 ctxt->sax->error(ctxt->userData,
3537 "Entity value required\n");
3538 ctxt->wellFormed = 0;
3539 ctxt->disableSAX = 1;
3540 }
3541 if (URI) {
3542 xmlURIPtr uri;
3543
3544 uri = xmlParseURI((const char *) URI);
3545 if (uri == NULL) {
3546 ctxt->errNo = XML_ERR_INVALID_URI;
3547 if ((ctxt->sax != NULL) &&
3548 (!ctxt->disableSAX) &&
3549 (ctxt->sax->error != NULL))
3550 ctxt->sax->error(ctxt->userData,
3551 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003552 /*
3553 * This really ought to be a well formedness error
3554 * but the XML Core WG decided otherwise c.f. issue
3555 * E26 of the XML erratas.
3556 */
Owen Taylor3473f882001-02-23 17:55:21 +00003557 } else {
3558 if (uri->fragment != NULL) {
3559 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3560 if ((ctxt->sax != NULL) &&
3561 (!ctxt->disableSAX) &&
3562 (ctxt->sax->error != NULL))
3563 ctxt->sax->error(ctxt->userData,
3564 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003565 /*
3566 * Okay this is foolish to block those but not
3567 * invalid URIs.
3568 */
Owen Taylor3473f882001-02-23 17:55:21 +00003569 ctxt->wellFormed = 0;
3570 } else {
3571 if ((ctxt->sax != NULL) &&
3572 (!ctxt->disableSAX) &&
3573 (ctxt->sax->entityDecl != NULL))
3574 ctxt->sax->entityDecl(ctxt->userData, name,
3575 XML_EXTERNAL_PARAMETER_ENTITY,
3576 literal, URI, NULL);
3577 }
3578 xmlFreeURI(uri);
3579 }
3580 }
3581 }
3582 } else {
3583 if ((RAW == '"') || (RAW == '\'')) {
3584 value = xmlParseEntityValue(ctxt, &orig);
3585 if ((ctxt->sax != NULL) &&
3586 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3587 ctxt->sax->entityDecl(ctxt->userData, name,
3588 XML_INTERNAL_GENERAL_ENTITY,
3589 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003590 /*
3591 * For expat compatibility in SAX mode.
3592 */
3593 if ((ctxt->myDoc == NULL) ||
3594 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3595 if (ctxt->myDoc == NULL) {
3596 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3597 }
3598 if (ctxt->myDoc->intSubset == NULL)
3599 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3600 BAD_CAST "fake", NULL, NULL);
3601
3602 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3603 NULL, NULL, value);
3604 }
Owen Taylor3473f882001-02-23 17:55:21 +00003605 } else {
3606 URI = xmlParseExternalID(ctxt, &literal, 1);
3607 if ((URI == NULL) && (literal == NULL)) {
3608 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3610 ctxt->sax->error(ctxt->userData,
3611 "Entity value required\n");
3612 ctxt->wellFormed = 0;
3613 ctxt->disableSAX = 1;
3614 }
3615 if (URI) {
3616 xmlURIPtr uri;
3617
3618 uri = xmlParseURI((const char *)URI);
3619 if (uri == NULL) {
3620 ctxt->errNo = XML_ERR_INVALID_URI;
3621 if ((ctxt->sax != NULL) &&
3622 (!ctxt->disableSAX) &&
3623 (ctxt->sax->error != NULL))
3624 ctxt->sax->error(ctxt->userData,
3625 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003626 /*
3627 * This really ought to be a well formedness error
3628 * but the XML Core WG decided otherwise c.f. issue
3629 * E26 of the XML erratas.
3630 */
Owen Taylor3473f882001-02-23 17:55:21 +00003631 } else {
3632 if (uri->fragment != NULL) {
3633 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3634 if ((ctxt->sax != NULL) &&
3635 (!ctxt->disableSAX) &&
3636 (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003639 /*
3640 * Okay this is foolish to block those but not
3641 * invalid URIs.
3642 */
Owen Taylor3473f882001-02-23 17:55:21 +00003643 ctxt->wellFormed = 0;
3644 }
3645 xmlFreeURI(uri);
3646 }
3647 }
3648 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3649 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3651 ctxt->sax->error(ctxt->userData,
3652 "Space required before 'NDATA'\n");
3653 ctxt->wellFormed = 0;
3654 ctxt->disableSAX = 1;
3655 }
3656 SKIP_BLANKS;
3657 if ((RAW == 'N') && (NXT(1) == 'D') &&
3658 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3659 (NXT(4) == 'A')) {
3660 SKIP(5);
3661 if (!IS_BLANK(CUR)) {
3662 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3664 ctxt->sax->error(ctxt->userData,
3665 "Space required after 'NDATA'\n");
3666 ctxt->wellFormed = 0;
3667 ctxt->disableSAX = 1;
3668 }
3669 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003670 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3672 (ctxt->sax->unparsedEntityDecl != NULL))
3673 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3674 literal, URI, ndata);
3675 } else {
3676 if ((ctxt->sax != NULL) &&
3677 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3678 ctxt->sax->entityDecl(ctxt->userData, name,
3679 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3680 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003681 /*
3682 * For expat compatibility in SAX mode.
3683 * assuming the entity repalcement was asked for
3684 */
3685 if ((ctxt->replaceEntities != 0) &&
3686 ((ctxt->myDoc == NULL) ||
3687 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3688 if (ctxt->myDoc == NULL) {
3689 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3690 }
3691
3692 if (ctxt->myDoc->intSubset == NULL)
3693 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3694 BAD_CAST "fake", NULL, NULL);
3695 entityDecl(ctxt, name,
3696 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3697 literal, URI, NULL);
3698 }
Owen Taylor3473f882001-02-23 17:55:21 +00003699 }
3700 }
3701 }
3702 SKIP_BLANKS;
3703 if (RAW != '>') {
3704 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3706 ctxt->sax->error(ctxt->userData,
3707 "xmlParseEntityDecl: entity %s not terminated\n", name);
3708 ctxt->wellFormed = 0;
3709 ctxt->disableSAX = 1;
3710 } else {
3711 if (input != ctxt->input) {
3712 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3714 ctxt->sax->error(ctxt->userData,
3715"Entity declaration doesn't start and stop in the same entity\n");
3716 ctxt->wellFormed = 0;
3717 ctxt->disableSAX = 1;
3718 }
3719 NEXT;
3720 }
3721 if (orig != NULL) {
3722 /*
3723 * Ugly mechanism to save the raw entity value.
3724 */
3725 xmlEntityPtr cur = NULL;
3726
3727 if (isParameter) {
3728 if ((ctxt->sax != NULL) &&
3729 (ctxt->sax->getParameterEntity != NULL))
3730 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3731 } else {
3732 if ((ctxt->sax != NULL) &&
3733 (ctxt->sax->getEntity != NULL))
3734 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003735 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3736 cur = getEntity(ctxt, name);
3737 }
Owen Taylor3473f882001-02-23 17:55:21 +00003738 }
3739 if (cur != NULL) {
3740 if (cur->orig != NULL)
3741 xmlFree(orig);
3742 else
3743 cur->orig = orig;
3744 } else
3745 xmlFree(orig);
3746 }
3747 if (name != NULL) xmlFree(name);
3748 if (value != NULL) xmlFree(value);
3749 if (URI != NULL) xmlFree(URI);
3750 if (literal != NULL) xmlFree(literal);
3751 if (ndata != NULL) xmlFree(ndata);
3752 }
3753}
3754
3755/**
3756 * xmlParseDefaultDecl:
3757 * @ctxt: an XML parser context
3758 * @value: Receive a possible fixed default value for the attribute
3759 *
3760 * Parse an attribute default declaration
3761 *
3762 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3763 *
3764 * [ VC: Required Attribute ]
3765 * if the default declaration is the keyword #REQUIRED, then the
3766 * attribute must be specified for all elements of the type in the
3767 * attribute-list declaration.
3768 *
3769 * [ VC: Attribute Default Legal ]
3770 * The declared default value must meet the lexical constraints of
3771 * the declared attribute type c.f. xmlValidateAttributeDecl()
3772 *
3773 * [ VC: Fixed Attribute Default ]
3774 * if an attribute has a default value declared with the #FIXED
3775 * keyword, instances of that attribute must match the default value.
3776 *
3777 * [ WFC: No < in Attribute Values ]
3778 * handled in xmlParseAttValue()
3779 *
3780 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3781 * or XML_ATTRIBUTE_FIXED.
3782 */
3783
3784int
3785xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3786 int val;
3787 xmlChar *ret;
3788
3789 *value = NULL;
3790 if ((RAW == '#') && (NXT(1) == 'R') &&
3791 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3792 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3793 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3794 (NXT(8) == 'D')) {
3795 SKIP(9);
3796 return(XML_ATTRIBUTE_REQUIRED);
3797 }
3798 if ((RAW == '#') && (NXT(1) == 'I') &&
3799 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3800 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3801 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3802 SKIP(8);
3803 return(XML_ATTRIBUTE_IMPLIED);
3804 }
3805 val = XML_ATTRIBUTE_NONE;
3806 if ((RAW == '#') && (NXT(1) == 'F') &&
3807 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3808 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3809 SKIP(6);
3810 val = XML_ATTRIBUTE_FIXED;
3811 if (!IS_BLANK(CUR)) {
3812 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3814 ctxt->sax->error(ctxt->userData,
3815 "Space required after '#FIXED'\n");
3816 ctxt->wellFormed = 0;
3817 ctxt->disableSAX = 1;
3818 }
3819 SKIP_BLANKS;
3820 }
3821 ret = xmlParseAttValue(ctxt);
3822 ctxt->instate = XML_PARSER_DTD;
3823 if (ret == NULL) {
3824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3825 ctxt->sax->error(ctxt->userData,
3826 "Attribute default value declaration error\n");
3827 ctxt->wellFormed = 0;
3828 ctxt->disableSAX = 1;
3829 } else
3830 *value = ret;
3831 return(val);
3832}
3833
3834/**
3835 * xmlParseNotationType:
3836 * @ctxt: an XML parser context
3837 *
3838 * parse an Notation attribute type.
3839 *
3840 * Note: the leading 'NOTATION' S part has already being parsed...
3841 *
3842 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3843 *
3844 * [ VC: Notation Attributes ]
3845 * Values of this type must match one of the notation names included
3846 * in the declaration; all notation names in the declaration must be declared.
3847 *
3848 * Returns: the notation attribute tree built while parsing
3849 */
3850
3851xmlEnumerationPtr
3852xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3853 xmlChar *name;
3854 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3855
3856 if (RAW != '(') {
3857 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3859 ctxt->sax->error(ctxt->userData,
3860 "'(' required to start 'NOTATION'\n");
3861 ctxt->wellFormed = 0;
3862 ctxt->disableSAX = 1;
3863 return(NULL);
3864 }
3865 SHRINK;
3866 do {
3867 NEXT;
3868 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003869 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003870 if (name == NULL) {
3871 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3873 ctxt->sax->error(ctxt->userData,
3874 "Name expected in NOTATION declaration\n");
3875 ctxt->wellFormed = 0;
3876 ctxt->disableSAX = 1;
3877 return(ret);
3878 }
3879 cur = xmlCreateEnumeration(name);
3880 xmlFree(name);
3881 if (cur == NULL) return(ret);
3882 if (last == NULL) ret = last = cur;
3883 else {
3884 last->next = cur;
3885 last = cur;
3886 }
3887 SKIP_BLANKS;
3888 } while (RAW == '|');
3889 if (RAW != ')') {
3890 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3892 ctxt->sax->error(ctxt->userData,
3893 "')' required to finish NOTATION declaration\n");
3894 ctxt->wellFormed = 0;
3895 ctxt->disableSAX = 1;
3896 if ((last != NULL) && (last != ret))
3897 xmlFreeEnumeration(last);
3898 return(ret);
3899 }
3900 NEXT;
3901 return(ret);
3902}
3903
3904/**
3905 * xmlParseEnumerationType:
3906 * @ctxt: an XML parser context
3907 *
3908 * parse an Enumeration attribute type.
3909 *
3910 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3911 *
3912 * [ VC: Enumeration ]
3913 * Values of this type must match one of the Nmtoken tokens in
3914 * the declaration
3915 *
3916 * Returns: the enumeration attribute tree built while parsing
3917 */
3918
3919xmlEnumerationPtr
3920xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3921 xmlChar *name;
3922 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3923
3924 if (RAW != '(') {
3925 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3927 ctxt->sax->error(ctxt->userData,
3928 "'(' required to start ATTLIST enumeration\n");
3929 ctxt->wellFormed = 0;
3930 ctxt->disableSAX = 1;
3931 return(NULL);
3932 }
3933 SHRINK;
3934 do {
3935 NEXT;
3936 SKIP_BLANKS;
3937 name = xmlParseNmtoken(ctxt);
3938 if (name == NULL) {
3939 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3941 ctxt->sax->error(ctxt->userData,
3942 "NmToken expected in ATTLIST enumeration\n");
3943 ctxt->wellFormed = 0;
3944 ctxt->disableSAX = 1;
3945 return(ret);
3946 }
3947 cur = xmlCreateEnumeration(name);
3948 xmlFree(name);
3949 if (cur == NULL) return(ret);
3950 if (last == NULL) ret = last = cur;
3951 else {
3952 last->next = cur;
3953 last = cur;
3954 }
3955 SKIP_BLANKS;
3956 } while (RAW == '|');
3957 if (RAW != ')') {
3958 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3960 ctxt->sax->error(ctxt->userData,
3961 "')' required to finish ATTLIST enumeration\n");
3962 ctxt->wellFormed = 0;
3963 ctxt->disableSAX = 1;
3964 return(ret);
3965 }
3966 NEXT;
3967 return(ret);
3968}
3969
3970/**
3971 * xmlParseEnumeratedType:
3972 * @ctxt: an XML parser context
3973 * @tree: the enumeration tree built while parsing
3974 *
3975 * parse an Enumerated attribute type.
3976 *
3977 * [57] EnumeratedType ::= NotationType | Enumeration
3978 *
3979 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3980 *
3981 *
3982 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3983 */
3984
3985int
3986xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3987 if ((RAW == 'N') && (NXT(1) == 'O') &&
3988 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3989 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3990 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3991 SKIP(8);
3992 if (!IS_BLANK(CUR)) {
3993 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3995 ctxt->sax->error(ctxt->userData,
3996 "Space required after 'NOTATION'\n");
3997 ctxt->wellFormed = 0;
3998 ctxt->disableSAX = 1;
3999 return(0);
4000 }
4001 SKIP_BLANKS;
4002 *tree = xmlParseNotationType(ctxt);
4003 if (*tree == NULL) return(0);
4004 return(XML_ATTRIBUTE_NOTATION);
4005 }
4006 *tree = xmlParseEnumerationType(ctxt);
4007 if (*tree == NULL) return(0);
4008 return(XML_ATTRIBUTE_ENUMERATION);
4009}
4010
4011/**
4012 * xmlParseAttributeType:
4013 * @ctxt: an XML parser context
4014 * @tree: the enumeration tree built while parsing
4015 *
4016 * parse the Attribute list def for an element
4017 *
4018 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4019 *
4020 * [55] StringType ::= 'CDATA'
4021 *
4022 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4023 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4024 *
4025 * Validity constraints for attribute values syntax are checked in
4026 * xmlValidateAttributeValue()
4027 *
4028 * [ VC: ID ]
4029 * Values of type ID must match the Name production. A name must not
4030 * appear more than once in an XML document as a value of this type;
4031 * i.e., ID values must uniquely identify the elements which bear them.
4032 *
4033 * [ VC: One ID per Element Type ]
4034 * No element type may have more than one ID attribute specified.
4035 *
4036 * [ VC: ID Attribute Default ]
4037 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4038 *
4039 * [ VC: IDREF ]
4040 * Values of type IDREF must match the Name production, and values
4041 * of type IDREFS must match Names; each IDREF Name must match the value
4042 * of an ID attribute on some element in the XML document; i.e. IDREF
4043 * values must match the value of some ID attribute.
4044 *
4045 * [ VC: Entity Name ]
4046 * Values of type ENTITY must match the Name production, values
4047 * of type ENTITIES must match Names; each Entity Name must match the
4048 * name of an unparsed entity declared in the DTD.
4049 *
4050 * [ VC: Name Token ]
4051 * Values of type NMTOKEN must match the Nmtoken production; values
4052 * of type NMTOKENS must match Nmtokens.
4053 *
4054 * Returns the attribute type
4055 */
4056int
4057xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4058 SHRINK;
4059 if ((RAW == 'C') && (NXT(1) == 'D') &&
4060 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4061 (NXT(4) == 'A')) {
4062 SKIP(5);
4063 return(XML_ATTRIBUTE_CDATA);
4064 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4065 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4066 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4067 SKIP(6);
4068 return(XML_ATTRIBUTE_IDREFS);
4069 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4070 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4071 (NXT(4) == 'F')) {
4072 SKIP(5);
4073 return(XML_ATTRIBUTE_IDREF);
4074 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4075 SKIP(2);
4076 return(XML_ATTRIBUTE_ID);
4077 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4078 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4079 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4080 SKIP(6);
4081 return(XML_ATTRIBUTE_ENTITY);
4082 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4083 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4084 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4085 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4086 SKIP(8);
4087 return(XML_ATTRIBUTE_ENTITIES);
4088 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4089 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4090 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4091 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4092 SKIP(8);
4093 return(XML_ATTRIBUTE_NMTOKENS);
4094 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4095 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4096 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4097 (NXT(6) == 'N')) {
4098 SKIP(7);
4099 return(XML_ATTRIBUTE_NMTOKEN);
4100 }
4101 return(xmlParseEnumeratedType(ctxt, tree));
4102}
4103
4104/**
4105 * xmlParseAttributeListDecl:
4106 * @ctxt: an XML parser context
4107 *
4108 * : parse the Attribute list def for an element
4109 *
4110 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4111 *
4112 * [53] AttDef ::= S Name S AttType S DefaultDecl
4113 *
4114 */
4115void
4116xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4117 xmlChar *elemName;
4118 xmlChar *attrName;
4119 xmlEnumerationPtr tree;
4120
4121 if ((RAW == '<') && (NXT(1) == '!') &&
4122 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4123 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4124 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4125 (NXT(8) == 'T')) {
4126 xmlParserInputPtr input = ctxt->input;
4127
4128 SKIP(9);
4129 if (!IS_BLANK(CUR)) {
4130 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4132 ctxt->sax->error(ctxt->userData,
4133 "Space required after '<!ATTLIST'\n");
4134 ctxt->wellFormed = 0;
4135 ctxt->disableSAX = 1;
4136 }
4137 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004138 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004139 if (elemName == NULL) {
4140 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4142 ctxt->sax->error(ctxt->userData,
4143 "ATTLIST: no name for Element\n");
4144 ctxt->wellFormed = 0;
4145 ctxt->disableSAX = 1;
4146 return;
4147 }
4148 SKIP_BLANKS;
4149 GROW;
4150 while (RAW != '>') {
4151 const xmlChar *check = CUR_PTR;
4152 int type;
4153 int def;
4154 xmlChar *defaultValue = NULL;
4155
4156 GROW;
4157 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004158 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004159 if (attrName == NULL) {
4160 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4162 ctxt->sax->error(ctxt->userData,
4163 "ATTLIST: no name for Attribute\n");
4164 ctxt->wellFormed = 0;
4165 ctxt->disableSAX = 1;
4166 break;
4167 }
4168 GROW;
4169 if (!IS_BLANK(CUR)) {
4170 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4172 ctxt->sax->error(ctxt->userData,
4173 "Space required after the attribute name\n");
4174 ctxt->wellFormed = 0;
4175 ctxt->disableSAX = 1;
4176 if (attrName != NULL)
4177 xmlFree(attrName);
4178 if (defaultValue != NULL)
4179 xmlFree(defaultValue);
4180 break;
4181 }
4182 SKIP_BLANKS;
4183
4184 type = xmlParseAttributeType(ctxt, &tree);
4185 if (type <= 0) {
4186 if (attrName != NULL)
4187 xmlFree(attrName);
4188 if (defaultValue != NULL)
4189 xmlFree(defaultValue);
4190 break;
4191 }
4192
4193 GROW;
4194 if (!IS_BLANK(CUR)) {
4195 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4197 ctxt->sax->error(ctxt->userData,
4198 "Space required after the attribute type\n");
4199 ctxt->wellFormed = 0;
4200 ctxt->disableSAX = 1;
4201 if (attrName != NULL)
4202 xmlFree(attrName);
4203 if (defaultValue != NULL)
4204 xmlFree(defaultValue);
4205 if (tree != NULL)
4206 xmlFreeEnumeration(tree);
4207 break;
4208 }
4209 SKIP_BLANKS;
4210
4211 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4212 if (def <= 0) {
4213 if (attrName != NULL)
4214 xmlFree(attrName);
4215 if (defaultValue != NULL)
4216 xmlFree(defaultValue);
4217 if (tree != NULL)
4218 xmlFreeEnumeration(tree);
4219 break;
4220 }
4221
4222 GROW;
4223 if (RAW != '>') {
4224 if (!IS_BLANK(CUR)) {
4225 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4227 ctxt->sax->error(ctxt->userData,
4228 "Space required after the attribute default value\n");
4229 ctxt->wellFormed = 0;
4230 ctxt->disableSAX = 1;
4231 if (attrName != NULL)
4232 xmlFree(attrName);
4233 if (defaultValue != NULL)
4234 xmlFree(defaultValue);
4235 if (tree != NULL)
4236 xmlFreeEnumeration(tree);
4237 break;
4238 }
4239 SKIP_BLANKS;
4240 }
4241 if (check == CUR_PTR) {
4242 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4244 ctxt->sax->error(ctxt->userData,
4245 "xmlParseAttributeListDecl: detected internal error\n");
4246 if (attrName != NULL)
4247 xmlFree(attrName);
4248 if (defaultValue != NULL)
4249 xmlFree(defaultValue);
4250 if (tree != NULL)
4251 xmlFreeEnumeration(tree);
4252 break;
4253 }
4254 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4255 (ctxt->sax->attributeDecl != NULL))
4256 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4257 type, def, defaultValue, tree);
4258 if (attrName != NULL)
4259 xmlFree(attrName);
4260 if (defaultValue != NULL)
4261 xmlFree(defaultValue);
4262 GROW;
4263 }
4264 if (RAW == '>') {
4265 if (input != ctxt->input) {
4266 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4268 ctxt->sax->error(ctxt->userData,
4269"Attribute list declaration doesn't start and stop in the same entity\n");
4270 ctxt->wellFormed = 0;
4271 ctxt->disableSAX = 1;
4272 }
4273 NEXT;
4274 }
4275
4276 xmlFree(elemName);
4277 }
4278}
4279
4280/**
4281 * xmlParseElementMixedContentDecl:
4282 * @ctxt: an XML parser context
4283 *
4284 * parse the declaration for a Mixed Element content
4285 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4286 *
4287 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4288 * '(' S? '#PCDATA' S? ')'
4289 *
4290 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4291 *
4292 * [ VC: No Duplicate Types ]
4293 * The same name must not appear more than once in a single
4294 * mixed-content declaration.
4295 *
4296 * returns: the list of the xmlElementContentPtr describing the element choices
4297 */
4298xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004299xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004300 xmlElementContentPtr ret = NULL, cur = NULL, n;
4301 xmlChar *elem = NULL;
4302
4303 GROW;
4304 if ((RAW == '#') && (NXT(1) == 'P') &&
4305 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4306 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4307 (NXT(6) == 'A')) {
4308 SKIP(7);
4309 SKIP_BLANKS;
4310 SHRINK;
4311 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004312 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4313 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4314 if (ctxt->vctxt.error != NULL)
4315 ctxt->vctxt.error(ctxt->vctxt.userData,
4316"Element content declaration doesn't start and stop in the same entity\n");
4317 ctxt->valid = 0;
4318 }
Owen Taylor3473f882001-02-23 17:55:21 +00004319 NEXT;
4320 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4321 if (RAW == '*') {
4322 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4323 NEXT;
4324 }
4325 return(ret);
4326 }
4327 if ((RAW == '(') || (RAW == '|')) {
4328 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4329 if (ret == NULL) return(NULL);
4330 }
4331 while (RAW == '|') {
4332 NEXT;
4333 if (elem == NULL) {
4334 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4335 if (ret == NULL) return(NULL);
4336 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004337 if (cur != NULL)
4338 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004339 cur = ret;
4340 } else {
4341 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4342 if (n == NULL) return(NULL);
4343 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004344 if (n->c1 != NULL)
4345 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004346 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004347 if (n != NULL)
4348 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004349 cur = n;
4350 xmlFree(elem);
4351 }
4352 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004353 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004354 if (elem == NULL) {
4355 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4357 ctxt->sax->error(ctxt->userData,
4358 "xmlParseElementMixedContentDecl : Name expected\n");
4359 ctxt->wellFormed = 0;
4360 ctxt->disableSAX = 1;
4361 xmlFreeElementContent(cur);
4362 return(NULL);
4363 }
4364 SKIP_BLANKS;
4365 GROW;
4366 }
4367 if ((RAW == ')') && (NXT(1) == '*')) {
4368 if (elem != NULL) {
4369 cur->c2 = xmlNewElementContent(elem,
4370 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004371 if (cur->c2 != NULL)
4372 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004373 xmlFree(elem);
4374 }
4375 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004376 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4377 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4378 if (ctxt->vctxt.error != NULL)
4379 ctxt->vctxt.error(ctxt->vctxt.userData,
4380"Element content declaration doesn't start and stop in the same entity\n");
4381 ctxt->valid = 0;
4382 }
Owen Taylor3473f882001-02-23 17:55:21 +00004383 SKIP(2);
4384 } else {
4385 if (elem != NULL) xmlFree(elem);
4386 xmlFreeElementContent(ret);
4387 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4389 ctxt->sax->error(ctxt->userData,
4390 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4391 ctxt->wellFormed = 0;
4392 ctxt->disableSAX = 1;
4393 return(NULL);
4394 }
4395
4396 } else {
4397 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4399 ctxt->sax->error(ctxt->userData,
4400 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4401 ctxt->wellFormed = 0;
4402 ctxt->disableSAX = 1;
4403 }
4404 return(ret);
4405}
4406
4407/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004408 * xmlParseElementChildrenContentD:
4409 * @ctxt: an XML parser context
4410 *
4411 * VMS version of xmlParseElementChildrenContentDecl()
4412 *
4413 * Returns the tree of xmlElementContentPtr describing the element
4414 * hierarchy.
4415 */
4416/**
Owen Taylor3473f882001-02-23 17:55:21 +00004417 * xmlParseElementChildrenContentDecl:
4418 * @ctxt: an XML parser context
4419 *
4420 * parse the declaration for a Mixed Element content
4421 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4422 *
4423 *
4424 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4425 *
4426 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4427 *
4428 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4429 *
4430 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4431 *
4432 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4433 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004434 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004435 * opening or closing parentheses in a choice, seq, or Mixed
4436 * construct is contained in the replacement text for a parameter
4437 * entity, both must be contained in the same replacement text. For
4438 * interoperability, if a parameter-entity reference appears in a
4439 * choice, seq, or Mixed construct, its replacement text should not
4440 * be empty, and neither the first nor last non-blank character of
4441 * the replacement text should be a connector (| or ,).
4442 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004443 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004444 * hierarchy.
4445 */
4446xmlElementContentPtr
4447#ifdef VMS
4448xmlParseElementChildrenContentD
4449#else
4450xmlParseElementChildrenContentDecl
4451#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004452(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004453 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4454 xmlChar *elem;
4455 xmlChar type = 0;
4456
4457 SKIP_BLANKS;
4458 GROW;
4459 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004460 xmlParserInputPtr input = ctxt->input;
4461
Owen Taylor3473f882001-02-23 17:55:21 +00004462 /* Recurse on first child */
4463 NEXT;
4464 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004465 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004466 SKIP_BLANKS;
4467 GROW;
4468 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004469 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004470 if (elem == NULL) {
4471 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4473 ctxt->sax->error(ctxt->userData,
4474 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4475 ctxt->wellFormed = 0;
4476 ctxt->disableSAX = 1;
4477 return(NULL);
4478 }
4479 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4480 GROW;
4481 if (RAW == '?') {
4482 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4483 NEXT;
4484 } else if (RAW == '*') {
4485 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4486 NEXT;
4487 } else if (RAW == '+') {
4488 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4489 NEXT;
4490 } else {
4491 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4492 }
4493 xmlFree(elem);
4494 GROW;
4495 }
4496 SKIP_BLANKS;
4497 SHRINK;
4498 while (RAW != ')') {
4499 /*
4500 * Each loop we parse one separator and one element.
4501 */
4502 if (RAW == ',') {
4503 if (type == 0) type = CUR;
4504
4505 /*
4506 * Detect "Name | Name , Name" error
4507 */
4508 else if (type != CUR) {
4509 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4511 ctxt->sax->error(ctxt->userData,
4512 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4513 type);
4514 ctxt->wellFormed = 0;
4515 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004516 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004517 xmlFreeElementContent(last);
4518 if (ret != NULL)
4519 xmlFreeElementContent(ret);
4520 return(NULL);
4521 }
4522 NEXT;
4523
4524 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4525 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004526 if ((last != NULL) && (last != ret))
4527 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004528 xmlFreeElementContent(ret);
4529 return(NULL);
4530 }
4531 if (last == NULL) {
4532 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004533 if (ret != NULL)
4534 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004535 ret = cur = op;
4536 } else {
4537 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004538 if (op != NULL)
4539 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004540 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004541 if (last != NULL)
4542 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004543 cur =op;
4544 last = NULL;
4545 }
4546 } else if (RAW == '|') {
4547 if (type == 0) type = CUR;
4548
4549 /*
4550 * Detect "Name , Name | Name" error
4551 */
4552 else if (type != CUR) {
4553 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4555 ctxt->sax->error(ctxt->userData,
4556 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4557 type);
4558 ctxt->wellFormed = 0;
4559 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004560 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004561 xmlFreeElementContent(last);
4562 if (ret != NULL)
4563 xmlFreeElementContent(ret);
4564 return(NULL);
4565 }
4566 NEXT;
4567
4568 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4569 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004570 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004571 xmlFreeElementContent(last);
4572 if (ret != NULL)
4573 xmlFreeElementContent(ret);
4574 return(NULL);
4575 }
4576 if (last == NULL) {
4577 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004578 if (ret != NULL)
4579 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004580 ret = cur = op;
4581 } else {
4582 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004583 if (op != NULL)
4584 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004585 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004586 if (last != NULL)
4587 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004588 cur =op;
4589 last = NULL;
4590 }
4591 } else {
4592 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4594 ctxt->sax->error(ctxt->userData,
4595 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4596 ctxt->wellFormed = 0;
4597 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004598 if (ret != NULL)
4599 xmlFreeElementContent(ret);
4600 return(NULL);
4601 }
4602 GROW;
4603 SKIP_BLANKS;
4604 GROW;
4605 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004606 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004607 /* Recurse on second child */
4608 NEXT;
4609 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004610 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004611 SKIP_BLANKS;
4612 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004613 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004614 if (elem == NULL) {
4615 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4617 ctxt->sax->error(ctxt->userData,
4618 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4619 ctxt->wellFormed = 0;
4620 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004621 if (ret != NULL)
4622 xmlFreeElementContent(ret);
4623 return(NULL);
4624 }
4625 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4626 xmlFree(elem);
4627 if (RAW == '?') {
4628 last->ocur = XML_ELEMENT_CONTENT_OPT;
4629 NEXT;
4630 } else if (RAW == '*') {
4631 last->ocur = XML_ELEMENT_CONTENT_MULT;
4632 NEXT;
4633 } else if (RAW == '+') {
4634 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4635 NEXT;
4636 } else {
4637 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4638 }
4639 }
4640 SKIP_BLANKS;
4641 GROW;
4642 }
4643 if ((cur != NULL) && (last != NULL)) {
4644 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004645 if (last != NULL)
4646 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004647 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004648 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4649 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4650 if (ctxt->vctxt.error != NULL)
4651 ctxt->vctxt.error(ctxt->vctxt.userData,
4652"Element content declaration doesn't start and stop in the same entity\n");
4653 ctxt->valid = 0;
4654 }
Owen Taylor3473f882001-02-23 17:55:21 +00004655 NEXT;
4656 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004657 if (ret != NULL)
4658 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004659 NEXT;
4660 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004661 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004662 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004663 cur = ret;
4664 /*
4665 * Some normalization:
4666 * (a | b* | c?)* == (a | b | c)*
4667 */
4668 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4669 if ((cur->c1 != NULL) &&
4670 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4671 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4672 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4673 if ((cur->c2 != NULL) &&
4674 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4675 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4676 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4677 cur = cur->c2;
4678 }
4679 }
Owen Taylor3473f882001-02-23 17:55:21 +00004680 NEXT;
4681 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004682 if (ret != NULL) {
4683 int found = 0;
4684
Daniel Veillarde470df72001-04-18 21:41:07 +00004685 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004686 /*
4687 * Some normalization:
4688 * (a | b*)+ == (a | b)*
4689 * (a | b?)+ == (a | b)*
4690 */
4691 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4692 if ((cur->c1 != NULL) &&
4693 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4694 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4695 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4696 found = 1;
4697 }
4698 if ((cur->c2 != NULL) &&
4699 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4700 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4701 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4702 found = 1;
4703 }
4704 cur = cur->c2;
4705 }
4706 if (found)
4707 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4708 }
Owen Taylor3473f882001-02-23 17:55:21 +00004709 NEXT;
4710 }
4711 return(ret);
4712}
4713
4714/**
4715 * xmlParseElementContentDecl:
4716 * @ctxt: an XML parser context
4717 * @name: the name of the element being defined.
4718 * @result: the Element Content pointer will be stored here if any
4719 *
4720 * parse the declaration for an Element content either Mixed or Children,
4721 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4722 *
4723 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4724 *
4725 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4726 */
4727
4728int
4729xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4730 xmlElementContentPtr *result) {
4731
4732 xmlElementContentPtr tree = NULL;
4733 xmlParserInputPtr input = ctxt->input;
4734 int res;
4735
4736 *result = NULL;
4737
4738 if (RAW != '(') {
4739 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4741 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004742 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004743 ctxt->wellFormed = 0;
4744 ctxt->disableSAX = 1;
4745 return(-1);
4746 }
4747 NEXT;
4748 GROW;
4749 SKIP_BLANKS;
4750 if ((RAW == '#') && (NXT(1) == 'P') &&
4751 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4752 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4753 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004754 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004755 res = XML_ELEMENT_TYPE_MIXED;
4756 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004757 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004758 res = XML_ELEMENT_TYPE_ELEMENT;
4759 }
Owen Taylor3473f882001-02-23 17:55:21 +00004760 SKIP_BLANKS;
4761 *result = tree;
4762 return(res);
4763}
4764
4765/**
4766 * xmlParseElementDecl:
4767 * @ctxt: an XML parser context
4768 *
4769 * parse an Element declaration.
4770 *
4771 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4772 *
4773 * [ VC: Unique Element Type Declaration ]
4774 * No element type may be declared more than once
4775 *
4776 * Returns the type of the element, or -1 in case of error
4777 */
4778int
4779xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4780 xmlChar *name;
4781 int ret = -1;
4782 xmlElementContentPtr content = NULL;
4783
4784 GROW;
4785 if ((RAW == '<') && (NXT(1) == '!') &&
4786 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4787 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4788 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4789 (NXT(8) == 'T')) {
4790 xmlParserInputPtr input = ctxt->input;
4791
4792 SKIP(9);
4793 if (!IS_BLANK(CUR)) {
4794 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4796 ctxt->sax->error(ctxt->userData,
4797 "Space required after 'ELEMENT'\n");
4798 ctxt->wellFormed = 0;
4799 ctxt->disableSAX = 1;
4800 }
4801 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004802 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004803 if (name == NULL) {
4804 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4806 ctxt->sax->error(ctxt->userData,
4807 "xmlParseElementDecl: no name for Element\n");
4808 ctxt->wellFormed = 0;
4809 ctxt->disableSAX = 1;
4810 return(-1);
4811 }
4812 while ((RAW == 0) && (ctxt->inputNr > 1))
4813 xmlPopInput(ctxt);
4814 if (!IS_BLANK(CUR)) {
4815 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4817 ctxt->sax->error(ctxt->userData,
4818 "Space required after the element name\n");
4819 ctxt->wellFormed = 0;
4820 ctxt->disableSAX = 1;
4821 }
4822 SKIP_BLANKS;
4823 if ((RAW == 'E') && (NXT(1) == 'M') &&
4824 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4825 (NXT(4) == 'Y')) {
4826 SKIP(5);
4827 /*
4828 * Element must always be empty.
4829 */
4830 ret = XML_ELEMENT_TYPE_EMPTY;
4831 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4832 (NXT(2) == 'Y')) {
4833 SKIP(3);
4834 /*
4835 * Element is a generic container.
4836 */
4837 ret = XML_ELEMENT_TYPE_ANY;
4838 } else if (RAW == '(') {
4839 ret = xmlParseElementContentDecl(ctxt, name, &content);
4840 } else {
4841 /*
4842 * [ WFC: PEs in Internal Subset ] error handling.
4843 */
4844 if ((RAW == '%') && (ctxt->external == 0) &&
4845 (ctxt->inputNr == 1)) {
4846 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4848 ctxt->sax->error(ctxt->userData,
4849 "PEReference: forbidden within markup decl in internal subset\n");
4850 } else {
4851 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4853 ctxt->sax->error(ctxt->userData,
4854 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4855 }
4856 ctxt->wellFormed = 0;
4857 ctxt->disableSAX = 1;
4858 if (name != NULL) xmlFree(name);
4859 return(-1);
4860 }
4861
4862 SKIP_BLANKS;
4863 /*
4864 * Pop-up of finished entities.
4865 */
4866 while ((RAW == 0) && (ctxt->inputNr > 1))
4867 xmlPopInput(ctxt);
4868 SKIP_BLANKS;
4869
4870 if (RAW != '>') {
4871 ctxt->errNo = XML_ERR_GT_REQUIRED;
4872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4873 ctxt->sax->error(ctxt->userData,
4874 "xmlParseElementDecl: expected '>' at the end\n");
4875 ctxt->wellFormed = 0;
4876 ctxt->disableSAX = 1;
4877 } else {
4878 if (input != ctxt->input) {
4879 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4881 ctxt->sax->error(ctxt->userData,
4882"Element declaration doesn't start and stop in the same entity\n");
4883 ctxt->wellFormed = 0;
4884 ctxt->disableSAX = 1;
4885 }
4886
4887 NEXT;
4888 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4889 (ctxt->sax->elementDecl != NULL))
4890 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4891 content);
4892 }
4893 if (content != NULL) {
4894 xmlFreeElementContent(content);
4895 }
4896 if (name != NULL) {
4897 xmlFree(name);
4898 }
4899 }
4900 return(ret);
4901}
4902
4903/**
Owen Taylor3473f882001-02-23 17:55:21 +00004904 * xmlParseConditionalSections
4905 * @ctxt: an XML parser context
4906 *
4907 * [61] conditionalSect ::= includeSect | ignoreSect
4908 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4909 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4910 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4911 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4912 */
4913
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004914static void
Owen Taylor3473f882001-02-23 17:55:21 +00004915xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4916 SKIP(3);
4917 SKIP_BLANKS;
4918 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4919 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4920 (NXT(6) == 'E')) {
4921 SKIP(7);
4922 SKIP_BLANKS;
4923 if (RAW != '[') {
4924 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4926 ctxt->sax->error(ctxt->userData,
4927 "XML conditional section '[' expected\n");
4928 ctxt->wellFormed = 0;
4929 ctxt->disableSAX = 1;
4930 } else {
4931 NEXT;
4932 }
4933 if (xmlParserDebugEntities) {
4934 if ((ctxt->input != NULL) && (ctxt->input->filename))
4935 xmlGenericError(xmlGenericErrorContext,
4936 "%s(%d): ", ctxt->input->filename,
4937 ctxt->input->line);
4938 xmlGenericError(xmlGenericErrorContext,
4939 "Entering INCLUDE Conditional Section\n");
4940 }
4941
4942 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4943 (NXT(2) != '>'))) {
4944 const xmlChar *check = CUR_PTR;
4945 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004946
4947 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4948 xmlParseConditionalSections(ctxt);
4949 } else if (IS_BLANK(CUR)) {
4950 NEXT;
4951 } else if (RAW == '%') {
4952 xmlParsePEReference(ctxt);
4953 } else
4954 xmlParseMarkupDecl(ctxt);
4955
4956 /*
4957 * Pop-up of finished entities.
4958 */
4959 while ((RAW == 0) && (ctxt->inputNr > 1))
4960 xmlPopInput(ctxt);
4961
Daniel Veillardfdc91562002-07-01 21:52:03 +00004962 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004963 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4965 ctxt->sax->error(ctxt->userData,
4966 "Content error in the external subset\n");
4967 ctxt->wellFormed = 0;
4968 ctxt->disableSAX = 1;
4969 break;
4970 }
4971 }
4972 if (xmlParserDebugEntities) {
4973 if ((ctxt->input != NULL) && (ctxt->input->filename))
4974 xmlGenericError(xmlGenericErrorContext,
4975 "%s(%d): ", ctxt->input->filename,
4976 ctxt->input->line);
4977 xmlGenericError(xmlGenericErrorContext,
4978 "Leaving INCLUDE Conditional Section\n");
4979 }
4980
4981 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4982 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4983 int state;
4984 int instate;
4985 int depth = 0;
4986
4987 SKIP(6);
4988 SKIP_BLANKS;
4989 if (RAW != '[') {
4990 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4992 ctxt->sax->error(ctxt->userData,
4993 "XML conditional section '[' expected\n");
4994 ctxt->wellFormed = 0;
4995 ctxt->disableSAX = 1;
4996 } else {
4997 NEXT;
4998 }
4999 if (xmlParserDebugEntities) {
5000 if ((ctxt->input != NULL) && (ctxt->input->filename))
5001 xmlGenericError(xmlGenericErrorContext,
5002 "%s(%d): ", ctxt->input->filename,
5003 ctxt->input->line);
5004 xmlGenericError(xmlGenericErrorContext,
5005 "Entering IGNORE Conditional Section\n");
5006 }
5007
5008 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005009 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005010 * But disable SAX event generating DTD building in the meantime
5011 */
5012 state = ctxt->disableSAX;
5013 instate = ctxt->instate;
5014 ctxt->disableSAX = 1;
5015 ctxt->instate = XML_PARSER_IGNORE;
5016
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005017 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005018 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5019 depth++;
5020 SKIP(3);
5021 continue;
5022 }
5023 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5024 if (--depth >= 0) SKIP(3);
5025 continue;
5026 }
5027 NEXT;
5028 continue;
5029 }
5030
5031 ctxt->disableSAX = state;
5032 ctxt->instate = instate;
5033
5034 if (xmlParserDebugEntities) {
5035 if ((ctxt->input != NULL) && (ctxt->input->filename))
5036 xmlGenericError(xmlGenericErrorContext,
5037 "%s(%d): ", ctxt->input->filename,
5038 ctxt->input->line);
5039 xmlGenericError(xmlGenericErrorContext,
5040 "Leaving IGNORE Conditional Section\n");
5041 }
5042
5043 } else {
5044 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5046 ctxt->sax->error(ctxt->userData,
5047 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5048 ctxt->wellFormed = 0;
5049 ctxt->disableSAX = 1;
5050 }
5051
5052 if (RAW == 0)
5053 SHRINK;
5054
5055 if (RAW == 0) {
5056 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5058 ctxt->sax->error(ctxt->userData,
5059 "XML conditional section not closed\n");
5060 ctxt->wellFormed = 0;
5061 ctxt->disableSAX = 1;
5062 } else {
5063 SKIP(3);
5064 }
5065}
5066
5067/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005068 * xmlParseMarkupDecl:
5069 * @ctxt: an XML parser context
5070 *
5071 * parse Markup declarations
5072 *
5073 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5074 * NotationDecl | PI | Comment
5075 *
5076 * [ VC: Proper Declaration/PE Nesting ]
5077 * Parameter-entity replacement text must be properly nested with
5078 * markup declarations. That is to say, if either the first character
5079 * or the last character of a markup declaration (markupdecl above) is
5080 * contained in the replacement text for a parameter-entity reference,
5081 * both must be contained in the same replacement text.
5082 *
5083 * [ WFC: PEs in Internal Subset ]
5084 * In the internal DTD subset, parameter-entity references can occur
5085 * only where markup declarations can occur, not within markup declarations.
5086 * (This does not apply to references that occur in external parameter
5087 * entities or to the external subset.)
5088 */
5089void
5090xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5091 GROW;
5092 xmlParseElementDecl(ctxt);
5093 xmlParseAttributeListDecl(ctxt);
5094 xmlParseEntityDecl(ctxt);
5095 xmlParseNotationDecl(ctxt);
5096 xmlParsePI(ctxt);
5097 xmlParseComment(ctxt);
5098 /*
5099 * This is only for internal subset. On external entities,
5100 * the replacement is done before parsing stage
5101 */
5102 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5103 xmlParsePEReference(ctxt);
5104
5105 /*
5106 * Conditional sections are allowed from entities included
5107 * by PE References in the internal subset.
5108 */
5109 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5110 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5111 xmlParseConditionalSections(ctxt);
5112 }
5113 }
5114
5115 ctxt->instate = XML_PARSER_DTD;
5116}
5117
5118/**
5119 * xmlParseTextDecl:
5120 * @ctxt: an XML parser context
5121 *
5122 * parse an XML declaration header for external entities
5123 *
5124 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5125 *
5126 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5127 */
5128
5129void
5130xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5131 xmlChar *version;
5132
5133 /*
5134 * We know that '<?xml' is here.
5135 */
5136 if ((RAW == '<') && (NXT(1) == '?') &&
5137 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5138 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5139 SKIP(5);
5140 } else {
5141 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5143 ctxt->sax->error(ctxt->userData,
5144 "Text declaration '<?xml' required\n");
5145 ctxt->wellFormed = 0;
5146 ctxt->disableSAX = 1;
5147
5148 return;
5149 }
5150
5151 if (!IS_BLANK(CUR)) {
5152 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5154 ctxt->sax->error(ctxt->userData,
5155 "Space needed after '<?xml'\n");
5156 ctxt->wellFormed = 0;
5157 ctxt->disableSAX = 1;
5158 }
5159 SKIP_BLANKS;
5160
5161 /*
5162 * We may have the VersionInfo here.
5163 */
5164 version = xmlParseVersionInfo(ctxt);
5165 if (version == NULL)
5166 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005167 else {
5168 if (!IS_BLANK(CUR)) {
5169 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5171 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5172 ctxt->wellFormed = 0;
5173 ctxt->disableSAX = 1;
5174 }
5175 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005176 ctxt->input->version = version;
5177
5178 /*
5179 * We must have the encoding declaration
5180 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005181 xmlParseEncodingDecl(ctxt);
5182 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5183 /*
5184 * The XML REC instructs us to stop parsing right here
5185 */
5186 return;
5187 }
5188
5189 SKIP_BLANKS;
5190 if ((RAW == '?') && (NXT(1) == '>')) {
5191 SKIP(2);
5192 } else if (RAW == '>') {
5193 /* Deprecated old WD ... */
5194 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5196 ctxt->sax->error(ctxt->userData,
5197 "XML declaration must end-up with '?>'\n");
5198 ctxt->wellFormed = 0;
5199 ctxt->disableSAX = 1;
5200 NEXT;
5201 } else {
5202 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5204 ctxt->sax->error(ctxt->userData,
5205 "parsing XML declaration: '?>' expected\n");
5206 ctxt->wellFormed = 0;
5207 ctxt->disableSAX = 1;
5208 MOVETO_ENDTAG(CUR_PTR);
5209 NEXT;
5210 }
5211}
5212
5213/**
Owen Taylor3473f882001-02-23 17:55:21 +00005214 * xmlParseExternalSubset:
5215 * @ctxt: an XML parser context
5216 * @ExternalID: the external identifier
5217 * @SystemID: the system identifier (or URL)
5218 *
5219 * parse Markup declarations from an external subset
5220 *
5221 * [30] extSubset ::= textDecl? extSubsetDecl
5222 *
5223 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5224 */
5225void
5226xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5227 const xmlChar *SystemID) {
5228 GROW;
5229 if ((RAW == '<') && (NXT(1) == '?') &&
5230 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5231 (NXT(4) == 'l')) {
5232 xmlParseTextDecl(ctxt);
5233 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5234 /*
5235 * The XML REC instructs us to stop parsing right here
5236 */
5237 ctxt->instate = XML_PARSER_EOF;
5238 return;
5239 }
5240 }
5241 if (ctxt->myDoc == NULL) {
5242 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5243 }
5244 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5245 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5246
5247 ctxt->instate = XML_PARSER_DTD;
5248 ctxt->external = 1;
5249 while (((RAW == '<') && (NXT(1) == '?')) ||
5250 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005251 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005252 const xmlChar *check = CUR_PTR;
5253 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005254
5255 GROW;
5256 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5257 xmlParseConditionalSections(ctxt);
5258 } else if (IS_BLANK(CUR)) {
5259 NEXT;
5260 } else if (RAW == '%') {
5261 xmlParsePEReference(ctxt);
5262 } else
5263 xmlParseMarkupDecl(ctxt);
5264
5265 /*
5266 * Pop-up of finished entities.
5267 */
5268 while ((RAW == 0) && (ctxt->inputNr > 1))
5269 xmlPopInput(ctxt);
5270
Daniel Veillardfdc91562002-07-01 21:52:03 +00005271 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005272 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5274 ctxt->sax->error(ctxt->userData,
5275 "Content error in the external subset\n");
5276 ctxt->wellFormed = 0;
5277 ctxt->disableSAX = 1;
5278 break;
5279 }
5280 }
5281
5282 if (RAW != 0) {
5283 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5285 ctxt->sax->error(ctxt->userData,
5286 "Extra content at the end of the document\n");
5287 ctxt->wellFormed = 0;
5288 ctxt->disableSAX = 1;
5289 }
5290
5291}
5292
5293/**
5294 * xmlParseReference:
5295 * @ctxt: an XML parser context
5296 *
5297 * parse and handle entity references in content, depending on the SAX
5298 * interface, this may end-up in a call to character() if this is a
5299 * CharRef, a predefined entity, if there is no reference() callback.
5300 * or if the parser was asked to switch to that mode.
5301 *
5302 * [67] Reference ::= EntityRef | CharRef
5303 */
5304void
5305xmlParseReference(xmlParserCtxtPtr ctxt) {
5306 xmlEntityPtr ent;
5307 xmlChar *val;
5308 if (RAW != '&') return;
5309
5310 if (NXT(1) == '#') {
5311 int i = 0;
5312 xmlChar out[10];
5313 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005314 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005315
5316 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5317 /*
5318 * So we are using non-UTF-8 buffers
5319 * Check that the char fit on 8bits, if not
5320 * generate a CharRef.
5321 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005322 if (value <= 0xFF) {
5323 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005324 out[1] = 0;
5325 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5326 (!ctxt->disableSAX))
5327 ctxt->sax->characters(ctxt->userData, out, 1);
5328 } else {
5329 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005330 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005331 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005332 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005333 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5334 (!ctxt->disableSAX))
5335 ctxt->sax->reference(ctxt->userData, out);
5336 }
5337 } else {
5338 /*
5339 * Just encode the value in UTF-8
5340 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005341 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005342 out[i] = 0;
5343 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5344 (!ctxt->disableSAX))
5345 ctxt->sax->characters(ctxt->userData, out, i);
5346 }
5347 } else {
5348 ent = xmlParseEntityRef(ctxt);
5349 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005350 if (!ctxt->wellFormed)
5351 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005352 if ((ent->name != NULL) &&
5353 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5354 xmlNodePtr list = NULL;
5355 int ret;
5356
5357
5358 /*
5359 * The first reference to the entity trigger a parsing phase
5360 * where the ent->children is filled with the result from
5361 * the parsing.
5362 */
5363 if (ent->children == NULL) {
5364 xmlChar *value;
5365 value = ent->content;
5366
5367 /*
5368 * Check that this entity is well formed
5369 */
5370 if ((value != NULL) &&
5371 (value[1] == 0) && (value[0] == '<') &&
5372 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5373 /*
5374 * DONE: get definite answer on this !!!
5375 * Lots of entity decls are used to declare a single
5376 * char
5377 * <!ENTITY lt "<">
5378 * Which seems to be valid since
5379 * 2.4: The ampersand character (&) and the left angle
5380 * bracket (<) may appear in their literal form only
5381 * when used ... They are also legal within the literal
5382 * entity value of an internal entity declaration;i
5383 * see "4.3.2 Well-Formed Parsed Entities".
5384 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5385 * Looking at the OASIS test suite and James Clark
5386 * tests, this is broken. However the XML REC uses
5387 * it. Is the XML REC not well-formed ????
5388 * This is a hack to avoid this problem
5389 *
5390 * ANSWER: since lt gt amp .. are already defined,
5391 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005392 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005393 * is lousy but acceptable.
5394 */
5395 list = xmlNewDocText(ctxt->myDoc, value);
5396 if (list != NULL) {
5397 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5398 (ent->children == NULL)) {
5399 ent->children = list;
5400 ent->last = list;
5401 list->parent = (xmlNodePtr) ent;
5402 } else {
5403 xmlFreeNodeList(list);
5404 }
5405 } else if (list != NULL) {
5406 xmlFreeNodeList(list);
5407 }
5408 } else {
5409 /*
5410 * 4.3.2: An internal general parsed entity is well-formed
5411 * if its replacement text matches the production labeled
5412 * content.
5413 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005414
5415 void *user_data;
5416 /*
5417 * This is a bit hackish but this seems the best
5418 * way to make sure both SAX and DOM entity support
5419 * behaves okay.
5420 */
5421 if (ctxt->userData == ctxt)
5422 user_data = NULL;
5423 else
5424 user_data = ctxt->userData;
5425
Owen Taylor3473f882001-02-23 17:55:21 +00005426 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5427 ctxt->depth++;
5428 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005429 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005430 value, &list);
5431 ctxt->depth--;
5432 } else if (ent->etype ==
5433 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5434 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005435 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005436 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005437 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005438 ctxt->depth--;
5439 } else {
5440 ret = -1;
5441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5442 ctxt->sax->error(ctxt->userData,
5443 "Internal: invalid entity type\n");
5444 }
5445 if (ret == XML_ERR_ENTITY_LOOP) {
5446 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5448 ctxt->sax->error(ctxt->userData,
5449 "Detected entity reference loop\n");
5450 ctxt->wellFormed = 0;
5451 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005452 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005453 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005454 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5455 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005456 (ent->children == NULL)) {
5457 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005458 if (ctxt->replaceEntities) {
5459 /*
5460 * Prune it directly in the generated document
5461 * except for single text nodes.
5462 */
5463 if ((list->type == XML_TEXT_NODE) &&
5464 (list->next == NULL)) {
5465 list->parent = (xmlNodePtr) ent;
5466 list = NULL;
5467 } else {
5468 while (list != NULL) {
5469 list->parent = (xmlNodePtr) ctxt->node;
5470 if (list->next == NULL)
5471 ent->last = list;
5472 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005473 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005474 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005475 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5476 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005477 }
5478 } else {
5479 while (list != NULL) {
5480 list->parent = (xmlNodePtr) ent;
5481 if (list->next == NULL)
5482 ent->last = list;
5483 list = list->next;
5484 }
Owen Taylor3473f882001-02-23 17:55:21 +00005485 }
5486 } else {
5487 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005488 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005489 }
5490 } else if (ret > 0) {
5491 ctxt->errNo = ret;
5492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5493 ctxt->sax->error(ctxt->userData,
5494 "Entity value required\n");
5495 ctxt->wellFormed = 0;
5496 ctxt->disableSAX = 1;
5497 } else if (list != NULL) {
5498 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005499 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005500 }
5501 }
5502 }
5503 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5504 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5505 /*
5506 * Create a node.
5507 */
5508 ctxt->sax->reference(ctxt->userData, ent->name);
5509 return;
5510 } else if (ctxt->replaceEntities) {
5511 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5512 /*
5513 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005514 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005515 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005516 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005517 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005518 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005519 cur = ent->children;
5520 while (cur != NULL) {
5521 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005522 if (firstChild == NULL){
5523 firstChild = new;
5524 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005525 xmlAddChild(ctxt->node, new);
5526 if (cur == ent->last)
5527 break;
5528 cur = cur->next;
5529 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005530 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5531 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005532 } else {
5533 /*
5534 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005535 * node with a possible previous text one which
5536 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005537 */
5538 if (ent->children->type == XML_TEXT_NODE)
5539 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5540 if ((ent->last != ent->children) &&
5541 (ent->last->type == XML_TEXT_NODE))
5542 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5543 xmlAddChildList(ctxt->node, ent->children);
5544 }
5545
Owen Taylor3473f882001-02-23 17:55:21 +00005546 /*
5547 * This is to avoid a nasty side effect, see
5548 * characters() in SAX.c
5549 */
5550 ctxt->nodemem = 0;
5551 ctxt->nodelen = 0;
5552 return;
5553 } else {
5554 /*
5555 * Probably running in SAX mode
5556 */
5557 xmlParserInputPtr input;
5558
5559 input = xmlNewEntityInputStream(ctxt, ent);
5560 xmlPushInput(ctxt, input);
5561 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5562 (RAW == '<') && (NXT(1) == '?') &&
5563 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5564 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5565 xmlParseTextDecl(ctxt);
5566 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5567 /*
5568 * The XML REC instructs us to stop parsing right here
5569 */
5570 ctxt->instate = XML_PARSER_EOF;
5571 return;
5572 }
5573 if (input->standalone == 1) {
5574 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5576 ctxt->sax->error(ctxt->userData,
5577 "external parsed entities cannot be standalone\n");
5578 ctxt->wellFormed = 0;
5579 ctxt->disableSAX = 1;
5580 }
5581 }
5582 return;
5583 }
5584 }
5585 } else {
5586 val = ent->content;
5587 if (val == NULL) return;
5588 /*
5589 * inline the entity.
5590 */
5591 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5592 (!ctxt->disableSAX))
5593 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5594 }
5595 }
5596}
5597
5598/**
5599 * xmlParseEntityRef:
5600 * @ctxt: an XML parser context
5601 *
5602 * parse ENTITY references declarations
5603 *
5604 * [68] EntityRef ::= '&' Name ';'
5605 *
5606 * [ WFC: Entity Declared ]
5607 * In a document without any DTD, a document with only an internal DTD
5608 * subset which contains no parameter entity references, or a document
5609 * with "standalone='yes'", the Name given in the entity reference
5610 * must match that in an entity declaration, except that well-formed
5611 * documents need not declare any of the following entities: amp, lt,
5612 * gt, apos, quot. The declaration of a parameter entity must precede
5613 * any reference to it. Similarly, the declaration of a general entity
5614 * must precede any reference to it which appears in a default value in an
5615 * attribute-list declaration. Note that if entities are declared in the
5616 * external subset or in external parameter entities, a non-validating
5617 * processor is not obligated to read and process their declarations;
5618 * for such documents, the rule that an entity must be declared is a
5619 * well-formedness constraint only if standalone='yes'.
5620 *
5621 * [ WFC: Parsed Entity ]
5622 * An entity reference must not contain the name of an unparsed entity
5623 *
5624 * Returns the xmlEntityPtr if found, or NULL otherwise.
5625 */
5626xmlEntityPtr
5627xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5628 xmlChar *name;
5629 xmlEntityPtr ent = NULL;
5630
5631 GROW;
5632
5633 if (RAW == '&') {
5634 NEXT;
5635 name = xmlParseName(ctxt);
5636 if (name == NULL) {
5637 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5639 ctxt->sax->error(ctxt->userData,
5640 "xmlParseEntityRef: no name\n");
5641 ctxt->wellFormed = 0;
5642 ctxt->disableSAX = 1;
5643 } else {
5644 if (RAW == ';') {
5645 NEXT;
5646 /*
5647 * Ask first SAX for entity resolution, otherwise try the
5648 * predefined set.
5649 */
5650 if (ctxt->sax != NULL) {
5651 if (ctxt->sax->getEntity != NULL)
5652 ent = ctxt->sax->getEntity(ctxt->userData, name);
5653 if (ent == NULL)
5654 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005655 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5656 ent = getEntity(ctxt, name);
5657 }
Owen Taylor3473f882001-02-23 17:55:21 +00005658 }
5659 /*
5660 * [ WFC: Entity Declared ]
5661 * In a document without any DTD, a document with only an
5662 * internal DTD subset which contains no parameter entity
5663 * references, or a document with "standalone='yes'", the
5664 * Name given in the entity reference must match that in an
5665 * entity declaration, except that well-formed documents
5666 * need not declare any of the following entities: amp, lt,
5667 * gt, apos, quot.
5668 * The declaration of a parameter entity must precede any
5669 * reference to it.
5670 * Similarly, the declaration of a general entity must
5671 * precede any reference to it which appears in a default
5672 * value in an attribute-list declaration. Note that if
5673 * entities are declared in the external subset or in
5674 * external parameter entities, a non-validating processor
5675 * is not obligated to read and process their declarations;
5676 * for such documents, the rule that an entity must be
5677 * declared is a well-formedness constraint only if
5678 * standalone='yes'.
5679 */
5680 if (ent == NULL) {
5681 if ((ctxt->standalone == 1) ||
5682 ((ctxt->hasExternalSubset == 0) &&
5683 (ctxt->hasPErefs == 0))) {
5684 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5686 ctxt->sax->error(ctxt->userData,
5687 "Entity '%s' not defined\n", name);
5688 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005689 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005690 ctxt->disableSAX = 1;
5691 } else {
5692 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005694 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005695 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005696 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005697 }
5698 }
5699
5700 /*
5701 * [ WFC: Parsed Entity ]
5702 * An entity reference must not contain the name of an
5703 * unparsed entity
5704 */
5705 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5706 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5708 ctxt->sax->error(ctxt->userData,
5709 "Entity reference to unparsed entity %s\n", name);
5710 ctxt->wellFormed = 0;
5711 ctxt->disableSAX = 1;
5712 }
5713
5714 /*
5715 * [ WFC: No External Entity References ]
5716 * Attribute values cannot contain direct or indirect
5717 * entity references to external entities.
5718 */
5719 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5720 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5721 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5723 ctxt->sax->error(ctxt->userData,
5724 "Attribute references external entity '%s'\n", name);
5725 ctxt->wellFormed = 0;
5726 ctxt->disableSAX = 1;
5727 }
5728 /*
5729 * [ WFC: No < in Attribute Values ]
5730 * The replacement text of any entity referred to directly or
5731 * indirectly in an attribute value (other than "&lt;") must
5732 * not contain a <.
5733 */
5734 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5735 (ent != NULL) &&
5736 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5737 (ent->content != NULL) &&
5738 (xmlStrchr(ent->content, '<'))) {
5739 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5741 ctxt->sax->error(ctxt->userData,
5742 "'<' in entity '%s' is not allowed in attributes values\n", name);
5743 ctxt->wellFormed = 0;
5744 ctxt->disableSAX = 1;
5745 }
5746
5747 /*
5748 * Internal check, no parameter entities here ...
5749 */
5750 else {
5751 switch (ent->etype) {
5752 case XML_INTERNAL_PARAMETER_ENTITY:
5753 case XML_EXTERNAL_PARAMETER_ENTITY:
5754 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5756 ctxt->sax->error(ctxt->userData,
5757 "Attempt to reference the parameter entity '%s'\n", name);
5758 ctxt->wellFormed = 0;
5759 ctxt->disableSAX = 1;
5760 break;
5761 default:
5762 break;
5763 }
5764 }
5765
5766 /*
5767 * [ WFC: No Recursion ]
5768 * A parsed entity must not contain a recursive reference
5769 * to itself, either directly or indirectly.
5770 * Done somewhere else
5771 */
5772
5773 } else {
5774 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5776 ctxt->sax->error(ctxt->userData,
5777 "xmlParseEntityRef: expecting ';'\n");
5778 ctxt->wellFormed = 0;
5779 ctxt->disableSAX = 1;
5780 }
5781 xmlFree(name);
5782 }
5783 }
5784 return(ent);
5785}
5786
5787/**
5788 * xmlParseStringEntityRef:
5789 * @ctxt: an XML parser context
5790 * @str: a pointer to an index in the string
5791 *
5792 * parse ENTITY references declarations, but this version parses it from
5793 * a string value.
5794 *
5795 * [68] EntityRef ::= '&' Name ';'
5796 *
5797 * [ WFC: Entity Declared ]
5798 * In a document without any DTD, a document with only an internal DTD
5799 * subset which contains no parameter entity references, or a document
5800 * with "standalone='yes'", the Name given in the entity reference
5801 * must match that in an entity declaration, except that well-formed
5802 * documents need not declare any of the following entities: amp, lt,
5803 * gt, apos, quot. The declaration of a parameter entity must precede
5804 * any reference to it. Similarly, the declaration of a general entity
5805 * must precede any reference to it which appears in a default value in an
5806 * attribute-list declaration. Note that if entities are declared in the
5807 * external subset or in external parameter entities, a non-validating
5808 * processor is not obligated to read and process their declarations;
5809 * for such documents, the rule that an entity must be declared is a
5810 * well-formedness constraint only if standalone='yes'.
5811 *
5812 * [ WFC: Parsed Entity ]
5813 * An entity reference must not contain the name of an unparsed entity
5814 *
5815 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5816 * is updated to the current location in the string.
5817 */
5818xmlEntityPtr
5819xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5820 xmlChar *name;
5821 const xmlChar *ptr;
5822 xmlChar cur;
5823 xmlEntityPtr ent = NULL;
5824
5825 if ((str == NULL) || (*str == NULL))
5826 return(NULL);
5827 ptr = *str;
5828 cur = *ptr;
5829 if (cur == '&') {
5830 ptr++;
5831 cur = *ptr;
5832 name = xmlParseStringName(ctxt, &ptr);
5833 if (name == NULL) {
5834 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5836 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005837 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005838 ctxt->wellFormed = 0;
5839 ctxt->disableSAX = 1;
5840 } else {
5841 if (*ptr == ';') {
5842 ptr++;
5843 /*
5844 * Ask first SAX for entity resolution, otherwise try the
5845 * predefined set.
5846 */
5847 if (ctxt->sax != NULL) {
5848 if (ctxt->sax->getEntity != NULL)
5849 ent = ctxt->sax->getEntity(ctxt->userData, name);
5850 if (ent == NULL)
5851 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005852 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5853 ent = getEntity(ctxt, name);
5854 }
Owen Taylor3473f882001-02-23 17:55:21 +00005855 }
5856 /*
5857 * [ WFC: Entity Declared ]
5858 * In a document without any DTD, a document with only an
5859 * internal DTD subset which contains no parameter entity
5860 * references, or a document with "standalone='yes'", the
5861 * Name given in the entity reference must match that in an
5862 * entity declaration, except that well-formed documents
5863 * need not declare any of the following entities: amp, lt,
5864 * gt, apos, quot.
5865 * The declaration of a parameter entity must precede any
5866 * reference to it.
5867 * Similarly, the declaration of a general entity must
5868 * precede any reference to it which appears in a default
5869 * value in an attribute-list declaration. Note that if
5870 * entities are declared in the external subset or in
5871 * external parameter entities, a non-validating processor
5872 * is not obligated to read and process their declarations;
5873 * for such documents, the rule that an entity must be
5874 * declared is a well-formedness constraint only if
5875 * standalone='yes'.
5876 */
5877 if (ent == NULL) {
5878 if ((ctxt->standalone == 1) ||
5879 ((ctxt->hasExternalSubset == 0) &&
5880 (ctxt->hasPErefs == 0))) {
5881 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5883 ctxt->sax->error(ctxt->userData,
5884 "Entity '%s' not defined\n", name);
5885 ctxt->wellFormed = 0;
5886 ctxt->disableSAX = 1;
5887 } else {
5888 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5889 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5890 ctxt->sax->warning(ctxt->userData,
5891 "Entity '%s' not defined\n", name);
5892 }
5893 }
5894
5895 /*
5896 * [ WFC: Parsed Entity ]
5897 * An entity reference must not contain the name of an
5898 * unparsed entity
5899 */
5900 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5901 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5903 ctxt->sax->error(ctxt->userData,
5904 "Entity reference to unparsed entity %s\n", name);
5905 ctxt->wellFormed = 0;
5906 ctxt->disableSAX = 1;
5907 }
5908
5909 /*
5910 * [ WFC: No External Entity References ]
5911 * Attribute values cannot contain direct or indirect
5912 * entity references to external entities.
5913 */
5914 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5915 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5916 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5918 ctxt->sax->error(ctxt->userData,
5919 "Attribute references external entity '%s'\n", name);
5920 ctxt->wellFormed = 0;
5921 ctxt->disableSAX = 1;
5922 }
5923 /*
5924 * [ WFC: No < in Attribute Values ]
5925 * The replacement text of any entity referred to directly or
5926 * indirectly in an attribute value (other than "&lt;") must
5927 * not contain a <.
5928 */
5929 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5930 (ent != NULL) &&
5931 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5932 (ent->content != NULL) &&
5933 (xmlStrchr(ent->content, '<'))) {
5934 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5936 ctxt->sax->error(ctxt->userData,
5937 "'<' in entity '%s' is not allowed in attributes values\n", name);
5938 ctxt->wellFormed = 0;
5939 ctxt->disableSAX = 1;
5940 }
5941
5942 /*
5943 * Internal check, no parameter entities here ...
5944 */
5945 else {
5946 switch (ent->etype) {
5947 case XML_INTERNAL_PARAMETER_ENTITY:
5948 case XML_EXTERNAL_PARAMETER_ENTITY:
5949 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5951 ctxt->sax->error(ctxt->userData,
5952 "Attempt to reference the parameter entity '%s'\n", name);
5953 ctxt->wellFormed = 0;
5954 ctxt->disableSAX = 1;
5955 break;
5956 default:
5957 break;
5958 }
5959 }
5960
5961 /*
5962 * [ WFC: No Recursion ]
5963 * A parsed entity must not contain a recursive reference
5964 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005965 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005966 */
5967
5968 } else {
5969 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5971 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005972 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005973 ctxt->wellFormed = 0;
5974 ctxt->disableSAX = 1;
5975 }
5976 xmlFree(name);
5977 }
5978 }
5979 *str = ptr;
5980 return(ent);
5981}
5982
5983/**
5984 * xmlParsePEReference:
5985 * @ctxt: an XML parser context
5986 *
5987 * parse PEReference declarations
5988 * The entity content is handled directly by pushing it's content as
5989 * a new input stream.
5990 *
5991 * [69] PEReference ::= '%' Name ';'
5992 *
5993 * [ WFC: No Recursion ]
5994 * A parsed entity must not contain a recursive
5995 * reference to itself, either directly or indirectly.
5996 *
5997 * [ WFC: Entity Declared ]
5998 * In a document without any DTD, a document with only an internal DTD
5999 * subset which contains no parameter entity references, or a document
6000 * with "standalone='yes'", ... ... The declaration of a parameter
6001 * entity must precede any reference to it...
6002 *
6003 * [ VC: Entity Declared ]
6004 * In a document with an external subset or external parameter entities
6005 * with "standalone='no'", ... ... The declaration of a parameter entity
6006 * must precede any reference to it...
6007 *
6008 * [ WFC: In DTD ]
6009 * Parameter-entity references may only appear in the DTD.
6010 * NOTE: misleading but this is handled.
6011 */
6012void
6013xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6014 xmlChar *name;
6015 xmlEntityPtr entity = NULL;
6016 xmlParserInputPtr input;
6017
6018 if (RAW == '%') {
6019 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006020 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006021 if (name == NULL) {
6022 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6024 ctxt->sax->error(ctxt->userData,
6025 "xmlParsePEReference: no name\n");
6026 ctxt->wellFormed = 0;
6027 ctxt->disableSAX = 1;
6028 } else {
6029 if (RAW == ';') {
6030 NEXT;
6031 if ((ctxt->sax != NULL) &&
6032 (ctxt->sax->getParameterEntity != NULL))
6033 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6034 name);
6035 if (entity == NULL) {
6036 /*
6037 * [ WFC: Entity Declared ]
6038 * In a document without any DTD, a document with only an
6039 * internal DTD subset which contains no parameter entity
6040 * references, or a document with "standalone='yes'", ...
6041 * ... The declaration of a parameter entity must precede
6042 * any reference to it...
6043 */
6044 if ((ctxt->standalone == 1) ||
6045 ((ctxt->hasExternalSubset == 0) &&
6046 (ctxt->hasPErefs == 0))) {
6047 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6048 if ((!ctxt->disableSAX) &&
6049 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6050 ctxt->sax->error(ctxt->userData,
6051 "PEReference: %%%s; not found\n", name);
6052 ctxt->wellFormed = 0;
6053 ctxt->disableSAX = 1;
6054 } else {
6055 /*
6056 * [ VC: Entity Declared ]
6057 * In a document with an external subset or external
6058 * parameter entities with "standalone='no'", ...
6059 * ... The declaration of a parameter entity must precede
6060 * any reference to it...
6061 */
6062 if ((!ctxt->disableSAX) &&
6063 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6064 ctxt->sax->warning(ctxt->userData,
6065 "PEReference: %%%s; not found\n", name);
6066 ctxt->valid = 0;
6067 }
6068 } else {
6069 /*
6070 * Internal checking in case the entity quest barfed
6071 */
6072 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6073 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6074 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6075 ctxt->sax->warning(ctxt->userData,
6076 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006077 } else if (ctxt->input->free != deallocblankswrapper) {
6078 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6079 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006080 } else {
6081 /*
6082 * TODO !!!
6083 * handle the extra spaces added before and after
6084 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6085 */
6086 input = xmlNewEntityInputStream(ctxt, entity);
6087 xmlPushInput(ctxt, input);
6088 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6089 (RAW == '<') && (NXT(1) == '?') &&
6090 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6091 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6092 xmlParseTextDecl(ctxt);
6093 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6094 /*
6095 * The XML REC instructs us to stop parsing
6096 * right here
6097 */
6098 ctxt->instate = XML_PARSER_EOF;
6099 xmlFree(name);
6100 return;
6101 }
6102 }
Owen Taylor3473f882001-02-23 17:55:21 +00006103 }
6104 }
6105 ctxt->hasPErefs = 1;
6106 } else {
6107 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6108 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6109 ctxt->sax->error(ctxt->userData,
6110 "xmlParsePEReference: expecting ';'\n");
6111 ctxt->wellFormed = 0;
6112 ctxt->disableSAX = 1;
6113 }
6114 xmlFree(name);
6115 }
6116 }
6117}
6118
6119/**
6120 * xmlParseStringPEReference:
6121 * @ctxt: an XML parser context
6122 * @str: a pointer to an index in the string
6123 *
6124 * parse PEReference declarations
6125 *
6126 * [69] PEReference ::= '%' Name ';'
6127 *
6128 * [ WFC: No Recursion ]
6129 * A parsed entity must not contain a recursive
6130 * reference to itself, either directly or indirectly.
6131 *
6132 * [ WFC: Entity Declared ]
6133 * In a document without any DTD, a document with only an internal DTD
6134 * subset which contains no parameter entity references, or a document
6135 * with "standalone='yes'", ... ... The declaration of a parameter
6136 * entity must precede any reference to it...
6137 *
6138 * [ VC: Entity Declared ]
6139 * In a document with an external subset or external parameter entities
6140 * with "standalone='no'", ... ... The declaration of a parameter entity
6141 * must precede any reference to it...
6142 *
6143 * [ WFC: In DTD ]
6144 * Parameter-entity references may only appear in the DTD.
6145 * NOTE: misleading but this is handled.
6146 *
6147 * Returns the string of the entity content.
6148 * str is updated to the current value of the index
6149 */
6150xmlEntityPtr
6151xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6152 const xmlChar *ptr;
6153 xmlChar cur;
6154 xmlChar *name;
6155 xmlEntityPtr entity = NULL;
6156
6157 if ((str == NULL) || (*str == NULL)) return(NULL);
6158 ptr = *str;
6159 cur = *ptr;
6160 if (cur == '%') {
6161 ptr++;
6162 cur = *ptr;
6163 name = xmlParseStringName(ctxt, &ptr);
6164 if (name == NULL) {
6165 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6167 ctxt->sax->error(ctxt->userData,
6168 "xmlParseStringPEReference: no name\n");
6169 ctxt->wellFormed = 0;
6170 ctxt->disableSAX = 1;
6171 } else {
6172 cur = *ptr;
6173 if (cur == ';') {
6174 ptr++;
6175 cur = *ptr;
6176 if ((ctxt->sax != NULL) &&
6177 (ctxt->sax->getParameterEntity != NULL))
6178 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6179 name);
6180 if (entity == NULL) {
6181 /*
6182 * [ WFC: Entity Declared ]
6183 * In a document without any DTD, a document with only an
6184 * internal DTD subset which contains no parameter entity
6185 * references, or a document with "standalone='yes'", ...
6186 * ... The declaration of a parameter entity must precede
6187 * any reference to it...
6188 */
6189 if ((ctxt->standalone == 1) ||
6190 ((ctxt->hasExternalSubset == 0) &&
6191 (ctxt->hasPErefs == 0))) {
6192 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6194 ctxt->sax->error(ctxt->userData,
6195 "PEReference: %%%s; not found\n", name);
6196 ctxt->wellFormed = 0;
6197 ctxt->disableSAX = 1;
6198 } else {
6199 /*
6200 * [ VC: Entity Declared ]
6201 * In a document with an external subset or external
6202 * parameter entities with "standalone='no'", ...
6203 * ... The declaration of a parameter entity must
6204 * precede any reference to it...
6205 */
6206 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6207 ctxt->sax->warning(ctxt->userData,
6208 "PEReference: %%%s; not found\n", name);
6209 ctxt->valid = 0;
6210 }
6211 } else {
6212 /*
6213 * Internal checking in case the entity quest barfed
6214 */
6215 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6216 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6217 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6218 ctxt->sax->warning(ctxt->userData,
6219 "Internal: %%%s; is not a parameter entity\n", name);
6220 }
6221 }
6222 ctxt->hasPErefs = 1;
6223 } else {
6224 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6226 ctxt->sax->error(ctxt->userData,
6227 "xmlParseStringPEReference: expecting ';'\n");
6228 ctxt->wellFormed = 0;
6229 ctxt->disableSAX = 1;
6230 }
6231 xmlFree(name);
6232 }
6233 }
6234 *str = ptr;
6235 return(entity);
6236}
6237
6238/**
6239 * xmlParseDocTypeDecl:
6240 * @ctxt: an XML parser context
6241 *
6242 * parse a DOCTYPE declaration
6243 *
6244 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6245 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6246 *
6247 * [ VC: Root Element Type ]
6248 * The Name in the document type declaration must match the element
6249 * type of the root element.
6250 */
6251
6252void
6253xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6254 xmlChar *name = NULL;
6255 xmlChar *ExternalID = NULL;
6256 xmlChar *URI = NULL;
6257
6258 /*
6259 * We know that '<!DOCTYPE' has been detected.
6260 */
6261 SKIP(9);
6262
6263 SKIP_BLANKS;
6264
6265 /*
6266 * Parse the DOCTYPE name.
6267 */
6268 name = xmlParseName(ctxt);
6269 if (name == NULL) {
6270 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6272 ctxt->sax->error(ctxt->userData,
6273 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6274 ctxt->wellFormed = 0;
6275 ctxt->disableSAX = 1;
6276 }
6277 ctxt->intSubName = name;
6278
6279 SKIP_BLANKS;
6280
6281 /*
6282 * Check for SystemID and ExternalID
6283 */
6284 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6285
6286 if ((URI != NULL) || (ExternalID != NULL)) {
6287 ctxt->hasExternalSubset = 1;
6288 }
6289 ctxt->extSubURI = URI;
6290 ctxt->extSubSystem = ExternalID;
6291
6292 SKIP_BLANKS;
6293
6294 /*
6295 * Create and update the internal subset.
6296 */
6297 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6298 (!ctxt->disableSAX))
6299 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6300
6301 /*
6302 * Is there any internal subset declarations ?
6303 * they are handled separately in xmlParseInternalSubset()
6304 */
6305 if (RAW == '[')
6306 return;
6307
6308 /*
6309 * We should be at the end of the DOCTYPE declaration.
6310 */
6311 if (RAW != '>') {
6312 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006314 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006315 ctxt->wellFormed = 0;
6316 ctxt->disableSAX = 1;
6317 }
6318 NEXT;
6319}
6320
6321/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006322 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006323 * @ctxt: an XML parser context
6324 *
6325 * parse the internal subset declaration
6326 *
6327 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6328 */
6329
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006330static void
Owen Taylor3473f882001-02-23 17:55:21 +00006331xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6332 /*
6333 * Is there any DTD definition ?
6334 */
6335 if (RAW == '[') {
6336 ctxt->instate = XML_PARSER_DTD;
6337 NEXT;
6338 /*
6339 * Parse the succession of Markup declarations and
6340 * PEReferences.
6341 * Subsequence (markupdecl | PEReference | S)*
6342 */
6343 while (RAW != ']') {
6344 const xmlChar *check = CUR_PTR;
6345 int cons = ctxt->input->consumed;
6346
6347 SKIP_BLANKS;
6348 xmlParseMarkupDecl(ctxt);
6349 xmlParsePEReference(ctxt);
6350
6351 /*
6352 * Pop-up of finished entities.
6353 */
6354 while ((RAW == 0) && (ctxt->inputNr > 1))
6355 xmlPopInput(ctxt);
6356
6357 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6358 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6360 ctxt->sax->error(ctxt->userData,
6361 "xmlParseInternalSubset: error detected in Markup declaration\n");
6362 ctxt->wellFormed = 0;
6363 ctxt->disableSAX = 1;
6364 break;
6365 }
6366 }
6367 if (RAW == ']') {
6368 NEXT;
6369 SKIP_BLANKS;
6370 }
6371 }
6372
6373 /*
6374 * We should be at the end of the DOCTYPE declaration.
6375 */
6376 if (RAW != '>') {
6377 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006379 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006380 ctxt->wellFormed = 0;
6381 ctxt->disableSAX = 1;
6382 }
6383 NEXT;
6384}
6385
6386/**
6387 * xmlParseAttribute:
6388 * @ctxt: an XML parser context
6389 * @value: a xmlChar ** used to store the value of the attribute
6390 *
6391 * parse an attribute
6392 *
6393 * [41] Attribute ::= Name Eq AttValue
6394 *
6395 * [ WFC: No External Entity References ]
6396 * Attribute values cannot contain direct or indirect entity references
6397 * to external entities.
6398 *
6399 * [ WFC: No < in Attribute Values ]
6400 * The replacement text of any entity referred to directly or indirectly in
6401 * an attribute value (other than "&lt;") must not contain a <.
6402 *
6403 * [ VC: Attribute Value Type ]
6404 * The attribute must have been declared; the value must be of the type
6405 * declared for it.
6406 *
6407 * [25] Eq ::= S? '=' S?
6408 *
6409 * With namespace:
6410 *
6411 * [NS 11] Attribute ::= QName Eq AttValue
6412 *
6413 * Also the case QName == xmlns:??? is handled independently as a namespace
6414 * definition.
6415 *
6416 * Returns the attribute name, and the value in *value.
6417 */
6418
6419xmlChar *
6420xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6421 xmlChar *name, *val;
6422
6423 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006424 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006425 name = xmlParseName(ctxt);
6426 if (name == NULL) {
6427 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6429 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6430 ctxt->wellFormed = 0;
6431 ctxt->disableSAX = 1;
6432 return(NULL);
6433 }
6434
6435 /*
6436 * read the value
6437 */
6438 SKIP_BLANKS;
6439 if (RAW == '=') {
6440 NEXT;
6441 SKIP_BLANKS;
6442 val = xmlParseAttValue(ctxt);
6443 ctxt->instate = XML_PARSER_CONTENT;
6444 } else {
6445 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6447 ctxt->sax->error(ctxt->userData,
6448 "Specification mandate value for attribute %s\n", name);
6449 ctxt->wellFormed = 0;
6450 ctxt->disableSAX = 1;
6451 xmlFree(name);
6452 return(NULL);
6453 }
6454
6455 /*
6456 * Check that xml:lang conforms to the specification
6457 * No more registered as an error, just generate a warning now
6458 * since this was deprecated in XML second edition
6459 */
6460 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6461 if (!xmlCheckLanguageID(val)) {
6462 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6463 ctxt->sax->warning(ctxt->userData,
6464 "Malformed value for xml:lang : %s\n", val);
6465 }
6466 }
6467
6468 /*
6469 * Check that xml:space conforms to the specification
6470 */
6471 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6472 if (xmlStrEqual(val, BAD_CAST "default"))
6473 *(ctxt->space) = 0;
6474 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6475 *(ctxt->space) = 1;
6476 else {
6477 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6479 ctxt->sax->error(ctxt->userData,
6480"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6481 val);
6482 ctxt->wellFormed = 0;
6483 ctxt->disableSAX = 1;
6484 }
6485 }
6486
6487 *value = val;
6488 return(name);
6489}
6490
6491/**
6492 * xmlParseStartTag:
6493 * @ctxt: an XML parser context
6494 *
6495 * parse a start of tag either for rule element or
6496 * EmptyElement. In both case we don't parse the tag closing chars.
6497 *
6498 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6499 *
6500 * [ WFC: Unique Att Spec ]
6501 * No attribute name may appear more than once in the same start-tag or
6502 * empty-element tag.
6503 *
6504 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6505 *
6506 * [ WFC: Unique Att Spec ]
6507 * No attribute name may appear more than once in the same start-tag or
6508 * empty-element tag.
6509 *
6510 * With namespace:
6511 *
6512 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6513 *
6514 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6515 *
6516 * Returns the element name parsed
6517 */
6518
6519xmlChar *
6520xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6521 xmlChar *name;
6522 xmlChar *attname;
6523 xmlChar *attvalue;
6524 const xmlChar **atts = NULL;
6525 int nbatts = 0;
6526 int maxatts = 0;
6527 int i;
6528
6529 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006530 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006531
6532 name = xmlParseName(ctxt);
6533 if (name == NULL) {
6534 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6536 ctxt->sax->error(ctxt->userData,
6537 "xmlParseStartTag: invalid element name\n");
6538 ctxt->wellFormed = 0;
6539 ctxt->disableSAX = 1;
6540 return(NULL);
6541 }
6542
6543 /*
6544 * Now parse the attributes, it ends up with the ending
6545 *
6546 * (S Attribute)* S?
6547 */
6548 SKIP_BLANKS;
6549 GROW;
6550
Daniel Veillard21a0f912001-02-25 19:54:14 +00006551 while ((RAW != '>') &&
6552 ((RAW != '/') || (NXT(1) != '>')) &&
6553 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006554 const xmlChar *q = CUR_PTR;
6555 int cons = ctxt->input->consumed;
6556
6557 attname = xmlParseAttribute(ctxt, &attvalue);
6558 if ((attname != NULL) && (attvalue != NULL)) {
6559 /*
6560 * [ WFC: Unique Att Spec ]
6561 * No attribute name may appear more than once in the same
6562 * start-tag or empty-element tag.
6563 */
6564 for (i = 0; i < nbatts;i += 2) {
6565 if (xmlStrEqual(atts[i], attname)) {
6566 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6568 ctxt->sax->error(ctxt->userData,
6569 "Attribute %s redefined\n",
6570 attname);
6571 ctxt->wellFormed = 0;
6572 ctxt->disableSAX = 1;
6573 xmlFree(attname);
6574 xmlFree(attvalue);
6575 goto failed;
6576 }
6577 }
6578
6579 /*
6580 * Add the pair to atts
6581 */
6582 if (atts == NULL) {
6583 maxatts = 10;
6584 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6585 if (atts == NULL) {
6586 xmlGenericError(xmlGenericErrorContext,
6587 "malloc of %ld byte failed\n",
6588 maxatts * (long)sizeof(xmlChar *));
6589 return(NULL);
6590 }
6591 } else if (nbatts + 4 > maxatts) {
6592 maxatts *= 2;
6593 atts = (const xmlChar **) xmlRealloc((void *) atts,
6594 maxatts * sizeof(xmlChar *));
6595 if (atts == NULL) {
6596 xmlGenericError(xmlGenericErrorContext,
6597 "realloc of %ld byte failed\n",
6598 maxatts * (long)sizeof(xmlChar *));
6599 return(NULL);
6600 }
6601 }
6602 atts[nbatts++] = attname;
6603 atts[nbatts++] = attvalue;
6604 atts[nbatts] = NULL;
6605 atts[nbatts + 1] = NULL;
6606 } else {
6607 if (attname != NULL)
6608 xmlFree(attname);
6609 if (attvalue != NULL)
6610 xmlFree(attvalue);
6611 }
6612
6613failed:
6614
6615 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6616 break;
6617 if (!IS_BLANK(RAW)) {
6618 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6620 ctxt->sax->error(ctxt->userData,
6621 "attributes construct error\n");
6622 ctxt->wellFormed = 0;
6623 ctxt->disableSAX = 1;
6624 }
6625 SKIP_BLANKS;
6626 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6627 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6629 ctxt->sax->error(ctxt->userData,
6630 "xmlParseStartTag: problem parsing attributes\n");
6631 ctxt->wellFormed = 0;
6632 ctxt->disableSAX = 1;
6633 break;
6634 }
6635 GROW;
6636 }
6637
6638 /*
6639 * SAX: Start of Element !
6640 */
6641 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6642 (!ctxt->disableSAX))
6643 ctxt->sax->startElement(ctxt->userData, name, atts);
6644
6645 if (atts != NULL) {
6646 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6647 xmlFree((void *) atts);
6648 }
6649 return(name);
6650}
6651
6652/**
6653 * xmlParseEndTag:
6654 * @ctxt: an XML parser context
6655 *
6656 * parse an end of tag
6657 *
6658 * [42] ETag ::= '</' Name S? '>'
6659 *
6660 * With namespace
6661 *
6662 * [NS 9] ETag ::= '</' QName S? '>'
6663 */
6664
6665void
6666xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6667 xmlChar *name;
6668 xmlChar *oldname;
6669
6670 GROW;
6671 if ((RAW != '<') || (NXT(1) != '/')) {
6672 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6674 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6675 ctxt->wellFormed = 0;
6676 ctxt->disableSAX = 1;
6677 return;
6678 }
6679 SKIP(2);
6680
Daniel Veillard46de64e2002-05-29 08:21:33 +00006681 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006682
6683 /*
6684 * We should definitely be at the ending "S? '>'" part
6685 */
6686 GROW;
6687 SKIP_BLANKS;
6688 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6689 ctxt->errNo = XML_ERR_GT_REQUIRED;
6690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6691 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6692 ctxt->wellFormed = 0;
6693 ctxt->disableSAX = 1;
6694 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006695 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006696
6697 /*
6698 * [ WFC: Element Type Match ]
6699 * The Name in an element's end-tag must match the element type in the
6700 * start-tag.
6701 *
6702 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006703 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006704 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006706 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006707 ctxt->sax->error(ctxt->userData,
6708 "Opening and ending tag mismatch: %s and %s\n",
6709 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006710 xmlFree(name);
6711 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006712 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006713 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006714 }
6715
6716 }
6717 ctxt->wellFormed = 0;
6718 ctxt->disableSAX = 1;
6719 }
6720
6721 /*
6722 * SAX: End of Tag
6723 */
6724 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6725 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006726 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006727
Owen Taylor3473f882001-02-23 17:55:21 +00006728 oldname = namePop(ctxt);
6729 spacePop(ctxt);
6730 if (oldname != NULL) {
6731#ifdef DEBUG_STACK
6732 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6733#endif
6734 xmlFree(oldname);
6735 }
6736 return;
6737}
6738
6739/**
6740 * xmlParseCDSect:
6741 * @ctxt: an XML parser context
6742 *
6743 * Parse escaped pure raw content.
6744 *
6745 * [18] CDSect ::= CDStart CData CDEnd
6746 *
6747 * [19] CDStart ::= '<![CDATA['
6748 *
6749 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6750 *
6751 * [21] CDEnd ::= ']]>'
6752 */
6753void
6754xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6755 xmlChar *buf = NULL;
6756 int len = 0;
6757 int size = XML_PARSER_BUFFER_SIZE;
6758 int r, rl;
6759 int s, sl;
6760 int cur, l;
6761 int count = 0;
6762
6763 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6764 (NXT(2) == '[') && (NXT(3) == 'C') &&
6765 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6766 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6767 (NXT(8) == '[')) {
6768 SKIP(9);
6769 } else
6770 return;
6771
6772 ctxt->instate = XML_PARSER_CDATA_SECTION;
6773 r = CUR_CHAR(rl);
6774 if (!IS_CHAR(r)) {
6775 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6777 ctxt->sax->error(ctxt->userData,
6778 "CData section not finished\n");
6779 ctxt->wellFormed = 0;
6780 ctxt->disableSAX = 1;
6781 ctxt->instate = XML_PARSER_CONTENT;
6782 return;
6783 }
6784 NEXTL(rl);
6785 s = CUR_CHAR(sl);
6786 if (!IS_CHAR(s)) {
6787 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6789 ctxt->sax->error(ctxt->userData,
6790 "CData section not finished\n");
6791 ctxt->wellFormed = 0;
6792 ctxt->disableSAX = 1;
6793 ctxt->instate = XML_PARSER_CONTENT;
6794 return;
6795 }
6796 NEXTL(sl);
6797 cur = CUR_CHAR(l);
6798 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6799 if (buf == NULL) {
6800 xmlGenericError(xmlGenericErrorContext,
6801 "malloc of %d byte failed\n", size);
6802 return;
6803 }
6804 while (IS_CHAR(cur) &&
6805 ((r != ']') || (s != ']') || (cur != '>'))) {
6806 if (len + 5 >= size) {
6807 size *= 2;
6808 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6809 if (buf == NULL) {
6810 xmlGenericError(xmlGenericErrorContext,
6811 "realloc of %d byte failed\n", size);
6812 return;
6813 }
6814 }
6815 COPY_BUF(rl,buf,len,r);
6816 r = s;
6817 rl = sl;
6818 s = cur;
6819 sl = l;
6820 count++;
6821 if (count > 50) {
6822 GROW;
6823 count = 0;
6824 }
6825 NEXTL(l);
6826 cur = CUR_CHAR(l);
6827 }
6828 buf[len] = 0;
6829 ctxt->instate = XML_PARSER_CONTENT;
6830 if (cur != '>') {
6831 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6833 ctxt->sax->error(ctxt->userData,
6834 "CData section not finished\n%.50s\n", buf);
6835 ctxt->wellFormed = 0;
6836 ctxt->disableSAX = 1;
6837 xmlFree(buf);
6838 return;
6839 }
6840 NEXTL(l);
6841
6842 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006843 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006844 */
6845 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6846 if (ctxt->sax->cdataBlock != NULL)
6847 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006848 else if (ctxt->sax->characters != NULL)
6849 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006850 }
6851 xmlFree(buf);
6852}
6853
6854/**
6855 * xmlParseContent:
6856 * @ctxt: an XML parser context
6857 *
6858 * Parse a content:
6859 *
6860 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6861 */
6862
6863void
6864xmlParseContent(xmlParserCtxtPtr ctxt) {
6865 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006866 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006867 ((RAW != '<') || (NXT(1) != '/'))) {
6868 const xmlChar *test = CUR_PTR;
6869 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006870 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006871
6872 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006873 * First case : a Processing Instruction.
6874 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006875 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006876 xmlParsePI(ctxt);
6877 }
6878
6879 /*
6880 * Second case : a CDSection
6881 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006882 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006883 (NXT(2) == '[') && (NXT(3) == 'C') &&
6884 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6885 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6886 (NXT(8) == '[')) {
6887 xmlParseCDSect(ctxt);
6888 }
6889
6890 /*
6891 * Third case : a comment
6892 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006893 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006894 (NXT(2) == '-') && (NXT(3) == '-')) {
6895 xmlParseComment(ctxt);
6896 ctxt->instate = XML_PARSER_CONTENT;
6897 }
6898
6899 /*
6900 * Fourth case : a sub-element.
6901 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006902 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006903 xmlParseElement(ctxt);
6904 }
6905
6906 /*
6907 * Fifth case : a reference. If if has not been resolved,
6908 * parsing returns it's Name, create the node
6909 */
6910
Daniel Veillard21a0f912001-02-25 19:54:14 +00006911 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006912 xmlParseReference(ctxt);
6913 }
6914
6915 /*
6916 * Last case, text. Note that References are handled directly.
6917 */
6918 else {
6919 xmlParseCharData(ctxt, 0);
6920 }
6921
6922 GROW;
6923 /*
6924 * Pop-up of finished entities.
6925 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006926 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006927 xmlPopInput(ctxt);
6928 SHRINK;
6929
Daniel Veillardfdc91562002-07-01 21:52:03 +00006930 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006931 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6933 ctxt->sax->error(ctxt->userData,
6934 "detected an error in element content\n");
6935 ctxt->wellFormed = 0;
6936 ctxt->disableSAX = 1;
6937 ctxt->instate = XML_PARSER_EOF;
6938 break;
6939 }
6940 }
6941}
6942
6943/**
6944 * xmlParseElement:
6945 * @ctxt: an XML parser context
6946 *
6947 * parse an XML element, this is highly recursive
6948 *
6949 * [39] element ::= EmptyElemTag | STag content ETag
6950 *
6951 * [ WFC: Element Type Match ]
6952 * The Name in an element's end-tag must match the element type in the
6953 * start-tag.
6954 *
6955 * [ VC: Element Valid ]
6956 * An element is valid if there is a declaration matching elementdecl
6957 * where the Name matches the element type and one of the following holds:
6958 * - The declaration matches EMPTY and the element has no content.
6959 * - The declaration matches children and the sequence of child elements
6960 * belongs to the language generated by the regular expression in the
6961 * content model, with optional white space (characters matching the
6962 * nonterminal S) between each pair of child elements.
6963 * - The declaration matches Mixed and the content consists of character
6964 * data and child elements whose types match names in the content model.
6965 * - The declaration matches ANY, and the types of any child elements have
6966 * been declared.
6967 */
6968
6969void
6970xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00006971 xmlChar *name;
6972 xmlChar *oldname;
6973 xmlParserNodeInfo node_info;
6974 xmlNodePtr ret;
6975
6976 /* Capture start position */
6977 if (ctxt->record_info) {
6978 node_info.begin_pos = ctxt->input->consumed +
6979 (CUR_PTR - ctxt->input->base);
6980 node_info.begin_line = ctxt->input->line;
6981 }
6982
6983 if (ctxt->spaceNr == 0)
6984 spacePush(ctxt, -1);
6985 else
6986 spacePush(ctxt, *ctxt->space);
6987
6988 name = xmlParseStartTag(ctxt);
6989 if (name == NULL) {
6990 spacePop(ctxt);
6991 return;
6992 }
6993 namePush(ctxt, name);
6994 ret = ctxt->node;
6995
6996 /*
6997 * [ VC: Root Element Type ]
6998 * The Name in the document type declaration must match the element
6999 * type of the root element.
7000 */
7001 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7002 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7003 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7004
7005 /*
7006 * Check for an Empty Element.
7007 */
7008 if ((RAW == '/') && (NXT(1) == '>')) {
7009 SKIP(2);
7010 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7011 (!ctxt->disableSAX))
7012 ctxt->sax->endElement(ctxt->userData, name);
7013 oldname = namePop(ctxt);
7014 spacePop(ctxt);
7015 if (oldname != NULL) {
7016#ifdef DEBUG_STACK
7017 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7018#endif
7019 xmlFree(oldname);
7020 }
7021 if ( ret != NULL && ctxt->record_info ) {
7022 node_info.end_pos = ctxt->input->consumed +
7023 (CUR_PTR - ctxt->input->base);
7024 node_info.end_line = ctxt->input->line;
7025 node_info.node = ret;
7026 xmlParserAddNodeInfo(ctxt, &node_info);
7027 }
7028 return;
7029 }
7030 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007031 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007032 } else {
7033 ctxt->errNo = XML_ERR_GT_REQUIRED;
7034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7035 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007036 "Couldn't find end of Start Tag %s\n",
7037 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007038 ctxt->wellFormed = 0;
7039 ctxt->disableSAX = 1;
7040
7041 /*
7042 * end of parsing of this node.
7043 */
7044 nodePop(ctxt);
7045 oldname = namePop(ctxt);
7046 spacePop(ctxt);
7047 if (oldname != NULL) {
7048#ifdef DEBUG_STACK
7049 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7050#endif
7051 xmlFree(oldname);
7052 }
7053
7054 /*
7055 * Capture end position and add node
7056 */
7057 if ( ret != NULL && ctxt->record_info ) {
7058 node_info.end_pos = ctxt->input->consumed +
7059 (CUR_PTR - ctxt->input->base);
7060 node_info.end_line = ctxt->input->line;
7061 node_info.node = ret;
7062 xmlParserAddNodeInfo(ctxt, &node_info);
7063 }
7064 return;
7065 }
7066
7067 /*
7068 * Parse the content of the element:
7069 */
7070 xmlParseContent(ctxt);
7071 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007072 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7074 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007075 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007076 ctxt->wellFormed = 0;
7077 ctxt->disableSAX = 1;
7078
7079 /*
7080 * end of parsing of this node.
7081 */
7082 nodePop(ctxt);
7083 oldname = namePop(ctxt);
7084 spacePop(ctxt);
7085 if (oldname != NULL) {
7086#ifdef DEBUG_STACK
7087 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7088#endif
7089 xmlFree(oldname);
7090 }
7091 return;
7092 }
7093
7094 /*
7095 * parse the end of tag: '</' should be here.
7096 */
7097 xmlParseEndTag(ctxt);
7098
7099 /*
7100 * Capture end position and add node
7101 */
7102 if ( ret != NULL && ctxt->record_info ) {
7103 node_info.end_pos = ctxt->input->consumed +
7104 (CUR_PTR - ctxt->input->base);
7105 node_info.end_line = ctxt->input->line;
7106 node_info.node = ret;
7107 xmlParserAddNodeInfo(ctxt, &node_info);
7108 }
7109}
7110
7111/**
7112 * xmlParseVersionNum:
7113 * @ctxt: an XML parser context
7114 *
7115 * parse the XML version value.
7116 *
7117 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7118 *
7119 * Returns the string giving the XML version number, or NULL
7120 */
7121xmlChar *
7122xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7123 xmlChar *buf = NULL;
7124 int len = 0;
7125 int size = 10;
7126 xmlChar cur;
7127
7128 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7129 if (buf == NULL) {
7130 xmlGenericError(xmlGenericErrorContext,
7131 "malloc of %d byte failed\n", size);
7132 return(NULL);
7133 }
7134 cur = CUR;
7135 while (((cur >= 'a') && (cur <= 'z')) ||
7136 ((cur >= 'A') && (cur <= 'Z')) ||
7137 ((cur >= '0') && (cur <= '9')) ||
7138 (cur == '_') || (cur == '.') ||
7139 (cur == ':') || (cur == '-')) {
7140 if (len + 1 >= size) {
7141 size *= 2;
7142 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7143 if (buf == NULL) {
7144 xmlGenericError(xmlGenericErrorContext,
7145 "realloc of %d byte failed\n", size);
7146 return(NULL);
7147 }
7148 }
7149 buf[len++] = cur;
7150 NEXT;
7151 cur=CUR;
7152 }
7153 buf[len] = 0;
7154 return(buf);
7155}
7156
7157/**
7158 * xmlParseVersionInfo:
7159 * @ctxt: an XML parser context
7160 *
7161 * parse the XML version.
7162 *
7163 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7164 *
7165 * [25] Eq ::= S? '=' S?
7166 *
7167 * Returns the version string, e.g. "1.0"
7168 */
7169
7170xmlChar *
7171xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7172 xmlChar *version = NULL;
7173 const xmlChar *q;
7174
7175 if ((RAW == 'v') && (NXT(1) == 'e') &&
7176 (NXT(2) == 'r') && (NXT(3) == 's') &&
7177 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7178 (NXT(6) == 'n')) {
7179 SKIP(7);
7180 SKIP_BLANKS;
7181 if (RAW != '=') {
7182 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7184 ctxt->sax->error(ctxt->userData,
7185 "xmlParseVersionInfo : expected '='\n");
7186 ctxt->wellFormed = 0;
7187 ctxt->disableSAX = 1;
7188 return(NULL);
7189 }
7190 NEXT;
7191 SKIP_BLANKS;
7192 if (RAW == '"') {
7193 NEXT;
7194 q = CUR_PTR;
7195 version = xmlParseVersionNum(ctxt);
7196 if (RAW != '"') {
7197 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7199 ctxt->sax->error(ctxt->userData,
7200 "String not closed\n%.50s\n", q);
7201 ctxt->wellFormed = 0;
7202 ctxt->disableSAX = 1;
7203 } else
7204 NEXT;
7205 } else if (RAW == '\''){
7206 NEXT;
7207 q = CUR_PTR;
7208 version = xmlParseVersionNum(ctxt);
7209 if (RAW != '\'') {
7210 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7212 ctxt->sax->error(ctxt->userData,
7213 "String not closed\n%.50s\n", q);
7214 ctxt->wellFormed = 0;
7215 ctxt->disableSAX = 1;
7216 } else
7217 NEXT;
7218 } else {
7219 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7221 ctxt->sax->error(ctxt->userData,
7222 "xmlParseVersionInfo : expected ' or \"\n");
7223 ctxt->wellFormed = 0;
7224 ctxt->disableSAX = 1;
7225 }
7226 }
7227 return(version);
7228}
7229
7230/**
7231 * xmlParseEncName:
7232 * @ctxt: an XML parser context
7233 *
7234 * parse the XML encoding name
7235 *
7236 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7237 *
7238 * Returns the encoding name value or NULL
7239 */
7240xmlChar *
7241xmlParseEncName(xmlParserCtxtPtr ctxt) {
7242 xmlChar *buf = NULL;
7243 int len = 0;
7244 int size = 10;
7245 xmlChar cur;
7246
7247 cur = CUR;
7248 if (((cur >= 'a') && (cur <= 'z')) ||
7249 ((cur >= 'A') && (cur <= 'Z'))) {
7250 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7251 if (buf == NULL) {
7252 xmlGenericError(xmlGenericErrorContext,
7253 "malloc of %d byte failed\n", size);
7254 return(NULL);
7255 }
7256
7257 buf[len++] = cur;
7258 NEXT;
7259 cur = CUR;
7260 while (((cur >= 'a') && (cur <= 'z')) ||
7261 ((cur >= 'A') && (cur <= 'Z')) ||
7262 ((cur >= '0') && (cur <= '9')) ||
7263 (cur == '.') || (cur == '_') ||
7264 (cur == '-')) {
7265 if (len + 1 >= size) {
7266 size *= 2;
7267 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7268 if (buf == NULL) {
7269 xmlGenericError(xmlGenericErrorContext,
7270 "realloc of %d byte failed\n", size);
7271 return(NULL);
7272 }
7273 }
7274 buf[len++] = cur;
7275 NEXT;
7276 cur = CUR;
7277 if (cur == 0) {
7278 SHRINK;
7279 GROW;
7280 cur = CUR;
7281 }
7282 }
7283 buf[len] = 0;
7284 } else {
7285 ctxt->errNo = XML_ERR_ENCODING_NAME;
7286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7287 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7288 ctxt->wellFormed = 0;
7289 ctxt->disableSAX = 1;
7290 }
7291 return(buf);
7292}
7293
7294/**
7295 * xmlParseEncodingDecl:
7296 * @ctxt: an XML parser context
7297 *
7298 * parse the XML encoding declaration
7299 *
7300 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7301 *
7302 * this setups the conversion filters.
7303 *
7304 * Returns the encoding value or NULL
7305 */
7306
7307xmlChar *
7308xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7309 xmlChar *encoding = NULL;
7310 const xmlChar *q;
7311
7312 SKIP_BLANKS;
7313 if ((RAW == 'e') && (NXT(1) == 'n') &&
7314 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7315 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7316 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7317 SKIP(8);
7318 SKIP_BLANKS;
7319 if (RAW != '=') {
7320 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7322 ctxt->sax->error(ctxt->userData,
7323 "xmlParseEncodingDecl : expected '='\n");
7324 ctxt->wellFormed = 0;
7325 ctxt->disableSAX = 1;
7326 return(NULL);
7327 }
7328 NEXT;
7329 SKIP_BLANKS;
7330 if (RAW == '"') {
7331 NEXT;
7332 q = CUR_PTR;
7333 encoding = xmlParseEncName(ctxt);
7334 if (RAW != '"') {
7335 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7337 ctxt->sax->error(ctxt->userData,
7338 "String not closed\n%.50s\n", q);
7339 ctxt->wellFormed = 0;
7340 ctxt->disableSAX = 1;
7341 } else
7342 NEXT;
7343 } else if (RAW == '\''){
7344 NEXT;
7345 q = CUR_PTR;
7346 encoding = xmlParseEncName(ctxt);
7347 if (RAW != '\'') {
7348 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7350 ctxt->sax->error(ctxt->userData,
7351 "String not closed\n%.50s\n", q);
7352 ctxt->wellFormed = 0;
7353 ctxt->disableSAX = 1;
7354 } else
7355 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007356 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007357 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7359 ctxt->sax->error(ctxt->userData,
7360 "xmlParseEncodingDecl : expected ' or \"\n");
7361 ctxt->wellFormed = 0;
7362 ctxt->disableSAX = 1;
7363 }
7364 if (encoding != NULL) {
7365 xmlCharEncoding enc;
7366 xmlCharEncodingHandlerPtr handler;
7367
7368 if (ctxt->input->encoding != NULL)
7369 xmlFree((xmlChar *) ctxt->input->encoding);
7370 ctxt->input->encoding = encoding;
7371
7372 enc = xmlParseCharEncoding((const char *) encoding);
7373 /*
7374 * registered set of known encodings
7375 */
7376 if (enc != XML_CHAR_ENCODING_ERROR) {
7377 xmlSwitchEncoding(ctxt, enc);
7378 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007379 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007380 xmlFree(encoding);
7381 return(NULL);
7382 }
7383 } else {
7384 /*
7385 * fallback for unknown encodings
7386 */
7387 handler = xmlFindCharEncodingHandler((const char *) encoding);
7388 if (handler != NULL) {
7389 xmlSwitchToEncoding(ctxt, handler);
7390 } else {
7391 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7393 ctxt->sax->error(ctxt->userData,
7394 "Unsupported encoding %s\n", encoding);
7395 return(NULL);
7396 }
7397 }
7398 }
7399 }
7400 return(encoding);
7401}
7402
7403/**
7404 * xmlParseSDDecl:
7405 * @ctxt: an XML parser context
7406 *
7407 * parse the XML standalone declaration
7408 *
7409 * [32] SDDecl ::= S 'standalone' Eq
7410 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7411 *
7412 * [ VC: Standalone Document Declaration ]
7413 * TODO The standalone document declaration must have the value "no"
7414 * if any external markup declarations contain declarations of:
7415 * - attributes with default values, if elements to which these
7416 * attributes apply appear in the document without specifications
7417 * of values for these attributes, or
7418 * - entities (other than amp, lt, gt, apos, quot), if references
7419 * to those entities appear in the document, or
7420 * - attributes with values subject to normalization, where the
7421 * attribute appears in the document with a value which will change
7422 * as a result of normalization, or
7423 * - element types with element content, if white space occurs directly
7424 * within any instance of those types.
7425 *
7426 * Returns 1 if standalone, 0 otherwise
7427 */
7428
7429int
7430xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7431 int standalone = -1;
7432
7433 SKIP_BLANKS;
7434 if ((RAW == 's') && (NXT(1) == 't') &&
7435 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7436 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7437 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7438 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7439 SKIP(10);
7440 SKIP_BLANKS;
7441 if (RAW != '=') {
7442 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7444 ctxt->sax->error(ctxt->userData,
7445 "XML standalone declaration : expected '='\n");
7446 ctxt->wellFormed = 0;
7447 ctxt->disableSAX = 1;
7448 return(standalone);
7449 }
7450 NEXT;
7451 SKIP_BLANKS;
7452 if (RAW == '\''){
7453 NEXT;
7454 if ((RAW == 'n') && (NXT(1) == 'o')) {
7455 standalone = 0;
7456 SKIP(2);
7457 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7458 (NXT(2) == 's')) {
7459 standalone = 1;
7460 SKIP(3);
7461 } else {
7462 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7464 ctxt->sax->error(ctxt->userData,
7465 "standalone accepts only 'yes' or 'no'\n");
7466 ctxt->wellFormed = 0;
7467 ctxt->disableSAX = 1;
7468 }
7469 if (RAW != '\'') {
7470 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7472 ctxt->sax->error(ctxt->userData, "String not closed\n");
7473 ctxt->wellFormed = 0;
7474 ctxt->disableSAX = 1;
7475 } else
7476 NEXT;
7477 } else if (RAW == '"'){
7478 NEXT;
7479 if ((RAW == 'n') && (NXT(1) == 'o')) {
7480 standalone = 0;
7481 SKIP(2);
7482 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7483 (NXT(2) == 's')) {
7484 standalone = 1;
7485 SKIP(3);
7486 } else {
7487 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7489 ctxt->sax->error(ctxt->userData,
7490 "standalone accepts only 'yes' or 'no'\n");
7491 ctxt->wellFormed = 0;
7492 ctxt->disableSAX = 1;
7493 }
7494 if (RAW != '"') {
7495 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7497 ctxt->sax->error(ctxt->userData, "String not closed\n");
7498 ctxt->wellFormed = 0;
7499 ctxt->disableSAX = 1;
7500 } else
7501 NEXT;
7502 } else {
7503 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7505 ctxt->sax->error(ctxt->userData,
7506 "Standalone value not found\n");
7507 ctxt->wellFormed = 0;
7508 ctxt->disableSAX = 1;
7509 }
7510 }
7511 return(standalone);
7512}
7513
7514/**
7515 * xmlParseXMLDecl:
7516 * @ctxt: an XML parser context
7517 *
7518 * parse an XML declaration header
7519 *
7520 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7521 */
7522
7523void
7524xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7525 xmlChar *version;
7526
7527 /*
7528 * We know that '<?xml' is here.
7529 */
7530 SKIP(5);
7531
7532 if (!IS_BLANK(RAW)) {
7533 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7535 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7536 ctxt->wellFormed = 0;
7537 ctxt->disableSAX = 1;
7538 }
7539 SKIP_BLANKS;
7540
7541 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007542 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007543 */
7544 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007545 if (version == NULL) {
7546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7547 ctxt->sax->error(ctxt->userData,
7548 "Malformed declaration expecting version\n");
7549 ctxt->wellFormed = 0;
7550 ctxt->disableSAX = 1;
7551 } else {
7552 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7553 /*
7554 * TODO: Blueberry should be detected here
7555 */
7556 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7557 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7558 version);
7559 }
7560 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007561 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007562 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007563 }
Owen Taylor3473f882001-02-23 17:55:21 +00007564
7565 /*
7566 * We may have the encoding declaration
7567 */
7568 if (!IS_BLANK(RAW)) {
7569 if ((RAW == '?') && (NXT(1) == '>')) {
7570 SKIP(2);
7571 return;
7572 }
7573 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7575 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7576 ctxt->wellFormed = 0;
7577 ctxt->disableSAX = 1;
7578 }
7579 xmlParseEncodingDecl(ctxt);
7580 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7581 /*
7582 * The XML REC instructs us to stop parsing right here
7583 */
7584 return;
7585 }
7586
7587 /*
7588 * We may have the standalone status.
7589 */
7590 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7591 if ((RAW == '?') && (NXT(1) == '>')) {
7592 SKIP(2);
7593 return;
7594 }
7595 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7597 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7598 ctxt->wellFormed = 0;
7599 ctxt->disableSAX = 1;
7600 }
7601 SKIP_BLANKS;
7602 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7603
7604 SKIP_BLANKS;
7605 if ((RAW == '?') && (NXT(1) == '>')) {
7606 SKIP(2);
7607 } else if (RAW == '>') {
7608 /* Deprecated old WD ... */
7609 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7611 ctxt->sax->error(ctxt->userData,
7612 "XML declaration must end-up with '?>'\n");
7613 ctxt->wellFormed = 0;
7614 ctxt->disableSAX = 1;
7615 NEXT;
7616 } else {
7617 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7619 ctxt->sax->error(ctxt->userData,
7620 "parsing XML declaration: '?>' expected\n");
7621 ctxt->wellFormed = 0;
7622 ctxt->disableSAX = 1;
7623 MOVETO_ENDTAG(CUR_PTR);
7624 NEXT;
7625 }
7626}
7627
7628/**
7629 * xmlParseMisc:
7630 * @ctxt: an XML parser context
7631 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007632 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007633 *
7634 * [27] Misc ::= Comment | PI | S
7635 */
7636
7637void
7638xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007639 while (((RAW == '<') && (NXT(1) == '?')) ||
7640 ((RAW == '<') && (NXT(1) == '!') &&
7641 (NXT(2) == '-') && (NXT(3) == '-')) ||
7642 IS_BLANK(CUR)) {
7643 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007644 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007645 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007646 NEXT;
7647 } else
7648 xmlParseComment(ctxt);
7649 }
7650}
7651
7652/**
7653 * xmlParseDocument:
7654 * @ctxt: an XML parser context
7655 *
7656 * parse an XML document (and build a tree if using the standard SAX
7657 * interface).
7658 *
7659 * [1] document ::= prolog element Misc*
7660 *
7661 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7662 *
7663 * Returns 0, -1 in case of error. the parser context is augmented
7664 * as a result of the parsing.
7665 */
7666
7667int
7668xmlParseDocument(xmlParserCtxtPtr ctxt) {
7669 xmlChar start[4];
7670 xmlCharEncoding enc;
7671
7672 xmlInitParser();
7673
7674 GROW;
7675
7676 /*
7677 * SAX: beginning of the document processing.
7678 */
7679 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7680 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7681
Daniel Veillard50f34372001-08-03 12:06:36 +00007682 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007683 /*
7684 * Get the 4 first bytes and decode the charset
7685 * if enc != XML_CHAR_ENCODING_NONE
7686 * plug some encoding conversion routines.
7687 */
7688 start[0] = RAW;
7689 start[1] = NXT(1);
7690 start[2] = NXT(2);
7691 start[3] = NXT(3);
7692 enc = xmlDetectCharEncoding(start, 4);
7693 if (enc != XML_CHAR_ENCODING_NONE) {
7694 xmlSwitchEncoding(ctxt, enc);
7695 }
Owen Taylor3473f882001-02-23 17:55:21 +00007696 }
7697
7698
7699 if (CUR == 0) {
7700 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7702 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7703 ctxt->wellFormed = 0;
7704 ctxt->disableSAX = 1;
7705 }
7706
7707 /*
7708 * Check for the XMLDecl in the Prolog.
7709 */
7710 GROW;
7711 if ((RAW == '<') && (NXT(1) == '?') &&
7712 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7713 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7714
7715 /*
7716 * Note that we will switch encoding on the fly.
7717 */
7718 xmlParseXMLDecl(ctxt);
7719 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7720 /*
7721 * The XML REC instructs us to stop parsing right here
7722 */
7723 return(-1);
7724 }
7725 ctxt->standalone = ctxt->input->standalone;
7726 SKIP_BLANKS;
7727 } else {
7728 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7729 }
7730 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7731 ctxt->sax->startDocument(ctxt->userData);
7732
7733 /*
7734 * The Misc part of the Prolog
7735 */
7736 GROW;
7737 xmlParseMisc(ctxt);
7738
7739 /*
7740 * Then possibly doc type declaration(s) and more Misc
7741 * (doctypedecl Misc*)?
7742 */
7743 GROW;
7744 if ((RAW == '<') && (NXT(1) == '!') &&
7745 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7746 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7747 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7748 (NXT(8) == 'E')) {
7749
7750 ctxt->inSubset = 1;
7751 xmlParseDocTypeDecl(ctxt);
7752 if (RAW == '[') {
7753 ctxt->instate = XML_PARSER_DTD;
7754 xmlParseInternalSubset(ctxt);
7755 }
7756
7757 /*
7758 * Create and update the external subset.
7759 */
7760 ctxt->inSubset = 2;
7761 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7762 (!ctxt->disableSAX))
7763 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7764 ctxt->extSubSystem, ctxt->extSubURI);
7765 ctxt->inSubset = 0;
7766
7767
7768 ctxt->instate = XML_PARSER_PROLOG;
7769 xmlParseMisc(ctxt);
7770 }
7771
7772 /*
7773 * Time to start parsing the tree itself
7774 */
7775 GROW;
7776 if (RAW != '<') {
7777 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7779 ctxt->sax->error(ctxt->userData,
7780 "Start tag expected, '<' not found\n");
7781 ctxt->wellFormed = 0;
7782 ctxt->disableSAX = 1;
7783 ctxt->instate = XML_PARSER_EOF;
7784 } else {
7785 ctxt->instate = XML_PARSER_CONTENT;
7786 xmlParseElement(ctxt);
7787 ctxt->instate = XML_PARSER_EPILOG;
7788
7789
7790 /*
7791 * The Misc part at the end
7792 */
7793 xmlParseMisc(ctxt);
7794
Daniel Veillard561b7f82002-03-20 21:55:57 +00007795 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007796 ctxt->errNo = XML_ERR_DOCUMENT_END;
7797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7798 ctxt->sax->error(ctxt->userData,
7799 "Extra content at the end of the document\n");
7800 ctxt->wellFormed = 0;
7801 ctxt->disableSAX = 1;
7802 }
7803 ctxt->instate = XML_PARSER_EOF;
7804 }
7805
7806 /*
7807 * SAX: end of the document processing.
7808 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007809 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007810 ctxt->sax->endDocument(ctxt->userData);
7811
Daniel Veillard5997aca2002-03-18 18:36:20 +00007812 /*
7813 * Remove locally kept entity definitions if the tree was not built
7814 */
7815 if ((ctxt->myDoc != NULL) &&
7816 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7817 xmlFreeDoc(ctxt->myDoc);
7818 ctxt->myDoc = NULL;
7819 }
7820
Daniel Veillardc7612992002-02-17 22:47:37 +00007821 if (! ctxt->wellFormed) {
7822 ctxt->valid = 0;
7823 return(-1);
7824 }
Owen Taylor3473f882001-02-23 17:55:21 +00007825 return(0);
7826}
7827
7828/**
7829 * xmlParseExtParsedEnt:
7830 * @ctxt: an XML parser context
7831 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007832 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007833 * An external general parsed entity is well-formed if it matches the
7834 * production labeled extParsedEnt.
7835 *
7836 * [78] extParsedEnt ::= TextDecl? content
7837 *
7838 * Returns 0, -1 in case of error. the parser context is augmented
7839 * as a result of the parsing.
7840 */
7841
7842int
7843xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7844 xmlChar start[4];
7845 xmlCharEncoding enc;
7846
7847 xmlDefaultSAXHandlerInit();
7848
7849 GROW;
7850
7851 /*
7852 * SAX: beginning of the document processing.
7853 */
7854 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7855 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7856
7857 /*
7858 * Get the 4 first bytes and decode the charset
7859 * if enc != XML_CHAR_ENCODING_NONE
7860 * plug some encoding conversion routines.
7861 */
7862 start[0] = RAW;
7863 start[1] = NXT(1);
7864 start[2] = NXT(2);
7865 start[3] = NXT(3);
7866 enc = xmlDetectCharEncoding(start, 4);
7867 if (enc != XML_CHAR_ENCODING_NONE) {
7868 xmlSwitchEncoding(ctxt, enc);
7869 }
7870
7871
7872 if (CUR == 0) {
7873 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7875 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7876 ctxt->wellFormed = 0;
7877 ctxt->disableSAX = 1;
7878 }
7879
7880 /*
7881 * Check for the XMLDecl in the Prolog.
7882 */
7883 GROW;
7884 if ((RAW == '<') && (NXT(1) == '?') &&
7885 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7886 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7887
7888 /*
7889 * Note that we will switch encoding on the fly.
7890 */
7891 xmlParseXMLDecl(ctxt);
7892 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7893 /*
7894 * The XML REC instructs us to stop parsing right here
7895 */
7896 return(-1);
7897 }
7898 SKIP_BLANKS;
7899 } else {
7900 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7901 }
7902 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7903 ctxt->sax->startDocument(ctxt->userData);
7904
7905 /*
7906 * Doing validity checking on chunk doesn't make sense
7907 */
7908 ctxt->instate = XML_PARSER_CONTENT;
7909 ctxt->validate = 0;
7910 ctxt->loadsubset = 0;
7911 ctxt->depth = 0;
7912
7913 xmlParseContent(ctxt);
7914
7915 if ((RAW == '<') && (NXT(1) == '/')) {
7916 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7918 ctxt->sax->error(ctxt->userData,
7919 "chunk is not well balanced\n");
7920 ctxt->wellFormed = 0;
7921 ctxt->disableSAX = 1;
7922 } else if (RAW != 0) {
7923 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7925 ctxt->sax->error(ctxt->userData,
7926 "extra content at the end of well balanced chunk\n");
7927 ctxt->wellFormed = 0;
7928 ctxt->disableSAX = 1;
7929 }
7930
7931 /*
7932 * SAX: end of the document processing.
7933 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007934 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007935 ctxt->sax->endDocument(ctxt->userData);
7936
7937 if (! ctxt->wellFormed) return(-1);
7938 return(0);
7939}
7940
7941/************************************************************************
7942 * *
7943 * Progressive parsing interfaces *
7944 * *
7945 ************************************************************************/
7946
7947/**
7948 * xmlParseLookupSequence:
7949 * @ctxt: an XML parser context
7950 * @first: the first char to lookup
7951 * @next: the next char to lookup or zero
7952 * @third: the next char to lookup or zero
7953 *
7954 * Try to find if a sequence (first, next, third) or just (first next) or
7955 * (first) is available in the input stream.
7956 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7957 * to avoid rescanning sequences of bytes, it DOES change the state of the
7958 * parser, do not use liberally.
7959 *
7960 * Returns the index to the current parsing point if the full sequence
7961 * is available, -1 otherwise.
7962 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007963static int
Owen Taylor3473f882001-02-23 17:55:21 +00007964xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7965 xmlChar next, xmlChar third) {
7966 int base, len;
7967 xmlParserInputPtr in;
7968 const xmlChar *buf;
7969
7970 in = ctxt->input;
7971 if (in == NULL) return(-1);
7972 base = in->cur - in->base;
7973 if (base < 0) return(-1);
7974 if (ctxt->checkIndex > base)
7975 base = ctxt->checkIndex;
7976 if (in->buf == NULL) {
7977 buf = in->base;
7978 len = in->length;
7979 } else {
7980 buf = in->buf->buffer->content;
7981 len = in->buf->buffer->use;
7982 }
7983 /* take into account the sequence length */
7984 if (third) len -= 2;
7985 else if (next) len --;
7986 for (;base < len;base++) {
7987 if (buf[base] == first) {
7988 if (third != 0) {
7989 if ((buf[base + 1] != next) ||
7990 (buf[base + 2] != third)) continue;
7991 } else if (next != 0) {
7992 if (buf[base + 1] != next) continue;
7993 }
7994 ctxt->checkIndex = 0;
7995#ifdef DEBUG_PUSH
7996 if (next == 0)
7997 xmlGenericError(xmlGenericErrorContext,
7998 "PP: lookup '%c' found at %d\n",
7999 first, base);
8000 else if (third == 0)
8001 xmlGenericError(xmlGenericErrorContext,
8002 "PP: lookup '%c%c' found at %d\n",
8003 first, next, base);
8004 else
8005 xmlGenericError(xmlGenericErrorContext,
8006 "PP: lookup '%c%c%c' found at %d\n",
8007 first, next, third, base);
8008#endif
8009 return(base - (in->cur - in->base));
8010 }
8011 }
8012 ctxt->checkIndex = base;
8013#ifdef DEBUG_PUSH
8014 if (next == 0)
8015 xmlGenericError(xmlGenericErrorContext,
8016 "PP: lookup '%c' failed\n", first);
8017 else if (third == 0)
8018 xmlGenericError(xmlGenericErrorContext,
8019 "PP: lookup '%c%c' failed\n", first, next);
8020 else
8021 xmlGenericError(xmlGenericErrorContext,
8022 "PP: lookup '%c%c%c' failed\n", first, next, third);
8023#endif
8024 return(-1);
8025}
8026
8027/**
8028 * xmlParseTryOrFinish:
8029 * @ctxt: an XML parser context
8030 * @terminate: last chunk indicator
8031 *
8032 * Try to progress on parsing.
 * Runs the push-parser state machine driven by ctxt->instate in a loop,
 * consuming the data currently buffered in ctxt->input. Unless @terminate
 * is set, most states first check with xmlParseLookupSequence() that the
 * closing delimiter of the construct is already buffered, and stop
 * ("goto done" or return) when it is not, leaving the state untouched.
8033 *
8034 * Returns the local counter ret. In this function ret starts at zero and
 * is only changed by "ret += 5" when an XML declaration is parsed; the
 * unsupported-encoding path and one path of the DTD state return 0
 * directly.
8035 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008036static int
Owen Taylor3473f882001-02-23 17:55:21 +00008037xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8038 int ret = 0;
8039 int avail;
8040 xmlChar cur, next;
8041
/* DEBUG_PUSH builds only: trace the state the parser is in on entry. */
8042#ifdef DEBUG_PUSH
8043 switch (ctxt->instate) {
8044 case XML_PARSER_EOF:
8045 xmlGenericError(xmlGenericErrorContext,
8046 "PP: try EOF\n"); break;
8047 case XML_PARSER_START:
8048 xmlGenericError(xmlGenericErrorContext,
8049 "PP: try START\n"); break;
8050 case XML_PARSER_MISC:
8051 xmlGenericError(xmlGenericErrorContext,
8052 "PP: try MISC\n");break;
8053 case XML_PARSER_COMMENT:
8054 xmlGenericError(xmlGenericErrorContext,
8055 "PP: try COMMENT\n");break;
8056 case XML_PARSER_PROLOG:
8057 xmlGenericError(xmlGenericErrorContext,
8058 "PP: try PROLOG\n");break;
8059 case XML_PARSER_START_TAG:
8060 xmlGenericError(xmlGenericErrorContext,
8061 "PP: try START_TAG\n");break;
8062 case XML_PARSER_CONTENT:
8063 xmlGenericError(xmlGenericErrorContext,
8064 "PP: try CONTENT\n");break;
8065 case XML_PARSER_CDATA_SECTION:
8066 xmlGenericError(xmlGenericErrorContext,
8067 "PP: try CDATA_SECTION\n");break;
8068 case XML_PARSER_END_TAG:
8069 xmlGenericError(xmlGenericErrorContext,
8070 "PP: try END_TAG\n");break;
8071 case XML_PARSER_ENTITY_DECL:
8072 xmlGenericError(xmlGenericErrorContext,
8073 "PP: try ENTITY_DECL\n");break;
8074 case XML_PARSER_ENTITY_VALUE:
8075 xmlGenericError(xmlGenericErrorContext,
8076 "PP: try ENTITY_VALUE\n");break;
8077 case XML_PARSER_ATTRIBUTE_VALUE:
8078 xmlGenericError(xmlGenericErrorContext,
8079 "PP: try ATTRIBUTE_VALUE\n");break;
8080 case XML_PARSER_DTD:
8081 xmlGenericError(xmlGenericErrorContext,
8082 "PP: try DTD\n");break;
8083 case XML_PARSER_EPILOG:
8084 xmlGenericError(xmlGenericErrorContext,
8085 "PP: try EPILOG\n");break;
8086 case XML_PARSER_PI:
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: try PI\n");break;
8089 case XML_PARSER_IGNORE:
8090 xmlGenericError(xmlGenericErrorContext,
8091 "PP: try IGNORE\n");break;
8092 }
8093#endif
8094
/* One iteration per state transition; left through "done" or a return. */
8095 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008096 SHRINK;
8097
Owen Taylor3473f882001-02-23 17:55:21 +00008098 /*
8099 * Pop-up of finished entities.
8100 */
8101 while ((RAW == 0) && (ctxt->inputNr > 1))
8102 xmlPopInput(ctxt);
8103
/* avail = number of buffered bytes from the current position onward. */
8104 if (ctxt->input ==NULL) break;
8105 if (ctxt->input->buf == NULL)
8106 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008107 else {
8108 /*
8109 * If we are operating on converted input, try to flush
8110 * remaining chars to avoid them stalling in the non-converted
8111 * buffer.
 * The base and cur offsets are saved and the pointers rebuilt
 * from buffer->content after the push.
8112 */
8113 if ((ctxt->input->buf->raw != NULL) &&
8114 (ctxt->input->buf->raw->use > 0)) {
8115 int base = ctxt->input->base -
8116 ctxt->input->buf->buffer->content;
8117 int current = ctxt->input->cur - ctxt->input->base;
8118
8119 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8120 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8121 ctxt->input->cur = ctxt->input->base + current;
8122 ctxt->input->end =
8123 &ctxt->input->buf->buffer->content[
8124 ctxt->input->buf->buffer->use];
8125 }
8126 avail = ctxt->input->buf->buffer->use -
8127 (ctxt->input->cur - ctxt->input->base);
8128 }
Owen Taylor3473f882001-02-23 17:55:21 +00008129 if (avail < 1)
8130 goto done;
8131 switch (ctxt->instate) {
8132 case XML_PARSER_EOF:
8133 /*
8134 * Document parsing is done !
8135 */
8136 goto done;
8137 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008138 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8139 xmlChar start[4];
8140 xmlCharEncoding enc;
8141
8142 /*
8143 * Very first chars read from the document flow.
8144 */
8145 if (avail < 4)
8146 goto done;
8147
8148 /*
8149 * Get the 4 first bytes and decode the charset
8150 * if enc != XML_CHAR_ENCODING_NONE
8151 * plug some encoding conversion routines.
 * The break below leaves the switch, so the START state is
 * processed again on the next loop iteration.
 * NOTE(review): if no encoding is detected and ctxt->charset
 * is still XML_CHAR_ENCODING_NONE afterwards, this branch
 * would be taken again on every iteration -- confirm that
 * the charset is always set elsewhere.
8152 */
8153 start[0] = RAW;
8154 start[1] = NXT(1);
8155 start[2] = NXT(2);
8156 start[3] = NXT(3);
8157 enc = xmlDetectCharEncoding(start, 4);
8158 if (enc != XML_CHAR_ENCODING_NONE) {
8159 xmlSwitchEncoding(ctxt, enc);
8160 }
8161 break;
8162 }
Owen Taylor3473f882001-02-23 17:55:21 +00008163
8164 cur = ctxt->input->cur[0];
8165 next = ctxt->input->cur[1];
/* A zero first byte is reported as an empty document and ends parsing. */
8166 if (cur == 0) {
8167 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8168 ctxt->sax->setDocumentLocator(ctxt->userData,
8169 &xmlDefaultSAXLocator);
8170 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8172 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8173 ctxt->wellFormed = 0;
8174 ctxt->disableSAX = 1;
8175 ctxt->instate = XML_PARSER_EOF;
8176#ifdef DEBUG_PUSH
8177 xmlGenericError(xmlGenericErrorContext,
8178 "PP: entering EOF\n");
8179#endif
8180 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8181 ctxt->sax->endDocument(ctxt->userData);
8182 goto done;
8183 }
8184 if ((cur == '<') && (next == '?')) {
8185 /* PI or XML decl */
8186 if (avail < 5) return(ret);
8187 if ((!terminate) &&
8188 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8189 return(ret);
8190 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8191 ctxt->sax->setDocumentLocator(ctxt->userData,
8192 &xmlDefaultSAXLocator);
8193 if ((ctxt->input->cur[2] == 'x') &&
8194 (ctxt->input->cur[3] == 'm') &&
8195 (ctxt->input->cur[4] == 'l') &&
8196 (IS_BLANK(ctxt->input->cur[5]))) {
/* This is the only place where the returned counter is changed. */
8197 ret += 5;
8198#ifdef DEBUG_PUSH
8199 xmlGenericError(xmlGenericErrorContext,
8200 "PP: Parsing XML Decl\n");
8201#endif
8202 xmlParseXMLDecl(ctxt);
8203 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8204 /*
8205 * The XML REC instructs us to stop parsing right
8206 * here
8207 */
8208 ctxt->instate = XML_PARSER_EOF;
8209 return(0);
8210 }
8211 ctxt->standalone = ctxt->input->standalone;
8212 if ((ctxt->encoding == NULL) &&
8213 (ctxt->input->encoding != NULL))
8214 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8215 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8216 (!ctxt->disableSAX))
8217 ctxt->sax->startDocument(ctxt->userData);
8218 ctxt->instate = XML_PARSER_MISC;
8219#ifdef DEBUG_PUSH
8220 xmlGenericError(xmlGenericErrorContext,
8221 "PP: entering MISC\n");
8222#endif
8223 } else {
/* "<?" not followed by "xml" + blank: no XML declaration; the
 * default version is used and the MISC state is entered. */
8224 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8225 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8226 (!ctxt->disableSAX))
8227 ctxt->sax->startDocument(ctxt->userData);
8228 ctxt->instate = XML_PARSER_MISC;
8229#ifdef DEBUG_PUSH
8230 xmlGenericError(xmlGenericErrorContext,
8231 "PP: entering MISC\n");
8232#endif
8233 }
8234 } else {
/* Input does not start with "<?": same handling as a document
 * without an XML declaration. */
8235 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8236 ctxt->sax->setDocumentLocator(ctxt->userData,
8237 &xmlDefaultSAXLocator);
8238 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8239 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8240 (!ctxt->disableSAX))
8241 ctxt->sax->startDocument(ctxt->userData);
8242 ctxt->instate = XML_PARSER_MISC;
8243#ifdef DEBUG_PUSH
8244 xmlGenericError(xmlGenericErrorContext,
8245 "PP: entering MISC\n");
8246#endif
8247 }
8248 break;
/* MISC: PIs, comments and the DOCTYPE declaration are handled here;
 * anything else switches to START_TAG. */
8249 case XML_PARSER_MISC:
8250 SKIP_BLANKS;
/* recompute avail after SKIP_BLANKS */
8251 if (ctxt->input->buf == NULL)
8252 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8253 else
8254 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8255 if (avail < 2)
8256 goto done;
8257 cur = ctxt->input->cur[0];
8258 next = ctxt->input->cur[1];
8259 if ((cur == '<') && (next == '?')) {
8260 if ((!terminate) &&
8261 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8262 goto done;
8263#ifdef DEBUG_PUSH
8264 xmlGenericError(xmlGenericErrorContext,
8265 "PP: Parsing PI\n");
8266#endif
8267 xmlParsePI(ctxt);
8268 } else if ((cur == '<') && (next == '!') &&
8269 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8270 if ((!terminate) &&
8271 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8272 goto done;
8273#ifdef DEBUG_PUSH
8274 xmlGenericError(xmlGenericErrorContext,
8275 "PP: Parsing Comment\n");
8276#endif
8277 xmlParseComment(ctxt);
8278 ctxt->instate = XML_PARSER_MISC;
8279 } else if ((cur == '<') && (next == '!') &&
8280 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8281 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8282 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8283 (ctxt->input->cur[8] == 'E')) {
8284 if ((!terminate) &&
8285 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8286 goto done;
8287#ifdef DEBUG_PUSH
8288 xmlGenericError(xmlGenericErrorContext,
8289 "PP: Parsing internal subset\n");
8290#endif
8291 ctxt->inSubset = 1;
8292 xmlParseDocTypeDecl(ctxt);
/* A '[' after the declaration means an internal subset follows;
 * it is handled by the DTD state. */
8293 if (RAW == '[') {
8294 ctxt->instate = XML_PARSER_DTD;
8295#ifdef DEBUG_PUSH
8296 xmlGenericError(xmlGenericErrorContext,
8297 "PP: entering DTD\n");
8298#endif
8299 } else {
8300 /*
8301 * Create and update the external subset.
8302 */
8303 ctxt->inSubset = 2;
8304 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8305 (ctxt->sax->externalSubset != NULL))
8306 ctxt->sax->externalSubset(ctxt->userData,
8307 ctxt->intSubName, ctxt->extSubSystem,
8308 ctxt->extSubURI);
8309 ctxt->inSubset = 0;
8310 ctxt->instate = XML_PARSER_PROLOG;
8311#ifdef DEBUG_PUSH
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: entering PROLOG\n");
8314#endif
8315 }
8316 } else if ((cur == '<') && (next == '!') &&
8317 (avail < 9)) {
/* "<!" seen but fewer than 9 bytes buffered, the length of
 * "<!DOCTYPE": wait for more data. */
8318 goto done;
8319 } else {
8320 ctxt->instate = XML_PARSER_START_TAG;
8321#ifdef DEBUG_PUSH
8322 xmlGenericError(xmlGenericErrorContext,
8323 "PP: entering START_TAG\n");
8324#endif
8325 }
8326 break;
/* IGNORE is reported as an internal error in push mode; the state is
 * switched to DTD and the loop continues.
 * NOTE(review): the message below has no trailing newline, unlike the
 * other internal-error messages in this function. */
8327 case XML_PARSER_IGNORE:
8328 xmlGenericError(xmlGenericErrorContext,
8329 "PP: internal error, state == IGNORE");
8330 ctxt->instate = XML_PARSER_DTD;
8331#ifdef DEBUG_PUSH
8332 xmlGenericError(xmlGenericErrorContext,
8333 "PP: entering DTD\n");
8334#endif
8335 break;
/* PROLOG: entered after the DOCTYPE declaration has been handled; only
 * PIs and comments are parsed here, anything else switches to START_TAG. */
8336 case XML_PARSER_PROLOG:
8337 SKIP_BLANKS;
8338 if (ctxt->input->buf == NULL)
8339 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8340 else
8341 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8342 if (avail < 2)
8343 goto done;
8344 cur = ctxt->input->cur[0];
8345 next = ctxt->input->cur[1];
8346 if ((cur == '<') && (next == '?')) {
8347 if ((!terminate) &&
8348 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8349 goto done;
8350#ifdef DEBUG_PUSH
8351 xmlGenericError(xmlGenericErrorContext,
8352 "PP: Parsing PI\n");
8353#endif
8354 xmlParsePI(ctxt);
8355 } else if ((cur == '<') && (next == '!') &&
8356 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8357 if ((!terminate) &&
8358 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8359 goto done;
8360#ifdef DEBUG_PUSH
8361 xmlGenericError(xmlGenericErrorContext,
8362 "PP: Parsing Comment\n");
8363#endif
8364 xmlParseComment(ctxt);
8365 ctxt->instate = XML_PARSER_PROLOG;
8366 } else if ((cur == '<') && (next == '!') &&
8367 (avail < 4)) {
/* "<!" seen but fewer than 4 bytes buffered, the length of
 * "<!--": wait for more data. */
8368 goto done;
8369 } else {
8370 ctxt->instate = XML_PARSER_START_TAG;
8371#ifdef DEBUG_PUSH
8372 xmlGenericError(xmlGenericErrorContext,
8373 "PP: entering START_TAG\n");
8374#endif
8375 }
8376 break;
/* EPILOG: entered once the element name stack is empty; only PIs and
 * comments are accepted, anything else is reported as extra content. */
8377 case XML_PARSER_EPILOG:
8378 SKIP_BLANKS;
8379 if (ctxt->input->buf == NULL)
8380 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8381 else
8382 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8383 if (avail < 2)
8384 goto done;
8385 cur = ctxt->input->cur[0];
8386 next = ctxt->input->cur[1];
8387 if ((cur == '<') && (next == '?')) {
8388 if ((!terminate) &&
8389 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8390 goto done;
8391#ifdef DEBUG_PUSH
8392 xmlGenericError(xmlGenericErrorContext,
8393 "PP: Parsing PI\n");
8394#endif
8395 xmlParsePI(ctxt);
8396 ctxt->instate = XML_PARSER_EPILOG;
8397 } else if ((cur == '<') && (next == '!') &&
8398 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8399 if ((!terminate) &&
8400 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8401 goto done;
8402#ifdef DEBUG_PUSH
8403 xmlGenericError(xmlGenericErrorContext,
8404 "PP: Parsing Comment\n");
8405#endif
8406 xmlParseComment(ctxt);
8407 ctxt->instate = XML_PARSER_EPILOG;
8408 } else if ((cur == '<') && (next == '!') &&
8409 (avail < 4)) {
8410 goto done;
8411 } else {
8412 ctxt->errNo = XML_ERR_DOCUMENT_END;
8413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8414 ctxt->sax->error(ctxt->userData,
8415 "Extra content at the end of the document\n");
8416 ctxt->wellFormed = 0;
8417 ctxt->disableSAX = 1;
8418 ctxt->instate = XML_PARSER_EOF;
8419#ifdef DEBUG_PUSH
8420 xmlGenericError(xmlGenericErrorContext,
8421 "PP: entering EOF\n");
8422#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008423 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008424 ctxt->sax->endDocument(ctxt->userData);
8425 goto done;
8426 }
8427 break;
/* START_TAG: parse one start tag (or empty-element tag) once its closing
 * '>' is buffered, pushing the element name and space stacks. */
8428 case XML_PARSER_START_TAG: {
8429 xmlChar *name, *oldname;
8430
8431 if ((avail < 2) && (ctxt->inputNr == 1))
8432 goto done;
8433 cur = ctxt->input->cur[0];
8434 if (cur != '<') {
8435 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8437 ctxt->sax->error(ctxt->userData,
8438 "Start tag expect, '<' not found\n");
8439 ctxt->wellFormed = 0;
8440 ctxt->disableSAX = 1;
8441 ctxt->instate = XML_PARSER_EOF;
8442#ifdef DEBUG_PUSH
8443 xmlGenericError(xmlGenericErrorContext,
8444 "PP: entering EOF\n");
8445#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008446 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008447 ctxt->sax->endDocument(ctxt->userData);
8448 goto done;
8449 }
8450 if ((!terminate) &&
8451 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8452 goto done;
/* push -1 on an empty space stack, otherwise repeat the current value */
8453 if (ctxt->spaceNr == 0)
8454 spacePush(ctxt, -1);
8455 else
8456 spacePush(ctxt, *ctxt->space);
8457 name = xmlParseStartTag(ctxt);
8458 if (name == NULL) {
8459 spacePop(ctxt);
8460 ctxt->instate = XML_PARSER_EOF;
8461#ifdef DEBUG_PUSH
8462 xmlGenericError(xmlGenericErrorContext,
8463 "PP: entering EOF\n");
8464#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008465 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008466 ctxt->sax->endDocument(ctxt->userData);
8467 goto done;
8468 }
/* the name stack gets its own copy; name itself is freed below */
8469 namePush(ctxt, xmlStrdup(name));
8470
8471 /*
8472 * [ VC: Root Element Type ]
8473 * The Name in the document type declaration must match
8474 * the element type of the root element.
8475 */
8476 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8477 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8478 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8479
8480 /*
8481 * Check for an Empty Element.
8482 */
8483 if ((RAW == '/') && (NXT(1) == '>')) {
8484 SKIP(2);
8485 if ((ctxt->sax != NULL) &&
8486 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8487 ctxt->sax->endElement(ctxt->userData, name);
8488 xmlFree(name);
8489 oldname = namePop(ctxt);
8490 spacePop(ctxt);
8491 if (oldname != NULL) {
8492#ifdef DEBUG_STACK
8493 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8494#endif
8495 xmlFree(oldname);
8496 }
/* an empty name stack means the root element has just been closed */
8497 if (ctxt->name == NULL) {
8498 ctxt->instate = XML_PARSER_EPILOG;
8499#ifdef DEBUG_PUSH
8500 xmlGenericError(xmlGenericErrorContext,
8501 "PP: entering EPILOG\n");
8502#endif
8503 } else {
8504 ctxt->instate = XML_PARSER_CONTENT;
8505#ifdef DEBUG_PUSH
8506 xmlGenericError(xmlGenericErrorContext,
8507 "PP: entering CONTENT\n");
8508#endif
8509 }
8510 break;
8511 }
8512 if (RAW == '>') {
8513 NEXT;
8514 } else {
8515 ctxt->errNo = XML_ERR_GT_REQUIRED;
8516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8517 ctxt->sax->error(ctxt->userData,
8518 "Couldn't find end of Start Tag %s\n",
8519 name);
8520 ctxt->wellFormed = 0;
8521 ctxt->disableSAX = 1;
8522
8523 /*
8524 * end of parsing of this node.
8525 */
8526 nodePop(ctxt);
8527 oldname = namePop(ctxt);
8528 spacePop(ctxt);
8529 if (oldname != NULL) {
8530#ifdef DEBUG_STACK
8531 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8532#endif
8533 xmlFree(oldname);
8534 }
8535 }
8536 xmlFree(name);
8537 ctxt->instate = XML_PARSER_CONTENT;
8538#ifdef DEBUG_PUSH
8539 xmlGenericError(xmlGenericErrorContext,
8540 "PP: entering CONTENT\n");
8541#endif
8542 break;
8543 }
/* CONTENT: dispatch on the next construct inside an element. test and
 * cons record the input position so that lack of progress is detected
 * at the end of the case. */
8544 case XML_PARSER_CONTENT: {
8545 const xmlChar *test;
8546 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008547 if ((avail < 2) && (ctxt->inputNr == 1))
8548 goto done;
8549 cur = ctxt->input->cur[0];
8550 next = ctxt->input->cur[1];
8551
8552 test = CUR_PTR;
8553 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008554 if ((cur == '<') && (next == '?')) {
8555 if ((!terminate) &&
8556 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8557 goto done;
8558#ifdef DEBUG_PUSH
8559 xmlGenericError(xmlGenericErrorContext,
8560 "PP: Parsing PI\n");
8561#endif
8562 xmlParsePI(ctxt);
8563 } else if ((cur == '<') && (next == '!') &&
8564 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8565 if ((!terminate) &&
8566 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8567 goto done;
8568#ifdef DEBUG_PUSH
8569 xmlGenericError(xmlGenericErrorContext,
8570 "PP: Parsing Comment\n");
8571#endif
8572 xmlParseComment(ctxt);
8573 ctxt->instate = XML_PARSER_CONTENT;
8574 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8575 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8576 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8577 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8578 (ctxt->input->cur[8] == '[')) {
/* skip the 9 characters of "<![CDATA[" */
8579 SKIP(9);
8580 ctxt->instate = XML_PARSER_CDATA_SECTION;
8581#ifdef DEBUG_PUSH
8582 xmlGenericError(xmlGenericErrorContext,
8583 "PP: entering CDATA_SECTION\n");
8584#endif
8585 break;
8586 } else if ((cur == '<') && (next == '!') &&
8587 (avail < 9)) {
8588 goto done;
8589 } else if ((cur == '<') && (next == '/')) {
8590 ctxt->instate = XML_PARSER_END_TAG;
8591#ifdef DEBUG_PUSH
8592 xmlGenericError(xmlGenericErrorContext,
8593 "PP: entering END_TAG\n");
8594#endif
8595 break;
8596 } else if (cur == '<') {
8597 ctxt->instate = XML_PARSER_START_TAG;
8598#ifdef DEBUG_PUSH
8599 xmlGenericError(xmlGenericErrorContext,
8600 "PP: entering START_TAG\n");
8601#endif
8602 break;
8603 } else if (cur == '&') {
8604 if ((!terminate) &&
8605 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8606 goto done;
8607#ifdef DEBUG_PUSH
8608 xmlGenericError(xmlGenericErrorContext,
8609 "PP: Parsing Reference\n");
8610#endif
8611 xmlParseReference(ctxt);
8612 } else {
8613 /* TODO Avoid the extra copy, handle directly !!! */
8614 /*
8615 * Goal of the following test is:
8616 * - minimize calls to the SAX 'character' callback
8617 * when they are mergeable
8618 * - handle a problem for isBlank when we only parse
8619 * a sequence of blank chars and the next one is
8620 * not available to check against '<' presence.
8621 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008622 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008623 * of the parser.
8624 */
8625 if ((ctxt->inputNr == 1) &&
8626 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8627 if ((!terminate) &&
8628 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8629 goto done;
8630 }
8631 ctxt->checkIndex = 0;
8632#ifdef DEBUG_PUSH
8633 xmlGenericError(xmlGenericErrorContext,
8634 "PP: Parsing char data\n");
8635#endif
8636 xmlParseCharData(ctxt, 0);
8637 }
8638 /*
8639 * Pop-up of finished entities.
8640 */
8641 while ((RAW == 0) && (ctxt->inputNr > 1))
8642 xmlPopInput(ctxt);
/* neither the consumed counter nor the current pointer moved: report an
 * internal error and stop rather than looping on the same input */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008643 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008644 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8646 ctxt->sax->error(ctxt->userData,
8647 "detected an error in element content\n");
8648 ctxt->wellFormed = 0;
8649 ctxt->disableSAX = 1;
8650 ctxt->instate = XML_PARSER_EOF;
8651 break;
8652 }
8653 break;
8654 }
8655 case XML_PARSER_CDATA_SECTION: {
8656 /*
8657 * The Push mode need to have the SAX callback for
8658 * cdataBlock merge back contiguous callbacks.
 * While the closing "]]>" is not buffered, data is delivered in
 * pieces of XML_PARSER_BIG_BUFFER_SIZE bytes; the "+ 2" in the
 * test leaves at least two bytes unconsumed.
8659 */
8660 int base;
8661
8662 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8663 if (base < 0) {
8664 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8665 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8666 if (ctxt->sax->cdataBlock != NULL)
8667 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8668 XML_PARSER_BIG_BUFFER_SIZE);
8669 }
8670 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8671 ctxt->checkIndex = 0;
8672 }
8673 goto done;
8674 } else {
/* base is the offset of "]]>" from the current position: deliver
 * what precedes it, then skip the data and the 3-char terminator */
8675 if ((ctxt->sax != NULL) && (base > 0) &&
8676 (!ctxt->disableSAX)) {
8677 if (ctxt->sax->cdataBlock != NULL)
8678 ctxt->sax->cdataBlock(ctxt->userData,
8679 ctxt->input->cur, base);
8680 }
8681 SKIP(base + 3);
8682 ctxt->checkIndex = 0;
8683 ctxt->instate = XML_PARSER_CONTENT;
8684#ifdef DEBUG_PUSH
8685 xmlGenericError(xmlGenericErrorContext,
8686 "PP: entering CONTENT\n");
8687#endif
8688 }
8689 break;
8690 }
/* END_TAG: parse the end tag once its '>' is buffered; an empty name
 * stack afterwards means the root element is closed (EPILOG). */
8691 case XML_PARSER_END_TAG:
8692 if (avail < 2)
8693 goto done;
8694 if ((!terminate) &&
8695 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8696 goto done;
8697 xmlParseEndTag(ctxt);
8698 if (ctxt->name == NULL) {
8699 ctxt->instate = XML_PARSER_EPILOG;
8700#ifdef DEBUG_PUSH
8701 xmlGenericError(xmlGenericErrorContext,
8702 "PP: entering EPILOG\n");
8703#endif
8704 } else {
8705 ctxt->instate = XML_PARSER_CONTENT;
8706#ifdef DEBUG_PUSH
8707 xmlGenericError(xmlGenericErrorContext,
8708 "PP: entering CONTENT\n");
8709#endif
8710 }
8711 break;
8712 case XML_PARSER_DTD: {
8713 /*
8714 * Sorry but progressive parsing of the internal subset
8715 * is not expected to be supported. We first check that
8716 * the full content of the internal subset is available and
8717 * the parsing is launched only at that point.
8718 * Internal subset ends up with "']' S? '>'" in an unescaped
8719 * section and not in a ']]>' sequence which are conditional
8720 * sections (whoever argued to keep that crap in XML deserve
8721 * a place in hell !).
 *
 * NOTE(review): ctxt->input->buf is dereferenced below without
 * the NULL check used when avail is computed -- confirm that
 * this state is only reached with a buffered input.
8722 */
8723 int base, i;
8724 xmlChar *buf;
8725 xmlChar quote = 0;
8726
8727 base = ctxt->input->cur - ctxt->input->base;
8728 if (base < 0) return(0);
8729 if (ctxt->checkIndex > base)
8730 base = ctxt->checkIndex;
8731 buf = ctxt->input->buf->buffer->content;
8732 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8733 base++) {
/* inside a quoted literal only the matching quote is significant */
8734 if (quote != 0) {
8735 if (buf[base] == quote)
8736 quote = 0;
8737 continue;
8738 }
8739 if (buf[base] == '"') {
8740 quote = '"';
8741 continue;
8742 }
8743 if (buf[base] == '\'') {
8744 quote = '\'';
8745 continue;
8746 }
8747 if (buf[base] == ']') {
8748 if ((unsigned int) base +1 >=
8749 ctxt->input->buf->buffer->use)
8750 break;
8751 if (buf[base + 1] == ']') {
8752 /* conditional crap, skip both ']' ! */
8753 base++;
8754 continue;
8755 }
/* a single ']': the subset is complete if any '>' is buffered
 * after it */
8756 for (i = 0;
8757 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8758 i++) {
8759 if (buf[base + i] == '>')
8760 goto found_end_int_subset;
8761 }
8762 break;
8763 }
8764 }
8765 /*
8766 * We didn't find the end of the Internal subset
 * The scan position is only remembered when not stopped inside
 * a quoted literal.
8767 */
8768 if (quote == 0)
8769 ctxt->checkIndex = base;
8770#ifdef DEBUG_PUSH
8771 if (next == 0)
8772 xmlGenericError(xmlGenericErrorContext,
8773 "PP: lookup of int subset end filed\n");
8774#endif
8775 goto done;
8776
8777found_end_int_subset:
8778 xmlParseInternalSubset(ctxt);
8779 ctxt->inSubset = 2;
8780 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8781 (ctxt->sax->externalSubset != NULL))
8782 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8783 ctxt->extSubSystem, ctxt->extSubURI);
8784 ctxt->inSubset = 0;
8785 ctxt->instate = XML_PARSER_PROLOG;
8786 ctxt->checkIndex = 0;
8787#ifdef DEBUG_PUSH
8788 xmlGenericError(xmlGenericErrorContext,
8789 "PP: entering PROLOG\n");
8790#endif
8791 break;
8792 }
/*
 * The remaining states are reported as internal errors when seen by the
 * push loop; each one prints a diagnostic and switches to another state
 * so that the loop can continue.
 */
8793 case XML_PARSER_COMMENT:
8794 xmlGenericError(xmlGenericErrorContext,
8795 "PP: internal error, state == COMMENT\n");
8796 ctxt->instate = XML_PARSER_CONTENT;
8797#ifdef DEBUG_PUSH
8798 xmlGenericError(xmlGenericErrorContext,
8799 "PP: entering CONTENT\n");
8800#endif
8801 break;
8802 case XML_PARSER_PI:
8803 xmlGenericError(xmlGenericErrorContext,
8804 "PP: internal error, state == PI\n");
8805 ctxt->instate = XML_PARSER_CONTENT;
8806#ifdef DEBUG_PUSH
8807 xmlGenericError(xmlGenericErrorContext,
8808 "PP: entering CONTENT\n");
8809#endif
8810 break;
8811 case XML_PARSER_ENTITY_DECL:
8812 xmlGenericError(xmlGenericErrorContext,
8813 "PP: internal error, state == ENTITY_DECL\n");
8814 ctxt->instate = XML_PARSER_DTD;
8815#ifdef DEBUG_PUSH
8816 xmlGenericError(xmlGenericErrorContext,
8817 "PP: entering DTD\n");
8818#endif
8819 break;
/* NOTE(review): the state is set to CONTENT here while the DEBUG_PUSH
 * trace below says "entering DTD" -- confirm which one is intended. */
8820 case XML_PARSER_ENTITY_VALUE:
8821 xmlGenericError(xmlGenericErrorContext,
8822 "PP: internal error, state == ENTITY_VALUE\n");
8823 ctxt->instate = XML_PARSER_CONTENT;
8824#ifdef DEBUG_PUSH
8825 xmlGenericError(xmlGenericErrorContext,
8826 "PP: entering DTD\n");
8827#endif
8828 break;
8829 case XML_PARSER_ATTRIBUTE_VALUE:
8830 xmlGenericError(xmlGenericErrorContext,
8831 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8832 ctxt->instate = XML_PARSER_START_TAG;
8833#ifdef DEBUG_PUSH
8834 xmlGenericError(xmlGenericErrorContext,
8835 "PP: entering START_TAG\n");
8836#endif
8837 break;
8838 case XML_PARSER_SYSTEM_LITERAL:
8839 xmlGenericError(xmlGenericErrorContext,
8840 "PP: internal error, state == SYSTEM_LITERAL\n");
8841 ctxt->instate = XML_PARSER_START_TAG;
8842#ifdef DEBUG_PUSH
8843 xmlGenericError(xmlGenericErrorContext,
8844 "PP: entering START_TAG\n");
8845#endif
8846 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008847 case XML_PARSER_PUBLIC_LITERAL:
8848 xmlGenericError(xmlGenericErrorContext,
8849 "PP: internal error, state == PUBLIC_LITERAL\n");
8850 ctxt->instate = XML_PARSER_START_TAG;
8851#ifdef DEBUG_PUSH
8852 xmlGenericError(xmlGenericErrorContext,
8853 "PP: entering START_TAG\n");
8854#endif
8855 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008856 }
8857 }
/* Common exit: reached when more data is needed or parsing has ended. */
8858done:
8859#ifdef DEBUG_PUSH
8860 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8861#endif
8862 return(ret);
8863}
8864
8865/**
Owen Taylor3473f882001-02-23 17:55:21 +00008866 * xmlParseChunk:
8867 * @ctxt: an XML parser context
8868 * @chunk: an char array
8869 * @size: the size in byte of the chunk
8870 * @terminate: last chunk indicator
8871 *
8872 * Parse a Chunk of memory
8873 *
8874 * Returns zero if no error, the xmlParserErrors otherwise.
8875 */
8876int
8877xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8878 int terminate) {
8879 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8880 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8881 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8882 int cur = ctxt->input->cur - ctxt->input->base;
8883
8884 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8885 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8886 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008887 ctxt->input->end =
8888 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008889#ifdef DEBUG_PUSH
8890 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8891#endif
8892
8893 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8894 xmlParseTryOrFinish(ctxt, terminate);
8895 } else if (ctxt->instate != XML_PARSER_EOF) {
8896 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8897 xmlParserInputBufferPtr in = ctxt->input->buf;
8898 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8899 (in->raw != NULL)) {
8900 int nbchars;
8901
8902 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8903 if (nbchars < 0) {
8904 xmlGenericError(xmlGenericErrorContext,
8905 "xmlParseChunk: encoder error\n");
8906 return(XML_ERR_INVALID_ENCODING);
8907 }
8908 }
8909 }
8910 }
8911 xmlParseTryOrFinish(ctxt, terminate);
8912 if (terminate) {
8913 /*
8914 * Check for termination
8915 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008916 int avail = 0;
8917 if (ctxt->input->buf == NULL)
8918 avail = ctxt->input->length -
8919 (ctxt->input->cur - ctxt->input->base);
8920 else
8921 avail = ctxt->input->buf->buffer->use -
8922 (ctxt->input->cur - ctxt->input->base);
8923
Owen Taylor3473f882001-02-23 17:55:21 +00008924 if ((ctxt->instate != XML_PARSER_EOF) &&
8925 (ctxt->instate != XML_PARSER_EPILOG)) {
8926 ctxt->errNo = XML_ERR_DOCUMENT_END;
8927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8928 ctxt->sax->error(ctxt->userData,
8929 "Extra content at the end of the document\n");
8930 ctxt->wellFormed = 0;
8931 ctxt->disableSAX = 1;
8932 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008933 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
8934 ctxt->errNo = XML_ERR_DOCUMENT_END;
8935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8936 ctxt->sax->error(ctxt->userData,
8937 "Extra content at the end of the document\n");
8938 ctxt->wellFormed = 0;
8939 ctxt->disableSAX = 1;
8940
8941 }
Owen Taylor3473f882001-02-23 17:55:21 +00008942 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008943 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008944 ctxt->sax->endDocument(ctxt->userData);
8945 }
8946 ctxt->instate = XML_PARSER_EOF;
8947 }
8948 return((xmlParserErrors) ctxt->errNo);
8949}
8950
8951/************************************************************************
8952 * *
8953 * I/O front end functions to the parser *
8954 * *
8955 ************************************************************************/
8956
8957/**
8958 * xmlStopParser:
8959 * @ctxt: an XML parser context
8960 *
8961 * Blocks further parser processing
8962 */
8963void
8964xmlStopParser(xmlParserCtxtPtr ctxt) {
8965 ctxt->instate = XML_PARSER_EOF;
8966 if (ctxt->input != NULL)
8967 ctxt->input->cur = BAD_CAST"";
8968}
8969
8970/**
8971 * xmlCreatePushParserCtxt:
8972 * @sax: a SAX handler
8973 * @user_data: The user data returned on SAX callbacks
8974 * @chunk: a pointer to an array of chars
8975 * @size: number of chars in the array
8976 * @filename: an optional file name or URI
8977 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00008978 * Create a parser context for using the XML parser in push mode.
8979 * If @buffer and @size are non-NULL, the data is used to detect
8980 * the encoding. The remaining characters will be parsed so they
8981 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00008982 * To allow content encoding detection, @size should be >= 4
8983 * The value of @filename is used for fetching external entities
8984 * and error/warning reports.
8985 *
8986 * Returns the new parser context or NULL
8987 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00008988
Owen Taylor3473f882001-02-23 17:55:21 +00008989xmlParserCtxtPtr
8990xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8991 const char *chunk, int size, const char *filename) {
8992 xmlParserCtxtPtr ctxt;
8993 xmlParserInputPtr inputStream;
8994 xmlParserInputBufferPtr buf;
8995 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8996
8997 /*
8998 * plug some encoding conversion routines
8999 */
9000 if ((chunk != NULL) && (size >= 4))
9001 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9002
9003 buf = xmlAllocParserInputBuffer(enc);
9004 if (buf == NULL) return(NULL);
9005
9006 ctxt = xmlNewParserCtxt();
9007 if (ctxt == NULL) {
9008 xmlFree(buf);
9009 return(NULL);
9010 }
9011 if (sax != NULL) {
9012 if (ctxt->sax != &xmlDefaultSAXHandler)
9013 xmlFree(ctxt->sax);
9014 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9015 if (ctxt->sax == NULL) {
9016 xmlFree(buf);
9017 xmlFree(ctxt);
9018 return(NULL);
9019 }
9020 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9021 if (user_data != NULL)
9022 ctxt->userData = user_data;
9023 }
9024 if (filename == NULL) {
9025 ctxt->directory = NULL;
9026 } else {
9027 ctxt->directory = xmlParserGetDirectory(filename);
9028 }
9029
9030 inputStream = xmlNewInputStream(ctxt);
9031 if (inputStream == NULL) {
9032 xmlFreeParserCtxt(ctxt);
9033 return(NULL);
9034 }
9035
9036 if (filename == NULL)
9037 inputStream->filename = NULL;
9038 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009039 inputStream->filename = (char *)
9040 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009041 inputStream->buf = buf;
9042 inputStream->base = inputStream->buf->buffer->content;
9043 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009044 inputStream->end =
9045 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009046
9047 inputPush(ctxt, inputStream);
9048
9049 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9050 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009051 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9052 int cur = ctxt->input->cur - ctxt->input->base;
9053
Owen Taylor3473f882001-02-23 17:55:21 +00009054 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009055
9056 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9057 ctxt->input->cur = ctxt->input->base + cur;
9058 ctxt->input->end =
9059 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009060#ifdef DEBUG_PUSH
9061 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9062#endif
9063 }
9064
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009065 if (enc != XML_CHAR_ENCODING_NONE) {
9066 xmlSwitchEncoding(ctxt, enc);
9067 }
9068
Owen Taylor3473f882001-02-23 17:55:21 +00009069 return(ctxt);
9070}
9071
9072/**
9073 * xmlCreateIOParserCtxt:
9074 * @sax: a SAX handler
9075 * @user_data: The user data returned on SAX callbacks
9076 * @ioread: an I/O read function
9077 * @ioclose: an I/O close function
9078 * @ioctx: an I/O handler
9079 * @enc: the charset encoding if known
9080 *
9081 * Create a parser context for using the XML parser with an existing
9082 * I/O stream
9083 *
9084 * Returns the new parser context or NULL
9085 */
9086xmlParserCtxtPtr
9087xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9088 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9089 void *ioctx, xmlCharEncoding enc) {
9090 xmlParserCtxtPtr ctxt;
9091 xmlParserInputPtr inputStream;
9092 xmlParserInputBufferPtr buf;
9093
9094 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9095 if (buf == NULL) return(NULL);
9096
9097 ctxt = xmlNewParserCtxt();
9098 if (ctxt == NULL) {
9099 xmlFree(buf);
9100 return(NULL);
9101 }
9102 if (sax != NULL) {
9103 if (ctxt->sax != &xmlDefaultSAXHandler)
9104 xmlFree(ctxt->sax);
9105 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9106 if (ctxt->sax == NULL) {
9107 xmlFree(buf);
9108 xmlFree(ctxt);
9109 return(NULL);
9110 }
9111 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9112 if (user_data != NULL)
9113 ctxt->userData = user_data;
9114 }
9115
9116 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9117 if (inputStream == NULL) {
9118 xmlFreeParserCtxt(ctxt);
9119 return(NULL);
9120 }
9121 inputPush(ctxt, inputStream);
9122
9123 return(ctxt);
9124}
9125
9126/************************************************************************
9127 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009128 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009129 * *
9130 ************************************************************************/
9131
9132/**
9133 * xmlIOParseDTD:
9134 * @sax: the SAX handler block or NULL
9135 * @input: an Input Buffer
9136 * @enc: the charset encoding if known
9137 *
9138 * Load and parse a DTD
9139 *
9140 * Returns the resulting xmlDtdPtr or NULL in case of error.
9141 * @input will be freed at parsing end.
9142 */
9143
9144xmlDtdPtr
9145xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9146 xmlCharEncoding enc) {
9147 xmlDtdPtr ret = NULL;
9148 xmlParserCtxtPtr ctxt;
9149 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009150 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009151
9152 if (input == NULL)
9153 return(NULL);
9154
9155 ctxt = xmlNewParserCtxt();
9156 if (ctxt == NULL) {
9157 return(NULL);
9158 }
9159
9160 /*
9161 * Set-up the SAX context
9162 */
9163 if (sax != NULL) {
9164 if (ctxt->sax != NULL)
9165 xmlFree(ctxt->sax);
9166 ctxt->sax = sax;
9167 ctxt->userData = NULL;
9168 }
9169
9170 /*
9171 * generate a parser input from the I/O handler
9172 */
9173
9174 pinput = xmlNewIOInputStream(ctxt, input, enc);
9175 if (pinput == NULL) {
9176 if (sax != NULL) ctxt->sax = NULL;
9177 xmlFreeParserCtxt(ctxt);
9178 return(NULL);
9179 }
9180
9181 /*
9182 * plug some encoding conversion routines here.
9183 */
9184 xmlPushInput(ctxt, pinput);
9185
9186 pinput->filename = NULL;
9187 pinput->line = 1;
9188 pinput->col = 1;
9189 pinput->base = ctxt->input->cur;
9190 pinput->cur = ctxt->input->cur;
9191 pinput->free = NULL;
9192
9193 /*
9194 * let's parse that entity knowing it's an external subset.
9195 */
9196 ctxt->inSubset = 2;
9197 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9198 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9199 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009200
9201 if (enc == XML_CHAR_ENCODING_NONE) {
9202 /*
9203 * Get the 4 first bytes and decode the charset
9204 * if enc != XML_CHAR_ENCODING_NONE
9205 * plug some encoding conversion routines.
9206 */
9207 start[0] = RAW;
9208 start[1] = NXT(1);
9209 start[2] = NXT(2);
9210 start[3] = NXT(3);
9211 enc = xmlDetectCharEncoding(start, 4);
9212 if (enc != XML_CHAR_ENCODING_NONE) {
9213 xmlSwitchEncoding(ctxt, enc);
9214 }
9215 }
9216
Owen Taylor3473f882001-02-23 17:55:21 +00009217 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9218
9219 if (ctxt->myDoc != NULL) {
9220 if (ctxt->wellFormed) {
9221 ret = ctxt->myDoc->extSubset;
9222 ctxt->myDoc->extSubset = NULL;
9223 } else {
9224 ret = NULL;
9225 }
9226 xmlFreeDoc(ctxt->myDoc);
9227 ctxt->myDoc = NULL;
9228 }
9229 if (sax != NULL) ctxt->sax = NULL;
9230 xmlFreeParserCtxt(ctxt);
9231
9232 return(ret);
9233}
9234
9235/**
9236 * xmlSAXParseDTD:
9237 * @sax: the SAX handler block
9238 * @ExternalID: a NAME* containing the External ID of the DTD
9239 * @SystemID: a NAME* containing the URL to the DTD
9240 *
9241 * Load and parse an external subset.
9242 *
9243 * Returns the resulting xmlDtdPtr or NULL in case of error.
9244 */
9245
9246xmlDtdPtr
9247xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9248 const xmlChar *SystemID) {
9249 xmlDtdPtr ret = NULL;
9250 xmlParserCtxtPtr ctxt;
9251 xmlParserInputPtr input = NULL;
9252 xmlCharEncoding enc;
9253
9254 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9255
9256 ctxt = xmlNewParserCtxt();
9257 if (ctxt == NULL) {
9258 return(NULL);
9259 }
9260
9261 /*
9262 * Set-up the SAX context
9263 */
9264 if (sax != NULL) {
9265 if (ctxt->sax != NULL)
9266 xmlFree(ctxt->sax);
9267 ctxt->sax = sax;
9268 ctxt->userData = NULL;
9269 }
9270
9271 /*
9272 * Ask the Entity resolver to load the damn thing
9273 */
9274
9275 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9276 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9277 if (input == NULL) {
9278 if (sax != NULL) ctxt->sax = NULL;
9279 xmlFreeParserCtxt(ctxt);
9280 return(NULL);
9281 }
9282
9283 /*
9284 * plug some encoding conversion routines here.
9285 */
9286 xmlPushInput(ctxt, input);
9287 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9288 xmlSwitchEncoding(ctxt, enc);
9289
9290 if (input->filename == NULL)
9291 input->filename = (char *) xmlStrdup(SystemID);
9292 input->line = 1;
9293 input->col = 1;
9294 input->base = ctxt->input->cur;
9295 input->cur = ctxt->input->cur;
9296 input->free = NULL;
9297
9298 /*
9299 * let's parse that entity knowing it's an external subset.
9300 */
9301 ctxt->inSubset = 2;
9302 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9303 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9304 ExternalID, SystemID);
9305 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9306
9307 if (ctxt->myDoc != NULL) {
9308 if (ctxt->wellFormed) {
9309 ret = ctxt->myDoc->extSubset;
9310 ctxt->myDoc->extSubset = NULL;
9311 } else {
9312 ret = NULL;
9313 }
9314 xmlFreeDoc(ctxt->myDoc);
9315 ctxt->myDoc = NULL;
9316 }
9317 if (sax != NULL) ctxt->sax = NULL;
9318 xmlFreeParserCtxt(ctxt);
9319
9320 return(ret);
9321}
9322
9323/**
9324 * xmlParseDTD:
9325 * @ExternalID: a NAME* containing the External ID of the DTD
9326 * @SystemID: a NAME* containing the URL to the DTD
9327 *
9328 * Load and parse an external subset.
9329 *
9330 * Returns the resulting xmlDtdPtr or NULL in case of error.
9331 */
9332
9333xmlDtdPtr
9334xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9335 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9336}
9337
9338/************************************************************************
9339 * *
9340 * Front ends when parsing an Entity *
9341 * *
9342 ************************************************************************/
9343
9344/**
Owen Taylor3473f882001-02-23 17:55:21 +00009345 * xmlParseCtxtExternalEntity:
9346 * @ctx: the existing parsing context
9347 * @URL: the URL for the entity to load
9348 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009349 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009350 *
9351 * Parse an external general entity within an existing parsing context
9352 * An external general parsed entity is well-formed if it matches the
9353 * production labeled extParsedEnt.
9354 *
9355 * [78] extParsedEnt ::= TextDecl? content
9356 *
9357 * Returns 0 if the entity is well formed, -1 in case of args problem and
9358 * the parser error code otherwise
9359 */
9360
9361int
9362xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009363 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009364 xmlParserCtxtPtr ctxt;
9365 xmlDocPtr newDoc;
9366 xmlSAXHandlerPtr oldsax = NULL;
9367 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009368 xmlChar start[4];
9369 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009370
9371 if (ctx->depth > 40) {
9372 return(XML_ERR_ENTITY_LOOP);
9373 }
9374
Daniel Veillardcda96922001-08-21 10:56:31 +00009375 if (lst != NULL)
9376 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009377 if ((URL == NULL) && (ID == NULL))
9378 return(-1);
9379 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9380 return(-1);
9381
9382
9383 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9384 if (ctxt == NULL) return(-1);
9385 ctxt->userData = ctxt;
9386 oldsax = ctxt->sax;
9387 ctxt->sax = ctx->sax;
9388 newDoc = xmlNewDoc(BAD_CAST "1.0");
9389 if (newDoc == NULL) {
9390 xmlFreeParserCtxt(ctxt);
9391 return(-1);
9392 }
9393 if (ctx->myDoc != NULL) {
9394 newDoc->intSubset = ctx->myDoc->intSubset;
9395 newDoc->extSubset = ctx->myDoc->extSubset;
9396 }
9397 if (ctx->myDoc->URL != NULL) {
9398 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9399 }
9400 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9401 if (newDoc->children == NULL) {
9402 ctxt->sax = oldsax;
9403 xmlFreeParserCtxt(ctxt);
9404 newDoc->intSubset = NULL;
9405 newDoc->extSubset = NULL;
9406 xmlFreeDoc(newDoc);
9407 return(-1);
9408 }
9409 nodePush(ctxt, newDoc->children);
9410 if (ctx->myDoc == NULL) {
9411 ctxt->myDoc = newDoc;
9412 } else {
9413 ctxt->myDoc = ctx->myDoc;
9414 newDoc->children->doc = ctx->myDoc;
9415 }
9416
Daniel Veillard87a764e2001-06-20 17:41:10 +00009417 /*
9418 * Get the 4 first bytes and decode the charset
9419 * if enc != XML_CHAR_ENCODING_NONE
9420 * plug some encoding conversion routines.
9421 */
9422 GROW
9423 start[0] = RAW;
9424 start[1] = NXT(1);
9425 start[2] = NXT(2);
9426 start[3] = NXT(3);
9427 enc = xmlDetectCharEncoding(start, 4);
9428 if (enc != XML_CHAR_ENCODING_NONE) {
9429 xmlSwitchEncoding(ctxt, enc);
9430 }
9431
Owen Taylor3473f882001-02-23 17:55:21 +00009432 /*
9433 * Parse a possible text declaration first
9434 */
Owen Taylor3473f882001-02-23 17:55:21 +00009435 if ((RAW == '<') && (NXT(1) == '?') &&
9436 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9437 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9438 xmlParseTextDecl(ctxt);
9439 }
9440
9441 /*
9442 * Doing validity checking on chunk doesn't make sense
9443 */
9444 ctxt->instate = XML_PARSER_CONTENT;
9445 ctxt->validate = ctx->validate;
9446 ctxt->loadsubset = ctx->loadsubset;
9447 ctxt->depth = ctx->depth + 1;
9448 ctxt->replaceEntities = ctx->replaceEntities;
9449 if (ctxt->validate) {
9450 ctxt->vctxt.error = ctx->vctxt.error;
9451 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009452 } else {
9453 ctxt->vctxt.error = NULL;
9454 ctxt->vctxt.warning = NULL;
9455 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009456 ctxt->vctxt.nodeTab = NULL;
9457 ctxt->vctxt.nodeNr = 0;
9458 ctxt->vctxt.nodeMax = 0;
9459 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009460
9461 xmlParseContent(ctxt);
9462
9463 if ((RAW == '<') && (NXT(1) == '/')) {
9464 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9466 ctxt->sax->error(ctxt->userData,
9467 "chunk is not well balanced\n");
9468 ctxt->wellFormed = 0;
9469 ctxt->disableSAX = 1;
9470 } else if (RAW != 0) {
9471 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9473 ctxt->sax->error(ctxt->userData,
9474 "extra content at the end of well balanced chunk\n");
9475 ctxt->wellFormed = 0;
9476 ctxt->disableSAX = 1;
9477 }
9478 if (ctxt->node != newDoc->children) {
9479 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9481 ctxt->sax->error(ctxt->userData,
9482 "chunk is not well balanced\n");
9483 ctxt->wellFormed = 0;
9484 ctxt->disableSAX = 1;
9485 }
9486
9487 if (!ctxt->wellFormed) {
9488 if (ctxt->errNo == 0)
9489 ret = 1;
9490 else
9491 ret = ctxt->errNo;
9492 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009493 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009494 xmlNodePtr cur;
9495
9496 /*
9497 * Return the newly created nodeset after unlinking it from
9498 * they pseudo parent.
9499 */
9500 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009501 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009502 while (cur != NULL) {
9503 cur->parent = NULL;
9504 cur = cur->next;
9505 }
9506 newDoc->children->children = NULL;
9507 }
9508 ret = 0;
9509 }
9510 ctxt->sax = oldsax;
9511 xmlFreeParserCtxt(ctxt);
9512 newDoc->intSubset = NULL;
9513 newDoc->extSubset = NULL;
9514 xmlFreeDoc(newDoc);
9515
9516 return(ret);
9517}
9518
9519/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009520 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009521 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009522 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009523 * @sax: the SAX handler bloc (possibly NULL)
9524 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9525 * @depth: Used for loop detection, use 0
9526 * @URL: the URL for the entity to load
9527 * @ID: the System ID for the entity to load
9528 * @list: the return value for the set of parsed nodes
9529 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009530 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009531 *
9532 * Returns 0 if the entity is well formed, -1 in case of args problem and
9533 * the parser error code otherwise
9534 */
9535
Daniel Veillard257d9102001-05-08 10:41:44 +00009536static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009537xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9538 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009539 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009540 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009541 xmlParserCtxtPtr ctxt;
9542 xmlDocPtr newDoc;
9543 xmlSAXHandlerPtr oldsax = NULL;
9544 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009545 xmlChar start[4];
9546 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009547
9548 if (depth > 40) {
9549 return(XML_ERR_ENTITY_LOOP);
9550 }
9551
9552
9553
9554 if (list != NULL)
9555 *list = NULL;
9556 if ((URL == NULL) && (ID == NULL))
9557 return(-1);
9558 if (doc == NULL) /* @@ relax but check for dereferences */
9559 return(-1);
9560
9561
9562 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9563 if (ctxt == NULL) return(-1);
9564 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009565 if (oldctxt != NULL) {
9566 ctxt->_private = oldctxt->_private;
9567 ctxt->loadsubset = oldctxt->loadsubset;
9568 ctxt->validate = oldctxt->validate;
9569 ctxt->external = oldctxt->external;
9570 } else {
9571 /*
9572 * Doing validity checking on chunk without context
9573 * doesn't make sense
9574 */
9575 ctxt->_private = NULL;
9576 ctxt->validate = 0;
9577 ctxt->external = 2;
9578 ctxt->loadsubset = 0;
9579 }
Owen Taylor3473f882001-02-23 17:55:21 +00009580 if (sax != NULL) {
9581 oldsax = ctxt->sax;
9582 ctxt->sax = sax;
9583 if (user_data != NULL)
9584 ctxt->userData = user_data;
9585 }
9586 newDoc = xmlNewDoc(BAD_CAST "1.0");
9587 if (newDoc == NULL) {
9588 xmlFreeParserCtxt(ctxt);
9589 return(-1);
9590 }
9591 if (doc != NULL) {
9592 newDoc->intSubset = doc->intSubset;
9593 newDoc->extSubset = doc->extSubset;
9594 }
9595 if (doc->URL != NULL) {
9596 newDoc->URL = xmlStrdup(doc->URL);
9597 }
9598 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9599 if (newDoc->children == NULL) {
9600 if (sax != NULL)
9601 ctxt->sax = oldsax;
9602 xmlFreeParserCtxt(ctxt);
9603 newDoc->intSubset = NULL;
9604 newDoc->extSubset = NULL;
9605 xmlFreeDoc(newDoc);
9606 return(-1);
9607 }
9608 nodePush(ctxt, newDoc->children);
9609 if (doc == NULL) {
9610 ctxt->myDoc = newDoc;
9611 } else {
9612 ctxt->myDoc = doc;
9613 newDoc->children->doc = doc;
9614 }
9615
Daniel Veillard87a764e2001-06-20 17:41:10 +00009616 /*
9617 * Get the 4 first bytes and decode the charset
9618 * if enc != XML_CHAR_ENCODING_NONE
9619 * plug some encoding conversion routines.
9620 */
9621 GROW;
9622 start[0] = RAW;
9623 start[1] = NXT(1);
9624 start[2] = NXT(2);
9625 start[3] = NXT(3);
9626 enc = xmlDetectCharEncoding(start, 4);
9627 if (enc != XML_CHAR_ENCODING_NONE) {
9628 xmlSwitchEncoding(ctxt, enc);
9629 }
9630
Owen Taylor3473f882001-02-23 17:55:21 +00009631 /*
9632 * Parse a possible text declaration first
9633 */
Owen Taylor3473f882001-02-23 17:55:21 +00009634 if ((RAW == '<') && (NXT(1) == '?') &&
9635 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9636 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9637 xmlParseTextDecl(ctxt);
9638 }
9639
Owen Taylor3473f882001-02-23 17:55:21 +00009640 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009641 ctxt->depth = depth;
9642
9643 xmlParseContent(ctxt);
9644
Daniel Veillard561b7f82002-03-20 21:55:57 +00009645 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009646 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9648 ctxt->sax->error(ctxt->userData,
9649 "chunk is not well balanced\n");
9650 ctxt->wellFormed = 0;
9651 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009652 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009653 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9655 ctxt->sax->error(ctxt->userData,
9656 "extra content at the end of well balanced chunk\n");
9657 ctxt->wellFormed = 0;
9658 ctxt->disableSAX = 1;
9659 }
9660 if (ctxt->node != newDoc->children) {
9661 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9663 ctxt->sax->error(ctxt->userData,
9664 "chunk is not well balanced\n");
9665 ctxt->wellFormed = 0;
9666 ctxt->disableSAX = 1;
9667 }
9668
9669 if (!ctxt->wellFormed) {
9670 if (ctxt->errNo == 0)
9671 ret = 1;
9672 else
9673 ret = ctxt->errNo;
9674 } else {
9675 if (list != NULL) {
9676 xmlNodePtr cur;
9677
9678 /*
9679 * Return the newly created nodeset after unlinking it from
9680 * they pseudo parent.
9681 */
9682 cur = newDoc->children->children;
9683 *list = cur;
9684 while (cur != NULL) {
9685 cur->parent = NULL;
9686 cur = cur->next;
9687 }
9688 newDoc->children->children = NULL;
9689 }
9690 ret = 0;
9691 }
9692 if (sax != NULL)
9693 ctxt->sax = oldsax;
9694 xmlFreeParserCtxt(ctxt);
9695 newDoc->intSubset = NULL;
9696 newDoc->extSubset = NULL;
9697 xmlFreeDoc(newDoc);
9698
9699 return(ret);
9700}
9701
9702/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009703 * xmlParseExternalEntity:
9704 * @doc: the document the chunk pertains to
9705 * @sax: the SAX handler bloc (possibly NULL)
9706 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9707 * @depth: Used for loop detection, use 0
9708 * @URL: the URL for the entity to load
9709 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009710 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009711 *
9712 * Parse an external general entity
9713 * An external general parsed entity is well-formed if it matches the
9714 * production labeled extParsedEnt.
9715 *
9716 * [78] extParsedEnt ::= TextDecl? content
9717 *
9718 * Returns 0 if the entity is well formed, -1 in case of args problem and
9719 * the parser error code otherwise
9720 */
9721
9722int
9723xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009724 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009725 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009726 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009727}
9728
9729/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009730 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009731 * @doc: the document the chunk pertains to
9732 * @sax: the SAX handler bloc (possibly NULL)
9733 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9734 * @depth: Used for loop detection, use 0
9735 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009736 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009737 *
9738 * Parse a well-balanced chunk of an XML document
9739 * called by the parser
9740 * The allowed sequence for the Well Balanced Chunk is the one defined by
9741 * the content production in the XML grammar:
9742 *
9743 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9744 *
9745 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9746 * the parser error code otherwise
9747 */
9748
9749int
9750xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009751 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009752 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9753 depth, string, lst, 0 );
9754}
9755
9756/**
9757 * xmlParseBalancedChunkMemoryRecover:
9758 * @doc: the document the chunk pertains to
9759 * @sax: the SAX handler bloc (possibly NULL)
9760 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9761 * @depth: Used for loop detection, use 0
9762 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9763 * @lst: the return value for the set of parsed nodes
9764 * @recover: return nodes even if the data is broken (use 0)
9765 *
9766 *
9767 * Parse a well-balanced chunk of an XML document
9768 * called by the parser
9769 * The allowed sequence for the Well Balanced Chunk is the one defined by
9770 * the content production in the XML grammar:
9771 *
9772 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9773 *
9774 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9775 * the parser error code otherwise
9776 *
9777 * In case recover is set to 1, the nodelist will not be empty even if
9778 * the parsed chunk is not well balanced.
9779 */
9780int
9781xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9782 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
9783 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +00009784 xmlParserCtxtPtr ctxt;
9785 xmlDocPtr newDoc;
9786 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +00009787 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +00009788 int size;
9789 int ret = 0;
9790
9791 if (depth > 40) {
9792 return(XML_ERR_ENTITY_LOOP);
9793 }
9794
9795
Daniel Veillardcda96922001-08-21 10:56:31 +00009796 if (lst != NULL)
9797 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009798 if (string == NULL)
9799 return(-1);
9800
9801 size = xmlStrlen(string);
9802
9803 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9804 if (ctxt == NULL) return(-1);
9805 ctxt->userData = ctxt;
9806 if (sax != NULL) {
9807 oldsax = ctxt->sax;
9808 ctxt->sax = sax;
9809 if (user_data != NULL)
9810 ctxt->userData = user_data;
9811 }
9812 newDoc = xmlNewDoc(BAD_CAST "1.0");
9813 if (newDoc == NULL) {
9814 xmlFreeParserCtxt(ctxt);
9815 return(-1);
9816 }
9817 if (doc != NULL) {
9818 newDoc->intSubset = doc->intSubset;
9819 newDoc->extSubset = doc->extSubset;
9820 }
9821 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9822 if (newDoc->children == NULL) {
9823 if (sax != NULL)
9824 ctxt->sax = oldsax;
9825 xmlFreeParserCtxt(ctxt);
9826 newDoc->intSubset = NULL;
9827 newDoc->extSubset = NULL;
9828 xmlFreeDoc(newDoc);
9829 return(-1);
9830 }
9831 nodePush(ctxt, newDoc->children);
9832 if (doc == NULL) {
9833 ctxt->myDoc = newDoc;
9834 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +00009835 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +00009836 newDoc->children->doc = doc;
9837 }
9838 ctxt->instate = XML_PARSER_CONTENT;
9839 ctxt->depth = depth;
9840
9841 /*
9842 * Doing validity checking on chunk doesn't make sense
9843 */
9844 ctxt->validate = 0;
9845 ctxt->loadsubset = 0;
9846
Daniel Veillard935494a2002-10-22 14:22:46 +00009847 content = doc->children;
9848 doc->children = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009849 xmlParseContent(ctxt);
Daniel Veillard935494a2002-10-22 14:22:46 +00009850 doc->children = content;
Owen Taylor3473f882001-02-23 17:55:21 +00009851
9852 if ((RAW == '<') && (NXT(1) == '/')) {
9853 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9855 ctxt->sax->error(ctxt->userData,
9856 "chunk is not well balanced\n");
9857 ctxt->wellFormed = 0;
9858 ctxt->disableSAX = 1;
9859 } else if (RAW != 0) {
9860 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9862 ctxt->sax->error(ctxt->userData,
9863 "extra content at the end of well balanced chunk\n");
9864 ctxt->wellFormed = 0;
9865 ctxt->disableSAX = 1;
9866 }
9867 if (ctxt->node != newDoc->children) {
9868 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9870 ctxt->sax->error(ctxt->userData,
9871 "chunk is not well balanced\n");
9872 ctxt->wellFormed = 0;
9873 ctxt->disableSAX = 1;
9874 }
9875
9876 if (!ctxt->wellFormed) {
9877 if (ctxt->errNo == 0)
9878 ret = 1;
9879 else
9880 ret = ctxt->errNo;
9881 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009882 ret = 0;
9883 }
9884
9885 if (lst != NULL && (ret == 0 || recover == 1)) {
9886 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009887
9888 /*
9889 * Return the newly created nodeset after unlinking it from
9890 * they pseudo parent.
9891 */
9892 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009893 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009894 while (cur != NULL) {
9895 cur->parent = NULL;
9896 cur = cur->next;
9897 }
9898 newDoc->children->children = NULL;
9899 }
Daniel Veillard58e44c92002-08-02 22:19:49 +00009900
Owen Taylor3473f882001-02-23 17:55:21 +00009901 if (sax != NULL)
9902 ctxt->sax = oldsax;
9903 xmlFreeParserCtxt(ctxt);
9904 newDoc->intSubset = NULL;
9905 newDoc->extSubset = NULL;
9906 xmlFreeDoc(newDoc);
9907
9908 return(ret);
9909}
9910
9911/**
9912 * xmlSAXParseEntity:
9913 * @sax: the SAX handler block
9914 * @filename: the filename
9915 *
9916 * parse an XML external entity out of context and build a tree.
9917 * It use the given SAX function block to handle the parsing callback.
9918 * If sax is NULL, fallback to the default DOM tree building routines.
9919 *
9920 * [78] extParsedEnt ::= TextDecl? content
9921 *
9922 * This correspond to a "Well Balanced" chunk
9923 *
9924 * Returns the resulting document tree
9925 */
9926
9927xmlDocPtr
9928xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9929 xmlDocPtr ret;
9930 xmlParserCtxtPtr ctxt;
9931 char *directory = NULL;
9932
9933 ctxt = xmlCreateFileParserCtxt(filename);
9934 if (ctxt == NULL) {
9935 return(NULL);
9936 }
9937 if (sax != NULL) {
9938 if (ctxt->sax != NULL)
9939 xmlFree(ctxt->sax);
9940 ctxt->sax = sax;
9941 ctxt->userData = NULL;
9942 }
9943
9944 if ((ctxt->directory == NULL) && (directory == NULL))
9945 directory = xmlParserGetDirectory(filename);
9946
9947 xmlParseExtParsedEnt(ctxt);
9948
9949 if (ctxt->wellFormed)
9950 ret = ctxt->myDoc;
9951 else {
9952 ret = NULL;
9953 xmlFreeDoc(ctxt->myDoc);
9954 ctxt->myDoc = NULL;
9955 }
9956 if (sax != NULL)
9957 ctxt->sax = NULL;
9958 xmlFreeParserCtxt(ctxt);
9959
9960 return(ret);
9961}
9962
9963/**
9964 * xmlParseEntity:
9965 * @filename: the filename
9966 *
9967 * parse an XML external entity out of context and build a tree.
9968 *
9969 * [78] extParsedEnt ::= TextDecl? content
9970 *
9971 * This correspond to a "Well Balanced" chunk
9972 *
9973 * Returns the resulting document tree
9974 */
9975
9976xmlDocPtr
9977xmlParseEntity(const char *filename) {
9978 return(xmlSAXParseEntity(NULL, filename));
9979}
9980
9981/**
9982 * xmlCreateEntityParserCtxt:
9983 * @URL: the entity URL
9984 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009985 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009986 *
9987 * Create a parser context for an external entity
9988 * Automatic support for ZLIB/Compress compressed document is provided
9989 * by default if found at compile-time.
9990 *
9991 * Returns the new parser context or NULL
9992 */
9993xmlParserCtxtPtr
9994xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9995 const xmlChar *base) {
9996 xmlParserCtxtPtr ctxt;
9997 xmlParserInputPtr inputStream;
9998 char *directory = NULL;
9999 xmlChar *uri;
10000
10001 ctxt = xmlNewParserCtxt();
10002 if (ctxt == NULL) {
10003 return(NULL);
10004 }
10005
10006 uri = xmlBuildURI(URL, base);
10007
10008 if (uri == NULL) {
10009 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10010 if (inputStream == NULL) {
10011 xmlFreeParserCtxt(ctxt);
10012 return(NULL);
10013 }
10014
10015 inputPush(ctxt, inputStream);
10016
10017 if ((ctxt->directory == NULL) && (directory == NULL))
10018 directory = xmlParserGetDirectory((char *)URL);
10019 if ((ctxt->directory == NULL) && (directory != NULL))
10020 ctxt->directory = directory;
10021 } else {
10022 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10023 if (inputStream == NULL) {
10024 xmlFree(uri);
10025 xmlFreeParserCtxt(ctxt);
10026 return(NULL);
10027 }
10028
10029 inputPush(ctxt, inputStream);
10030
10031 if ((ctxt->directory == NULL) && (directory == NULL))
10032 directory = xmlParserGetDirectory((char *)uri);
10033 if ((ctxt->directory == NULL) && (directory != NULL))
10034 ctxt->directory = directory;
10035 xmlFree(uri);
10036 }
10037
10038 return(ctxt);
10039}
10040
10041/************************************************************************
10042 * *
10043 * Front ends when parsing from a file *
10044 * *
10045 ************************************************************************/
10046
10047/**
10048 * xmlCreateFileParserCtxt:
10049 * @filename: the filename
10050 *
10051 * Create a parser context for a file content.
10052 * Automatic support for ZLIB/Compress compressed document is provided
10053 * by default if found at compile-time.
10054 *
10055 * Returns the new parser context or NULL
10056 */
10057xmlParserCtxtPtr
10058xmlCreateFileParserCtxt(const char *filename)
10059{
10060 xmlParserCtxtPtr ctxt;
10061 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010062 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010063 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010064
Owen Taylor3473f882001-02-23 17:55:21 +000010065 ctxt = xmlNewParserCtxt();
10066 if (ctxt == NULL) {
10067 if (xmlDefaultSAXHandler.error != NULL) {
10068 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10069 }
10070 return(NULL);
10071 }
10072
Daniel Veillardf4862f02002-09-10 11:13:43 +000010073 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10074 if (normalized == NULL) {
10075 xmlFreeParserCtxt(ctxt);
10076 return(NULL);
10077 }
10078 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010079 if (inputStream == NULL) {
10080 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010081 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010082 return(NULL);
10083 }
10084
Owen Taylor3473f882001-02-23 17:55:21 +000010085 inputPush(ctxt, inputStream);
10086 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010087 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010088 if ((ctxt->directory == NULL) && (directory != NULL))
10089 ctxt->directory = directory;
10090
Daniel Veillardf4862f02002-09-10 11:13:43 +000010091 xmlFree(normalized);
10092
Owen Taylor3473f882001-02-23 17:55:21 +000010093 return(ctxt);
10094}
10095
10096/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010097 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010098 * @sax: the SAX handler block
10099 * @filename: the filename
10100 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10101 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010102 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010103 *
10104 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10105 * compressed document is provided by default if found at compile-time.
10106 * It use the given SAX function block to handle the parsing callback.
10107 * If sax is NULL, fallback to the default DOM tree building routines.
10108 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010109 * User data (void *) is stored within the parser context in the
10110 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010111 *
Owen Taylor3473f882001-02-23 17:55:21 +000010112 * Returns the resulting document tree
10113 */
10114
10115xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010116xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10117 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010118 xmlDocPtr ret;
10119 xmlParserCtxtPtr ctxt;
10120 char *directory = NULL;
10121
Daniel Veillard635ef722001-10-29 11:48:19 +000010122 xmlInitParser();
10123
Owen Taylor3473f882001-02-23 17:55:21 +000010124 ctxt = xmlCreateFileParserCtxt(filename);
10125 if (ctxt == NULL) {
10126 return(NULL);
10127 }
10128 if (sax != NULL) {
10129 if (ctxt->sax != NULL)
10130 xmlFree(ctxt->sax);
10131 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010132 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010133 if (data!=NULL) {
10134 ctxt->_private=data;
10135 }
Owen Taylor3473f882001-02-23 17:55:21 +000010136
10137 if ((ctxt->directory == NULL) && (directory == NULL))
10138 directory = xmlParserGetDirectory(filename);
10139 if ((ctxt->directory == NULL) && (directory != NULL))
10140 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10141
10142 xmlParseDocument(ctxt);
10143
10144 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10145 else {
10146 ret = NULL;
10147 xmlFreeDoc(ctxt->myDoc);
10148 ctxt->myDoc = NULL;
10149 }
10150 if (sax != NULL)
10151 ctxt->sax = NULL;
10152 xmlFreeParserCtxt(ctxt);
10153
10154 return(ret);
10155}
10156
10157/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010158 * xmlSAXParseFile:
10159 * @sax: the SAX handler block
10160 * @filename: the filename
10161 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10162 * documents
10163 *
10164 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10165 * compressed document is provided by default if found at compile-time.
10166 * It use the given SAX function block to handle the parsing callback.
10167 * If sax is NULL, fallback to the default DOM tree building routines.
10168 *
10169 * Returns the resulting document tree
10170 */
10171
10172xmlDocPtr
10173xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10174 int recovery) {
10175 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10176}
10177
10178/**
Owen Taylor3473f882001-02-23 17:55:21 +000010179 * xmlRecoverDoc:
10180 * @cur: a pointer to an array of xmlChar
10181 *
10182 * parse an XML in-memory document and build a tree.
10183 * In the case the document is not Well Formed, a tree is built anyway
10184 *
10185 * Returns the resulting document tree
10186 */
10187
10188xmlDocPtr
10189xmlRecoverDoc(xmlChar *cur) {
10190 return(xmlSAXParseDoc(NULL, cur, 1));
10191}
10192
10193/**
10194 * xmlParseFile:
10195 * @filename: the filename
10196 *
10197 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10198 * compressed document is provided by default if found at compile-time.
10199 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010200 * Returns the resulting document tree if the file was wellformed,
10201 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010202 */
10203
10204xmlDocPtr
10205xmlParseFile(const char *filename) {
10206 return(xmlSAXParseFile(NULL, filename, 0));
10207}
10208
10209/**
10210 * xmlRecoverFile:
10211 * @filename: the filename
10212 *
10213 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10214 * compressed document is provided by default if found at compile-time.
10215 * In the case the document is not Well Formed, a tree is built anyway
10216 *
10217 * Returns the resulting document tree
10218 */
10219
10220xmlDocPtr
10221xmlRecoverFile(const char *filename) {
10222 return(xmlSAXParseFile(NULL, filename, 1));
10223}
10224
10225
10226/**
10227 * xmlSetupParserForBuffer:
10228 * @ctxt: an XML parser context
10229 * @buffer: a xmlChar * buffer
10230 * @filename: a file name
10231 *
10232 * Setup the parser context to parse a new buffer; Clears any prior
10233 * contents from the parser context. The buffer parameter must not be
10234 * NULL, but the filename parameter can be
10235 */
10236void
10237xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10238 const char* filename)
10239{
10240 xmlParserInputPtr input;
10241
10242 input = xmlNewInputStream(ctxt);
10243 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010244 xmlGenericError(xmlGenericErrorContext,
10245 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010246 xmlFree(ctxt);
10247 return;
10248 }
10249
10250 xmlClearParserCtxt(ctxt);
10251 if (filename != NULL)
10252 input->filename = xmlMemStrdup(filename);
10253 input->base = buffer;
10254 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010255 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010256 inputPush(ctxt, input);
10257}
10258
10259/**
10260 * xmlSAXUserParseFile:
10261 * @sax: a SAX handler
10262 * @user_data: The user data returned on SAX callbacks
10263 * @filename: a file name
10264 *
10265 * parse an XML file and call the given SAX handler routines.
10266 * Automatic support for ZLIB/Compress compressed document is provided
10267 *
10268 * Returns 0 in case of success or a error number otherwise
10269 */
10270int
10271xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10272 const char *filename) {
10273 int ret = 0;
10274 xmlParserCtxtPtr ctxt;
10275
10276 ctxt = xmlCreateFileParserCtxt(filename);
10277 if (ctxt == NULL) return -1;
10278 if (ctxt->sax != &xmlDefaultSAXHandler)
10279 xmlFree(ctxt->sax);
10280 ctxt->sax = sax;
10281 if (user_data != NULL)
10282 ctxt->userData = user_data;
10283
10284 xmlParseDocument(ctxt);
10285
10286 if (ctxt->wellFormed)
10287 ret = 0;
10288 else {
10289 if (ctxt->errNo != 0)
10290 ret = ctxt->errNo;
10291 else
10292 ret = -1;
10293 }
10294 if (sax != NULL)
10295 ctxt->sax = NULL;
10296 xmlFreeParserCtxt(ctxt);
10297
10298 return ret;
10299}
10300
10301/************************************************************************
10302 * *
10303 * Front ends when parsing from memory *
10304 * *
10305 ************************************************************************/
10306
10307/**
10308 * xmlCreateMemoryParserCtxt:
10309 * @buffer: a pointer to a char array
10310 * @size: the size of the array
10311 *
10312 * Create a parser context for an XML in-memory document.
10313 *
10314 * Returns the new parser context or NULL
10315 */
10316xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010317xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010318 xmlParserCtxtPtr ctxt;
10319 xmlParserInputPtr input;
10320 xmlParserInputBufferPtr buf;
10321
10322 if (buffer == NULL)
10323 return(NULL);
10324 if (size <= 0)
10325 return(NULL);
10326
10327 ctxt = xmlNewParserCtxt();
10328 if (ctxt == NULL)
10329 return(NULL);
10330
10331 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10332 if (buf == NULL) return(NULL);
10333
10334 input = xmlNewInputStream(ctxt);
10335 if (input == NULL) {
10336 xmlFreeParserCtxt(ctxt);
10337 return(NULL);
10338 }
10339
10340 input->filename = NULL;
10341 input->buf = buf;
10342 input->base = input->buf->buffer->content;
10343 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010344 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010345
10346 inputPush(ctxt, input);
10347 return(ctxt);
10348}
10349
10350/**
10351 * xmlSAXParseMemory:
10352 * @sax: the SAX handler block
10353 * @buffer: an pointer to a char array
10354 * @size: the size of the array
10355 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10356 * documents
10357 *
10358 * parse an XML in-memory block and use the given SAX function block
10359 * to handle the parsing callback. If sax is NULL, fallback to the default
10360 * DOM tree building routines.
10361 *
10362 * Returns the resulting document tree
10363 */
10364xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010365xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10366 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010367 xmlDocPtr ret;
10368 xmlParserCtxtPtr ctxt;
10369
10370 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10371 if (ctxt == NULL) return(NULL);
10372 if (sax != NULL) {
10373 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010374 }
10375
10376 xmlParseDocument(ctxt);
10377
10378 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10379 else {
10380 ret = NULL;
10381 xmlFreeDoc(ctxt->myDoc);
10382 ctxt->myDoc = NULL;
10383 }
10384 if (sax != NULL)
10385 ctxt->sax = NULL;
10386 xmlFreeParserCtxt(ctxt);
10387
10388 return(ret);
10389}
10390
10391/**
10392 * xmlParseMemory:
10393 * @buffer: an pointer to a char array
10394 * @size: the size of the array
10395 *
10396 * parse an XML in-memory block and build a tree.
10397 *
10398 * Returns the resulting document tree
10399 */
10400
Daniel Veillard50822cb2001-07-26 20:05:51 +000010401xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010402 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10403}
10404
10405/**
10406 * xmlRecoverMemory:
10407 * @buffer: an pointer to a char array
10408 * @size: the size of the array
10409 *
10410 * parse an XML in-memory block and build a tree.
10411 * In the case the document is not Well Formed, a tree is built anyway
10412 *
10413 * Returns the resulting document tree
10414 */
10415
Daniel Veillard50822cb2001-07-26 20:05:51 +000010416xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010417 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10418}
10419
10420/**
10421 * xmlSAXUserParseMemory:
10422 * @sax: a SAX handler
10423 * @user_data: The user data returned on SAX callbacks
10424 * @buffer: an in-memory XML document input
10425 * @size: the length of the XML document in bytes
10426 *
10427 * A better SAX parsing routine.
10428 * parse an XML in-memory buffer and call the given SAX handler routines.
10429 *
10430 * Returns 0 in case of success or a error number otherwise
10431 */
10432int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010433 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010434 int ret = 0;
10435 xmlParserCtxtPtr ctxt;
10436 xmlSAXHandlerPtr oldsax = NULL;
10437
Daniel Veillard9e923512002-08-14 08:48:52 +000010438 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010439 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10440 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010441 oldsax = ctxt->sax;
10442 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010443 if (user_data != NULL)
10444 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010445
10446 xmlParseDocument(ctxt);
10447
10448 if (ctxt->wellFormed)
10449 ret = 0;
10450 else {
10451 if (ctxt->errNo != 0)
10452 ret = ctxt->errNo;
10453 else
10454 ret = -1;
10455 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010456 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010457 xmlFreeParserCtxt(ctxt);
10458
10459 return ret;
10460}
10461
10462/**
10463 * xmlCreateDocParserCtxt:
10464 * @cur: a pointer to an array of xmlChar
10465 *
10466 * Creates a parser context for an XML in-memory document.
10467 *
10468 * Returns the new parser context or NULL
10469 */
10470xmlParserCtxtPtr
10471xmlCreateDocParserCtxt(xmlChar *cur) {
10472 int len;
10473
10474 if (cur == NULL)
10475 return(NULL);
10476 len = xmlStrlen(cur);
10477 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10478}
10479
10480/**
10481 * xmlSAXParseDoc:
10482 * @sax: the SAX handler block
10483 * @cur: a pointer to an array of xmlChar
10484 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10485 * documents
10486 *
10487 * parse an XML in-memory document and build a tree.
10488 * It use the given SAX function block to handle the parsing callback.
10489 * If sax is NULL, fallback to the default DOM tree building routines.
10490 *
10491 * Returns the resulting document tree
10492 */
10493
10494xmlDocPtr
10495xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10496 xmlDocPtr ret;
10497 xmlParserCtxtPtr ctxt;
10498
10499 if (cur == NULL) return(NULL);
10500
10501
10502 ctxt = xmlCreateDocParserCtxt(cur);
10503 if (ctxt == NULL) return(NULL);
10504 if (sax != NULL) {
10505 ctxt->sax = sax;
10506 ctxt->userData = NULL;
10507 }
10508
10509 xmlParseDocument(ctxt);
10510 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10511 else {
10512 ret = NULL;
10513 xmlFreeDoc(ctxt->myDoc);
10514 ctxt->myDoc = NULL;
10515 }
10516 if (sax != NULL)
10517 ctxt->sax = NULL;
10518 xmlFreeParserCtxt(ctxt);
10519
10520 return(ret);
10521}
10522
10523/**
10524 * xmlParseDoc:
10525 * @cur: a pointer to an array of xmlChar
10526 *
10527 * parse an XML in-memory document and build a tree.
10528 *
10529 * Returns the resulting document tree
10530 */
10531
10532xmlDocPtr
10533xmlParseDoc(xmlChar *cur) {
10534 return(xmlSAXParseDoc(NULL, cur, 0));
10535}
10536
Daniel Veillard8107a222002-01-13 14:10:10 +000010537/************************************************************************
10538 * *
10539 * Specific function to keep track of entities references *
10540 * and used by the XSLT debugger *
10541 * *
10542 ************************************************************************/
10543
/*
 * Callback installed through xmlSetEntityReferenceFunc() and invoked by
 * xmlAddEntityReference(); NULL means that no callback is registered.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10545
10546/**
10547 * xmlAddEntityReference:
10548 * @ent : A valid entity
10549 * @firstNode : A valid first node for children of entity
10550 * @lastNode : A valid last node of children entity
10551 *
10552 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10553 */
10554static void
10555xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10556 xmlNodePtr lastNode)
10557{
10558 if (xmlEntityRefFunc != NULL) {
10559 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10560 }
10561}
10562
10563
10564/**
10565 * xmlSetEntityReferenceFunc:
10566 * @func : A valid function
10567 *
10568 * Set the function to call call back when a xml reference has been made
10569 */
10570void
10571xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10572{
10573 xmlEntityRefFunc = func;
10574}
Owen Taylor3473f882001-02-23 17:55:21 +000010575
10576/************************************************************************
10577 * *
10578 * Miscellaneous *
10579 * *
10580 ************************************************************************/
10581
10582#ifdef LIBXML_XPATH_ENABLED
10583#include <libxml/xpath.h>
10584#endif
10585
/*
 * Compared against xmlGenericError in xmlInitParser() to decide whether
 * the default generic error function has to be (re)installed.
 */
extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 by xmlInitParser() and reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
10588
10589/**
10590 * xmlInitParser:
10591 *
10592 * Initialization function for the XML parser.
10593 * This is not reentrant. Call once before processing in case of
10594 * use in multithreaded programs.
10595 */
10596
10597void
10598xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010599 if (xmlParserInitialized != 0)
10600 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010601
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010602 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10603 (xmlGenericError == NULL))
10604 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010605 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010606 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010607 xmlInitCharEncodingHandlers();
10608 xmlInitializePredefinedEntities();
10609 xmlDefaultSAXHandlerInit();
10610 xmlRegisterDefaultInputCallbacks();
10611 xmlRegisterDefaultOutputCallbacks();
10612#ifdef LIBXML_HTML_ENABLED
10613 htmlInitAutoClose();
10614 htmlDefaultSAXHandlerInit();
10615#endif
10616#ifdef LIBXML_XPATH_ENABLED
10617 xmlXPathInit();
10618#endif
10619 xmlParserInitialized = 1;
10620}
10621
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void) {
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
    /* the catalog cleanup is only compiled in when catalogs are enabled */
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlCleanupThreads();
    /* clear the flag so that the next xmlInitParser() call initializes again */
    xmlParserInitialized = 0;
}