blob: 0374f6814f1fd8f2bf72006ee6fb7c9f5042fe0d [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
/*
 * Buffer size constants; the code using them lies outside this section.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string; its users lie outside this section. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

/* Forward declarations of file-local helpers defined later in the file. */
static int
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);

/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/

xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
116
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) generates two functions operating on the
 * stack made of the context fields name##Tab (storage), name##Nr (number of
 * items), name##Max (capacity) and name (copy of the top item):
 *
 *   scope int  name##Push(ctxt, value)
 *       Pushes value; the table capacity is doubled when it is full.
 *       Returns the index of the new item, or 0 when growing the table
 *       failed.  On failure the existing table and its capacity are left
 *       untouched, so the items already stacked remain valid and nothing
 *       is leaked.
 *
 *   scope type name##Pop(ctxt)
 *       Removes and returns the top item, clears its slot and refreshes
 *       ctxt->name.  Returns 0 when the stack is empty.
 */

#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        type *grown;                                                    \
                                                                        \
        grown = (type *) xmlRealloc(ctxt->name##Tab,                    \
                 2 * ctxt->name##Max * sizeof(ctxt->name##Tab[0]));     \
        if (grown == NULL) {                                            \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = grown;                                        \
        ctxt->name##Max *= 2;                                           \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
149
/**
 * inputPop:
 * @ctxt: an XML parser context
 *
 * Pops the top parser input from the input stack
 *
 * Returns the input just removed, or NULL (0) if the stack was empty
 */
/**
 * inputPush:
 * @ctxt:  an XML parser context
 * @value:  the parser input
 *
 * Pushes a new parser input on top of the input stack
 *
 * Returns 0 in case of error, the index in the stack otherwise
 */
/**
 * namePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack
 *
 * Returns the name just removed, or NULL (0) if the stack was empty
 */
/**
 * namePush:
 * @ctxt:  an XML parser context
 * @value:  the element name
 *
 * Pushes a new element name on top of the name stack
 *
 * Returns 0 in case of error, the index in the stack otherwise
 */
/**
 * nodePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element node from the node stack
 *
 * Returns the node just removed, or NULL (0) if the stack was empty
 */
/**
 * nodePush:
 * @ctxt:  an XML parser context
 * @value:  the element node
 *
 * Pushes a new element node on top of the node stack
 *
 * Returns 0 in case of error, the index in the stack otherwise
 */
/*
 * Those macros actually generate the functions documented above:
 * inputPush/inputPop, nodePush/nodePop and namePush/namePop.
 */
PUSH_AND_POP(extern, xmlParserInputPtr, input)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt (the parser context) to be in
 * scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (here both expand to the
 *           same expression)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP advances the cursor and the character counter by val (val is
 * evaluated twice), then hands a '%' at the new position to
 * xmlParserHandlePEReference and, when the input is exhausted and cannot be
 * grown, pops it.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
283
Daniel Veillard46de64e2002-05-29 08:21:33 +0000284#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
285 xmlSHRINK (ctxt);
286
287static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
288 xmlParserInputShrink(ctxt->input);
289 if ((*ctxt->input->cur == 0) &&
290 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
291 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000292 }
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard46de64e2002-05-29 08:21:33 +0000294#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
295 xmlGROW (ctxt);
296
297static void xmlGROW (xmlParserCtxtPtr ctxt) {
298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
299 if ((*ctxt->input->cur == 0) &&
300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
301 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, counting it, and ask for more
 * input when the cursor lands on a 0. Expands to a brace block.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): update line/column from the current xmlChar, advance the
 * cursor by l xmlChars, then let xmlParserHandlePEReference() look at a
 * '%' found at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue; it receives the length reported by the callee. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 is stored directly, anything else goes
 * through xmlCopyCharMultiByte().
 * NOTE: expands to an unbraced if/else, so brace it when used as the body
 * of another if statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000329
330/**
331 * xmlSkipBlankChars:
332 * @ctxt: the XML parser context
333 *
334 * skip all blanks character found at that point in the input streams.
335 * It pops up finished entities in the process if allowable at that point.
336 *
337 * Returns the number of space chars skipped
338 */
339
340int
341xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000343
344 /*
345 * It's Okay to use CUR/NEXT here since all the blanks are on
346 * the ASCII range.
347 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
349 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000352 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000353 cur = ctxt->input->cur;
354 while (IS_BLANK(*cur)) {
355 if (*cur == '\n') {
356 ctxt->input->line++; ctxt->input->col = 1;
357 }
358 cur++;
359 res++;
360 if (*cur == 0) {
361 ctxt->input->cur = cur;
362 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
363 cur = ctxt->input->cur;
364 }
365 }
366 ctxt->input->cur = cur;
367 } else {
368 int cur;
369 do {
370 cur = CUR;
371 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
372 NEXT;
373 cur = CUR;
374 res++;
375 }
376 while ((cur == 0) && (ctxt->inputNr > 1) &&
377 (ctxt->instate != XML_PARSER_COMMENT)) {
378 xmlPopInput(ctxt);
379 cur = CUR;
380 }
381 /*
382 * Need to handle support of entities branching here
383 */
384 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
385 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
386 }
Owen Taylor3473f882001-02-23 17:55:21 +0000387 return(res);
388}
389
390/************************************************************************
391 * *
392 * Commodity functions to handle entities *
393 * *
394 ************************************************************************/
395
396/**
397 * xmlPopInput:
398 * @ctxt: an XML parser context
399 *
400 * xmlPopInput: the current input pointed by ctxt->input came to an end
401 * pop it and return the next char.
402 *
403 * Returns the current xmlChar in the parser context
404 */
405xmlChar
406xmlPopInput(xmlParserCtxtPtr ctxt) {
407 if (ctxt->inputNr == 1) return(0); /* End of main Input */
408 if (xmlParserDebugEntities)
409 xmlGenericError(xmlGenericErrorContext,
410 "Popping input %d\n", ctxt->inputNr);
411 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000412 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000413 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
414 return(xmlPopInput(ctxt));
415 return(CUR);
416}
417
418/**
419 * xmlPushInput:
420 * @ctxt: an XML parser context
421 * @input: an XML parser input fragment (entity, XML fragment ...).
422 *
423 * xmlPushInput: switch to a new input stream which is stacked on top
424 * of the previous one(s).
425 */
426void
427xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
428 if (input == NULL) return;
429
430 if (xmlParserDebugEntities) {
431 if ((ctxt->input != NULL) && (ctxt->input->filename))
432 xmlGenericError(xmlGenericErrorContext,
433 "%s(%d): ", ctxt->input->filename,
434 ctxt->input->line);
435 xmlGenericError(xmlGenericErrorContext,
436 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
437 }
438 inputPush(ctxt, input);
439 GROW;
440}
441
442/**
443 * xmlParseCharRef:
444 * @ctxt: an XML parser context
445 *
446 * parse Reference declarations
447 *
448 * [66] CharRef ::= '&#' [0-9]+ ';' |
449 * '&#x' [0-9a-fA-F]+ ';'
450 *
451 * [ WFC: Legal Character ]
452 * Characters referred to using character references must match the
453 * production for Char.
454 *
455 * Returns the value parsed (as an int), 0 in case of error
456 */
457int
458xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000459 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000460 int count = 0;
461
Owen Taylor3473f882001-02-23 17:55:21 +0000462 /*
463 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
464 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000465 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000466 (NXT(2) == 'x')) {
467 SKIP(3);
468 GROW;
469 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000470 if (count++ > 20) {
471 count = 0;
472 GROW;
473 }
474 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000475 val = val * 16 + (CUR - '0');
476 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
477 val = val * 16 + (CUR - 'a') + 10;
478 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
479 val = val * 16 + (CUR - 'A') + 10;
480 else {
481 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
483 ctxt->sax->error(ctxt->userData,
484 "xmlParseCharRef: invalid hexadecimal value\n");
485 ctxt->wellFormed = 0;
486 ctxt->disableSAX = 1;
487 val = 0;
488 break;
489 }
490 NEXT;
491 count++;
492 }
493 if (RAW == ';') {
494 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
495 ctxt->nbChars ++;
496 ctxt->input->cur++;
497 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000498 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000499 SKIP(2);
500 GROW;
501 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000502 if (count++ > 20) {
503 count = 0;
504 GROW;
505 }
506 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000507 val = val * 10 + (CUR - '0');
508 else {
509 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
511 ctxt->sax->error(ctxt->userData,
512 "xmlParseCharRef: invalid decimal value\n");
513 ctxt->wellFormed = 0;
514 ctxt->disableSAX = 1;
515 val = 0;
516 break;
517 }
518 NEXT;
519 count++;
520 }
521 if (RAW == ';') {
522 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
523 ctxt->nbChars ++;
524 ctxt->input->cur++;
525 }
526 } else {
527 ctxt->errNo = XML_ERR_INVALID_CHARREF;
528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
529 ctxt->sax->error(ctxt->userData,
530 "xmlParseCharRef: invalid value\n");
531 ctxt->wellFormed = 0;
532 ctxt->disableSAX = 1;
533 }
534
535 /*
536 * [ WFC: Legal Character ]
537 * Characters referred to using character references must match the
538 * production for Char.
539 */
540 if (IS_CHAR(val)) {
541 return(val);
542 } else {
543 ctxt->errNo = XML_ERR_INVALID_CHAR;
544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000545 ctxt->sax->error(ctxt->userData,
546 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000547 val);
548 ctxt->wellFormed = 0;
549 ctxt->disableSAX = 1;
550 }
551 return(0);
552}
553
554/**
555 * xmlParseStringCharRef:
556 * @ctxt: an XML parser context
557 * @str: a pointer to an index in the string
558 *
559 * parse Reference declarations, variant parsing from a string rather
560 * than an an input flow.
561 *
562 * [66] CharRef ::= '&#' [0-9]+ ';' |
563 * '&#x' [0-9a-fA-F]+ ';'
564 *
565 * [ WFC: Legal Character ]
566 * Characters referred to using character references must match the
567 * production for Char.
568 *
569 * Returns the value parsed (as an int), 0 in case of error, str will be
570 * updated to the current value of the index
571 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000572static int
Owen Taylor3473f882001-02-23 17:55:21 +0000573xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
574 const xmlChar *ptr;
575 xmlChar cur;
576 int val = 0;
577
578 if ((str == NULL) || (*str == NULL)) return(0);
579 ptr = *str;
580 cur = *ptr;
581 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
582 ptr += 3;
583 cur = *ptr;
584 while (cur != ';') { /* Non input consuming loop */
585 if ((cur >= '0') && (cur <= '9'))
586 val = val * 16 + (cur - '0');
587 else if ((cur >= 'a') && (cur <= 'f'))
588 val = val * 16 + (cur - 'a') + 10;
589 else if ((cur >= 'A') && (cur <= 'F'))
590 val = val * 16 + (cur - 'A') + 10;
591 else {
592 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
594 ctxt->sax->error(ctxt->userData,
595 "xmlParseStringCharRef: invalid hexadecimal value\n");
596 ctxt->wellFormed = 0;
597 ctxt->disableSAX = 1;
598 val = 0;
599 break;
600 }
601 ptr++;
602 cur = *ptr;
603 }
604 if (cur == ';')
605 ptr++;
606 } else if ((cur == '&') && (ptr[1] == '#')){
607 ptr += 2;
608 cur = *ptr;
609 while (cur != ';') { /* Non input consuming loops */
610 if ((cur >= '0') && (cur <= '9'))
611 val = val * 10 + (cur - '0');
612 else {
613 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
616 "xmlParseStringCharRef: invalid decimal value\n");
617 ctxt->wellFormed = 0;
618 ctxt->disableSAX = 1;
619 val = 0;
620 break;
621 }
622 ptr++;
623 cur = *ptr;
624 }
625 if (cur == ';')
626 ptr++;
627 } else {
628 ctxt->errNo = XML_ERR_INVALID_CHARREF;
629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
630 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000631 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000632 ctxt->wellFormed = 0;
633 ctxt->disableSAX = 1;
634 return(0);
635 }
636 *str = ptr;
637
638 /*
639 * [ WFC: Legal Character ]
640 * Characters referred to using character references must match the
641 * production for Char.
642 */
643 if (IS_CHAR(val)) {
644 return(val);
645 } else {
646 ctxt->errNo = XML_ERR_INVALID_CHAR;
647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
648 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000649 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000650 ctxt->wellFormed = 0;
651 ctxt->disableSAX = 1;
652 }
653 return(0);
654}
655
656/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000657 * xmlNewBlanksWrapperInputStream:
658 * @ctxt: an XML parser context
659 * @entity: an Entity pointer
660 *
661 * Create a new input stream for wrapping
662 * blanks around a PEReference
663 *
664 * Returns the new input stream or NULL
665 */
666
667static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
668
669xmlParserInputPtr
670xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
671 xmlParserInputPtr input;
672 xmlChar *buffer;
673 size_t length;
674 if (entity == NULL) {
675 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
677 ctxt->sax->error(ctxt->userData,
678 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
679 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
680 return(NULL);
681 }
682 if (xmlParserDebugEntities)
683 xmlGenericError(xmlGenericErrorContext,
684 "new blanks wrapper for entity: %s\n", entity->name);
685 input = xmlNewInputStream(ctxt);
686 if (input == NULL) {
687 return(NULL);
688 }
689 length = xmlStrlen(entity->name) + 5;
690 buffer = xmlMalloc(length);
691 if (buffer == NULL) {
692 return(NULL);
693 }
694 buffer [0] = ' ';
695 buffer [1] = '%';
696 buffer [length-3] = ';';
697 buffer [length-2] = ' ';
698 buffer [length-1] = 0;
699 memcpy(buffer + 2, entity->name, length - 5);
700 input->free = deallocblankswrapper;
701 input->base = buffer;
702 input->cur = buffer;
703 input->length = length;
704 input->end = &buffer[length];
705 return(input);
706}
707
708/**
Owen Taylor3473f882001-02-23 17:55:21 +0000709 * xmlParserHandlePEReference:
710 * @ctxt: the parser context
711 *
712 * [69] PEReference ::= '%' Name ';'
713 *
714 * [ WFC: No Recursion ]
715 * A parsed entity must not contain a recursive
716 * reference to itself, either directly or indirectly.
717 *
718 * [ WFC: Entity Declared ]
719 * In a document without any DTD, a document with only an internal DTD
720 * subset which contains no parameter entity references, or a document
721 * with "standalone='yes'", ... ... The declaration of a parameter
722 * entity must precede any reference to it...
723 *
724 * [ VC: Entity Declared ]
725 * In a document with an external subset or external parameter entities
726 * with "standalone='no'", ... ... The declaration of a parameter entity
727 * must precede any reference to it...
728 *
729 * [ WFC: In DTD ]
730 * Parameter-entity references may only appear in the DTD.
731 * NOTE: misleading but this is handled.
732 *
733 * A PEReference may have been detected in the current input stream
734 * the handling is done accordingly to
735 * http://www.w3.org/TR/REC-xml#entproc
736 * i.e.
737 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000738 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000739 */
740void
741xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
742 xmlChar *name;
743 xmlEntityPtr entity = NULL;
744 xmlParserInputPtr input;
745
Owen Taylor3473f882001-02-23 17:55:21 +0000746 if (RAW != '%') return;
747 switch(ctxt->instate) {
748 case XML_PARSER_CDATA_SECTION:
749 return;
750 case XML_PARSER_COMMENT:
751 return;
752 case XML_PARSER_START_TAG:
753 return;
754 case XML_PARSER_END_TAG:
755 return;
756 case XML_PARSER_EOF:
757 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
759 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 return;
763 case XML_PARSER_PROLOG:
764 case XML_PARSER_START:
765 case XML_PARSER_MISC:
766 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
768 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 return;
772 case XML_PARSER_ENTITY_DECL:
773 case XML_PARSER_CONTENT:
774 case XML_PARSER_ATTRIBUTE_VALUE:
775 case XML_PARSER_PI:
776 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000777 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000778 /* we just ignore it there */
779 return;
780 case XML_PARSER_EPILOG:
781 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
783 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
784 ctxt->wellFormed = 0;
785 ctxt->disableSAX = 1;
786 return;
787 case XML_PARSER_ENTITY_VALUE:
788 /*
789 * NOTE: in the case of entity values, we don't do the
790 * substitution here since we need the literal
791 * entity value to be able to save the internal
792 * subset of the document.
793 * This will be handled by xmlStringDecodeEntities
794 */
795 return;
796 case XML_PARSER_DTD:
797 /*
798 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
799 * In the internal DTD subset, parameter-entity references
800 * can occur only where markup declarations can occur, not
801 * within markup declarations.
802 * In that case this is handled in xmlParseMarkupDecl
803 */
804 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
805 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000806 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
807 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000808 break;
809 case XML_PARSER_IGNORE:
810 return;
811 }
812
813 NEXT;
814 name = xmlParseName(ctxt);
815 if (xmlParserDebugEntities)
816 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000817 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000818 if (name == NULL) {
819 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000821 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000822 ctxt->wellFormed = 0;
823 ctxt->disableSAX = 1;
824 } else {
825 if (RAW == ';') {
826 NEXT;
827 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
828 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
829 if (entity == NULL) {
830
831 /*
832 * [ WFC: Entity Declared ]
833 * In a document without any DTD, a document with only an
834 * internal DTD subset which contains no parameter entity
835 * references, or a document with "standalone='yes'", ...
836 * ... The declaration of a parameter entity must precede
837 * any reference to it...
838 */
839 if ((ctxt->standalone == 1) ||
840 ((ctxt->hasExternalSubset == 0) &&
841 (ctxt->hasPErefs == 0))) {
842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
843 ctxt->sax->error(ctxt->userData,
844 "PEReference: %%%s; not found\n", name);
845 ctxt->wellFormed = 0;
846 ctxt->disableSAX = 1;
847 } else {
848 /*
849 * [ VC: Entity Declared ]
850 * In a document with an external subset or external
851 * parameter entities with "standalone='no'", ...
852 * ... The declaration of a parameter entity must precede
853 * any reference to it...
854 */
855 if ((!ctxt->disableSAX) &&
856 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
857 ctxt->vctxt.error(ctxt->vctxt.userData,
858 "PEReference: %%%s; not found\n", name);
859 } else if ((!ctxt->disableSAX) &&
860 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
861 ctxt->sax->warning(ctxt->userData,
862 "PEReference: %%%s; not found\n", name);
863 ctxt->valid = 0;
864 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000865 } else if (ctxt->input->free != deallocblankswrapper) {
866 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
867 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000868 } else {
869 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
870 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000871 xmlChar start[4];
872 xmlCharEncoding enc;
873
Owen Taylor3473f882001-02-23 17:55:21 +0000874 /*
875 * handle the extra spaces added before and after
876 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000877 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000878 */
879 input = xmlNewEntityInputStream(ctxt, entity);
880 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000881
882 /*
883 * Get the 4 first bytes and decode the charset
884 * if enc != XML_CHAR_ENCODING_NONE
885 * plug some encoding conversion routines.
886 */
887 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000888 if (entity->length >= 4) {
889 start[0] = RAW;
890 start[1] = NXT(1);
891 start[2] = NXT(2);
892 start[3] = NXT(3);
893 enc = xmlDetectCharEncoding(start, 4);
894 if (enc != XML_CHAR_ENCODING_NONE) {
895 xmlSwitchEncoding(ctxt, enc);
896 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000897 }
898
Owen Taylor3473f882001-02-23 17:55:21 +0000899 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
900 (RAW == '<') && (NXT(1) == '?') &&
901 (NXT(2) == 'x') && (NXT(3) == 'm') &&
902 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
903 xmlParseTextDecl(ctxt);
904 }
Owen Taylor3473f882001-02-23 17:55:21 +0000905 } else {
906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
907 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000908 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000909 name);
910 ctxt->wellFormed = 0;
911 ctxt->disableSAX = 1;
912 }
913 }
914 } else {
915 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
917 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000918 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000919 ctxt->wellFormed = 0;
920 ctxt->disableSAX = 1;
921 }
922 xmlFree(name);
923 }
924}
925
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer to that many xmlChars.
 * The result of xmlRealloc() is first stored in a temporary: when the
 * reallocation fails the previous block is still valid, so it is released
 * here instead of being leaked by overwriting the only pointer to it.
 *
 * On failure an error is reported and the ENCLOSING FUNCTION returns NULL,
 * hence this macro can only be used in functions returning a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
	    xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
938
939/**
940 * xmlStringDecodeEntities:
941 * @ctxt: the parser context
942 * @str: the input string
943 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
944 * @end: an end marker xmlChar, 0 if none
945 * @end2: an end marker xmlChar, 0 if none
946 * @end3: an end marker xmlChar, 0 if none
947 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000948 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000949 *
950 * [67] Reference ::= EntityRef | CharRef
951 *
952 * [69] PEReference ::= '%' Name ';'
953 *
954 * Returns A newly allocated string with the substitution done. The caller
955 * must deallocate it !
956 */
957xmlChar *
958xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
959 xmlChar end, xmlChar end2, xmlChar end3) {
960 xmlChar *buffer = NULL;
961 int buffer_size = 0;
962
963 xmlChar *current = NULL;
964 xmlEntityPtr ent;
965 int c,l;
966 int nbchars = 0;
967
968 if (str == NULL)
969 return(NULL);
970
971 if (ctxt->depth > 40) {
972 ctxt->errNo = XML_ERR_ENTITY_LOOP;
973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
974 ctxt->sax->error(ctxt->userData,
975 "Detected entity reference loop\n");
976 ctxt->wellFormed = 0;
977 ctxt->disableSAX = 1;
978 return(NULL);
979 }
980
981 /*
982 * allocate a translation buffer.
983 */
984 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
985 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
986 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000987 xmlGenericError(xmlGenericErrorContext,
988 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000989 return(NULL);
990 }
991
992 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000993 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000994 * we are operating on already parsed values.
995 */
996 c = CUR_SCHAR(str, l);
997 while ((c != 0) && (c != end) && /* non input consuming loop */
998 (c != end2) && (c != end3)) {
999
1000 if (c == 0) break;
1001 if ((c == '&') && (str[1] == '#')) {
1002 int val = xmlParseStringCharRef(ctxt, &str);
1003 if (val != 0) {
1004 COPY_BUF(0,buffer,nbchars,val);
1005 }
1006 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1007 if (xmlParserDebugEntities)
1008 xmlGenericError(xmlGenericErrorContext,
1009 "String decoding Entity Reference: %.30s\n",
1010 str);
1011 ent = xmlParseStringEntityRef(ctxt, &str);
1012 if ((ent != NULL) &&
1013 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1014 if (ent->content != NULL) {
1015 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1016 } else {
1017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1018 ctxt->sax->error(ctxt->userData,
1019 "internal error entity has no content\n");
1020 }
1021 } else if ((ent != NULL) && (ent->content != NULL)) {
1022 xmlChar *rep;
1023
1024 ctxt->depth++;
1025 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1026 0, 0, 0);
1027 ctxt->depth--;
1028 if (rep != NULL) {
1029 current = rep;
1030 while (*current != 0) { /* non input consuming loop */
1031 buffer[nbchars++] = *current++;
1032 if (nbchars >
1033 buffer_size - XML_PARSER_BUFFER_SIZE) {
1034 growBuffer(buffer);
1035 }
1036 }
1037 xmlFree(rep);
1038 }
1039 } else if (ent != NULL) {
1040 int i = xmlStrlen(ent->name);
1041 const xmlChar *cur = ent->name;
1042
1043 buffer[nbchars++] = '&';
1044 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1045 growBuffer(buffer);
1046 }
1047 for (;i > 0;i--)
1048 buffer[nbchars++] = *cur++;
1049 buffer[nbchars++] = ';';
1050 }
1051 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1052 if (xmlParserDebugEntities)
1053 xmlGenericError(xmlGenericErrorContext,
1054 "String decoding PE Reference: %.30s\n", str);
1055 ent = xmlParseStringPEReference(ctxt, &str);
1056 if (ent != NULL) {
1057 xmlChar *rep;
1058
1059 ctxt->depth++;
1060 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1061 0, 0, 0);
1062 ctxt->depth--;
1063 if (rep != NULL) {
1064 current = rep;
1065 while (*current != 0) { /* non input consuming loop */
1066 buffer[nbchars++] = *current++;
1067 if (nbchars >
1068 buffer_size - XML_PARSER_BUFFER_SIZE) {
1069 growBuffer(buffer);
1070 }
1071 }
1072 xmlFree(rep);
1073 }
1074 }
1075 } else {
1076 COPY_BUF(l,buffer,nbchars,c);
1077 str += l;
1078 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1079 growBuffer(buffer);
1080 }
1081 }
1082 c = CUR_SCHAR(str, l);
1083 }
1084 buffer[nbchars++] = 0;
1085 return(buffer);
1086}
1087
1088
1089/************************************************************************
1090 * *
1091 * Commodity functions to handle xmlChars *
1092 * *
1093 ************************************************************************/
1094
1095/**
1096 * xmlStrndup:
1097 * @cur: the input xmlChar *
1098 * @len: the len of @cur
1099 *
1100 * a strndup for array of xmlChar's
1101 *
1102 * Returns a new xmlChar * or NULL
1103 */
1104xmlChar *
1105xmlStrndup(const xmlChar *cur, int len) {
1106 xmlChar *ret;
1107
1108 if ((cur == NULL) || (len < 0)) return(NULL);
1109 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1110 if (ret == NULL) {
1111 xmlGenericError(xmlGenericErrorContext,
1112 "malloc of %ld byte failed\n",
1113 (len + 1) * (long)sizeof(xmlChar));
1114 return(NULL);
1115 }
1116 memcpy(ret, cur, len * sizeof(xmlChar));
1117 ret[len] = 0;
1118 return(ret);
1119}
1120
1121/**
1122 * xmlStrdup:
1123 * @cur: the input xmlChar *
1124 *
1125 * a strdup for array of xmlChar's. Since they are supposed to be
1126 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1127 * a termination mark of '0'.
1128 *
1129 * Returns a new xmlChar * or NULL
1130 */
1131xmlChar *
1132xmlStrdup(const xmlChar *cur) {
1133 const xmlChar *p = cur;
1134
1135 if (cur == NULL) return(NULL);
1136 while (*p != 0) p++; /* non input consuming */
1137 return(xmlStrndup(cur, p - cur));
1138}
1139
1140/**
1141 * xmlCharStrndup:
1142 * @cur: the input char *
1143 * @len: the len of @cur
1144 *
1145 * a strndup for char's to xmlChar's
1146 *
1147 * Returns a new xmlChar * or NULL
1148 */
1149
1150xmlChar *
1151xmlCharStrndup(const char *cur, int len) {
1152 int i;
1153 xmlChar *ret;
1154
1155 if ((cur == NULL) || (len < 0)) return(NULL);
1156 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1157 if (ret == NULL) {
1158 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1159 (len + 1) * (long)sizeof(xmlChar));
1160 return(NULL);
1161 }
1162 for (i = 0;i < len;i++)
1163 ret[i] = (xmlChar) cur[i];
1164 ret[len] = 0;
1165 return(ret);
1166}
1167
1168/**
1169 * xmlCharStrdup:
1170 * @cur: the input char *
1171 * @len: the len of @cur
1172 *
1173 * a strdup for char's to xmlChar's
1174 *
1175 * Returns a new xmlChar * or NULL
1176 */
1177
1178xmlChar *
1179xmlCharStrdup(const char *cur) {
1180 const char *p = cur;
1181
1182 if (cur == NULL) return(NULL);
1183 while (*p != '\0') p++; /* non input consuming */
1184 return(xmlCharStrndup(cur, p - cur));
1185}
1186
1187/**
1188 * xmlStrcmp:
1189 * @str1: the first xmlChar *
1190 * @str2: the second xmlChar *
1191 *
1192 * a strcmp for xmlChar's
1193 *
1194 * Returns the integer result of the comparison
1195 */
1196
1197int
1198xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1199 register int tmp;
1200
1201 if (str1 == str2) return(0);
1202 if (str1 == NULL) return(-1);
1203 if (str2 == NULL) return(1);
1204 do {
1205 tmp = *str1++ - *str2;
1206 if (tmp != 0) return(tmp);
1207 } while (*str2++ != 0);
1208 return 0;
1209}
1210
1211/**
1212 * xmlStrEqual:
1213 * @str1: the first xmlChar *
1214 * @str2: the second xmlChar *
1215 *
1216 * Check if both string are equal of have same content
1217 * Should be a bit more readable and faster than xmlStrEqual()
1218 *
1219 * Returns 1 if they are equal, 0 if they are different
1220 */
1221
1222int
1223xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1224 if (str1 == str2) return(1);
1225 if (str1 == NULL) return(0);
1226 if (str2 == NULL) return(0);
1227 do {
1228 if (*str1++ != *str2) return(0);
1229 } while (*str2++);
1230 return(1);
1231}
1232
1233/**
1234 * xmlStrncmp:
1235 * @str1: the first xmlChar *
1236 * @str2: the second xmlChar *
1237 * @len: the max comparison length
1238 *
1239 * a strncmp for xmlChar's
1240 *
1241 * Returns the integer result of the comparison
1242 */
1243
1244int
1245xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1246 register int tmp;
1247
1248 if (len <= 0) return(0);
1249 if (str1 == str2) return(0);
1250 if (str1 == NULL) return(-1);
1251 if (str2 == NULL) return(1);
1252 do {
1253 tmp = *str1++ - *str2;
1254 if (tmp != 0 || --len == 0) return(tmp);
1255 } while (*str2++ != 0);
1256 return 0;
1257}
1258
Daniel Veillardb44025c2001-10-11 22:55:55 +00001259static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001260 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1261 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1262 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1263 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1264 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1265 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1266 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1267 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1268 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1269 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1270 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1271 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1272 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1273 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1274 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1275 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1276 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1277 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1278 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1279 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1280 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1281 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1282 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1283 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1284 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1285 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1286 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1287 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1288 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1289 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1290 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1291 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1292};
1293
1294/**
1295 * xmlStrcasecmp:
1296 * @str1: the first xmlChar *
1297 * @str2: the second xmlChar *
1298 *
1299 * a strcasecmp for xmlChar's
1300 *
1301 * Returns the integer result of the comparison
1302 */
1303
1304int
1305xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1306 register int tmp;
1307
1308 if (str1 == str2) return(0);
1309 if (str1 == NULL) return(-1);
1310 if (str2 == NULL) return(1);
1311 do {
1312 tmp = casemap[*str1++] - casemap[*str2];
1313 if (tmp != 0) return(tmp);
1314 } while (*str2++ != 0);
1315 return 0;
1316}
1317
1318/**
1319 * xmlStrncasecmp:
1320 * @str1: the first xmlChar *
1321 * @str2: the second xmlChar *
1322 * @len: the max comparison length
1323 *
1324 * a strncasecmp for xmlChar's
1325 *
1326 * Returns the integer result of the comparison
1327 */
1328
1329int
1330xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1331 register int tmp;
1332
1333 if (len <= 0) return(0);
1334 if (str1 == str2) return(0);
1335 if (str1 == NULL) return(-1);
1336 if (str2 == NULL) return(1);
1337 do {
1338 tmp = casemap[*str1++] - casemap[*str2];
1339 if (tmp != 0 || --len == 0) return(tmp);
1340 } while (*str2++ != 0);
1341 return 0;
1342}
1343
1344/**
1345 * xmlStrchr:
1346 * @str: the xmlChar * array
1347 * @val: the xmlChar to search
1348 *
1349 * a strchr for xmlChar's
1350 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001351 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001352 */
1353
1354const xmlChar *
1355xmlStrchr(const xmlChar *str, xmlChar val) {
1356 if (str == NULL) return(NULL);
1357 while (*str != 0) { /* non input consuming */
1358 if (*str == val) return((xmlChar *) str);
1359 str++;
1360 }
1361 return(NULL);
1362}
1363
1364/**
1365 * xmlStrstr:
1366 * @str: the xmlChar * array (haystack)
1367 * @val: the xmlChar to search (needle)
1368 *
1369 * a strstr for xmlChar's
1370 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001371 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001372 */
1373
1374const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001375xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001376 int n;
1377
1378 if (str == NULL) return(NULL);
1379 if (val == NULL) return(NULL);
1380 n = xmlStrlen(val);
1381
1382 if (n == 0) return(str);
1383 while (*str != 0) { /* non input consuming */
1384 if (*str == *val) {
1385 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1386 }
1387 str++;
1388 }
1389 return(NULL);
1390}
1391
1392/**
1393 * xmlStrcasestr:
1394 * @str: the xmlChar * array (haystack)
1395 * @val: the xmlChar to search (needle)
1396 *
1397 * a case-ignoring strstr for xmlChar's
1398 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001399 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001400 */
1401
1402const xmlChar *
1403xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1404 int n;
1405
1406 if (str == NULL) return(NULL);
1407 if (val == NULL) return(NULL);
1408 n = xmlStrlen(val);
1409
1410 if (n == 0) return(str);
1411 while (*str != 0) { /* non input consuming */
1412 if (casemap[*str] == casemap[*val])
1413 if (!xmlStrncasecmp(str, val, n)) return(str);
1414 str++;
1415 }
1416 return(NULL);
1417}
1418
1419/**
1420 * xmlStrsub:
1421 * @str: the xmlChar * array (haystack)
1422 * @start: the index of the first char (zero based)
1423 * @len: the length of the substring
1424 *
1425 * Extract a substring of a given string
1426 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001427 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001428 */
1429
1430xmlChar *
1431xmlStrsub(const xmlChar *str, int start, int len) {
1432 int i;
1433
1434 if (str == NULL) return(NULL);
1435 if (start < 0) return(NULL);
1436 if (len < 0) return(NULL);
1437
1438 for (i = 0;i < start;i++) {
1439 if (*str == 0) return(NULL);
1440 str++;
1441 }
1442 if (*str == 0) return(NULL);
1443 return(xmlStrndup(str, len));
1444}
1445
1446/**
1447 * xmlStrlen:
1448 * @str: the xmlChar * array
1449 *
1450 * length of a xmlChar's string
1451 *
1452 * Returns the number of xmlChar contained in the ARRAY.
1453 */
1454
1455int
1456xmlStrlen(const xmlChar *str) {
1457 int len = 0;
1458
1459 if (str == NULL) return(0);
1460 while (*str != 0) { /* non input consuming */
1461 str++;
1462 len++;
1463 }
1464 return(len);
1465}
1466
1467/**
1468 * xmlStrncat:
1469 * @cur: the original xmlChar * array
1470 * @add: the xmlChar * array added
1471 * @len: the length of @add
1472 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001473 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001474 * first bytes of @add.
1475 *
1476 * Returns a new xmlChar *, the original @cur is reallocated if needed
1477 * and should not be freed
1478 */
1479
1480xmlChar *
1481xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1482 int size;
1483 xmlChar *ret;
1484
1485 if ((add == NULL) || (len == 0))
1486 return(cur);
1487 if (cur == NULL)
1488 return(xmlStrndup(add, len));
1489
1490 size = xmlStrlen(cur);
1491 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1492 if (ret == NULL) {
1493 xmlGenericError(xmlGenericErrorContext,
1494 "xmlStrncat: realloc of %ld byte failed\n",
1495 (size + len + 1) * (long)sizeof(xmlChar));
1496 return(cur);
1497 }
1498 memcpy(&ret[size], add, len * sizeof(xmlChar));
1499 ret[size + len] = 0;
1500 return(ret);
1501}
1502
1503/**
1504 * xmlStrcat:
1505 * @cur: the original xmlChar * array
1506 * @add: the xmlChar * array added
1507 *
1508 * a strcat for array of xmlChar's. Since they are supposed to be
1509 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1510 * a termination mark of '0'.
1511 *
1512 * Returns a new xmlChar * containing the concatenated string.
1513 */
1514xmlChar *
1515xmlStrcat(xmlChar *cur, const xmlChar *add) {
1516 const xmlChar *p = add;
1517
1518 if (add == NULL) return(cur);
1519 if (cur == NULL)
1520 return(xmlStrdup(add));
1521
1522 while (*p != 0) p++; /* non input consuming */
1523 return(xmlStrncat(cur, add, p - add));
1524}
1525
1526/************************************************************************
1527 * *
1528 * Commodity functions, cleanup needed ? *
1529 * *
1530 ************************************************************************/
1531
1532/**
1533 * areBlanks:
1534 * @ctxt: an XML parser context
1535 * @str: a xmlChar *
1536 * @len: the size of @str
1537 *
1538 * Is this a sequence of blank chars that one can ignore ?
1539 *
1540 * Returns 1 if ignorable 0 otherwise.
1541 */
1542
1543static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1544 int i, ret;
1545 xmlNodePtr lastChild;
1546
Daniel Veillard05c13a22001-09-09 08:38:09 +00001547 /*
1548 * Don't spend time trying to differentiate them, the same callback is
1549 * used !
1550 */
1551 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001552 return(0);
1553
Owen Taylor3473f882001-02-23 17:55:21 +00001554 /*
1555 * Check for xml:space value.
1556 */
1557 if (*(ctxt->space) == 1)
1558 return(0);
1559
1560 /*
1561 * Check that the string is made of blanks
1562 */
1563 for (i = 0;i < len;i++)
1564 if (!(IS_BLANK(str[i]))) return(0);
1565
1566 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001567 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001568 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001569 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001570 if (ctxt->myDoc != NULL) {
1571 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1572 if (ret == 0) return(1);
1573 if (ret == 1) return(0);
1574 }
1575
1576 /*
1577 * Otherwise, heuristic :-\
1578 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001579 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001580 if ((ctxt->node->children == NULL) &&
1581 (RAW == '<') && (NXT(1) == '/')) return(0);
1582
1583 lastChild = xmlGetLastChild(ctxt->node);
1584 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001585 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1586 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001587 } else if (xmlNodeIsText(lastChild))
1588 return(0);
1589 else if ((ctxt->node->children != NULL) &&
1590 (xmlNodeIsText(ctxt->node->children)))
1591 return(0);
1592 return(1);
1593}
1594
Owen Taylor3473f882001-02-23 17:55:21 +00001595/************************************************************************
1596 * *
1597 * Extra stuff for namespace support *
1598 * Relates to http://www.w3.org/TR/WD-xml-names *
1599 * *
1600 ************************************************************************/
1601
1602/**
1603 * xmlSplitQName:
1604 * @ctxt: an XML parser context
1605 * @name: an XML parser context
1606 * @prefix: a xmlChar **
1607 *
1608 * parse an UTF8 encoded XML qualified name string
1609 *
1610 * [NS 5] QName ::= (Prefix ':')? LocalPart
1611 *
1612 * [NS 6] Prefix ::= NCName
1613 *
1614 * [NS 7] LocalPart ::= NCName
1615 *
1616 * Returns the local part, and prefix is updated
1617 * to get the Prefix if any.
1618 */
1619
1620xmlChar *
1621xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1622 xmlChar buf[XML_MAX_NAMELEN + 5];
1623 xmlChar *buffer = NULL;
1624 int len = 0;
1625 int max = XML_MAX_NAMELEN;
1626 xmlChar *ret = NULL;
1627 const xmlChar *cur = name;
1628 int c;
1629
1630 *prefix = NULL;
1631
1632#ifndef XML_XML_NAMESPACE
1633 /* xml: prefix is not really a namespace */
1634 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1635 (cur[2] == 'l') && (cur[3] == ':'))
1636 return(xmlStrdup(name));
1637#endif
1638
1639 /* nasty but valid */
1640 if (cur[0] == ':')
1641 return(xmlStrdup(name));
1642
1643 c = *cur++;
1644 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1645 buf[len++] = c;
1646 c = *cur++;
1647 }
1648 if (len >= max) {
1649 /*
1650 * Okay someone managed to make a huge name, so he's ready to pay
1651 * for the processing speed.
1652 */
1653 max = len * 2;
1654
1655 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1656 if (buffer == NULL) {
1657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1658 ctxt->sax->error(ctxt->userData,
1659 "xmlSplitQName: out of memory\n");
1660 return(NULL);
1661 }
1662 memcpy(buffer, buf, len);
1663 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1664 if (len + 10 > max) {
1665 max *= 2;
1666 buffer = (xmlChar *) xmlRealloc(buffer,
1667 max * sizeof(xmlChar));
1668 if (buffer == NULL) {
1669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1670 ctxt->sax->error(ctxt->userData,
1671 "xmlSplitQName: out of memory\n");
1672 return(NULL);
1673 }
1674 }
1675 buffer[len++] = c;
1676 c = *cur++;
1677 }
1678 buffer[len] = 0;
1679 }
1680
1681 if (buffer == NULL)
1682 ret = xmlStrndup(buf, len);
1683 else {
1684 ret = buffer;
1685 buffer = NULL;
1686 max = XML_MAX_NAMELEN;
1687 }
1688
1689
1690 if (c == ':') {
1691 c = *cur++;
1692 if (c == 0) return(ret);
1693 *prefix = ret;
1694 len = 0;
1695
1696 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1697 buf[len++] = c;
1698 c = *cur++;
1699 }
1700 if (len >= max) {
1701 /*
1702 * Okay someone managed to make a huge name, so he's ready to pay
1703 * for the processing speed.
1704 */
1705 max = len * 2;
1706
1707 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1708 if (buffer == NULL) {
1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1710 ctxt->sax->error(ctxt->userData,
1711 "xmlSplitQName: out of memory\n");
1712 return(NULL);
1713 }
1714 memcpy(buffer, buf, len);
1715 while (c != 0) { /* tested bigname2.xml */
1716 if (len + 10 > max) {
1717 max *= 2;
1718 buffer = (xmlChar *) xmlRealloc(buffer,
1719 max * sizeof(xmlChar));
1720 if (buffer == NULL) {
1721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1722 ctxt->sax->error(ctxt->userData,
1723 "xmlSplitQName: out of memory\n");
1724 return(NULL);
1725 }
1726 }
1727 buffer[len++] = c;
1728 c = *cur++;
1729 }
1730 buffer[len] = 0;
1731 }
1732
1733 if (buffer == NULL)
1734 ret = xmlStrndup(buf, len);
1735 else {
1736 ret = buffer;
1737 }
1738 }
1739
1740 return(ret);
1741}
1742
1743/************************************************************************
1744 * *
1745 * The parser itself *
1746 * Relates to http://www.w3.org/TR/REC-xml *
1747 * *
1748 ************************************************************************/
1749
Daniel Veillard76d66f42001-05-16 21:05:17 +00001750static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001751/**
1752 * xmlParseName:
1753 * @ctxt: an XML parser context
1754 *
1755 * parse an XML name.
1756 *
1757 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1758 * CombiningChar | Extender
1759 *
1760 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1761 *
1762 * [6] Names ::= Name (S Name)*
1763 *
1764 * Returns the Name parsed or NULL
1765 */
1766
1767xmlChar *
1768xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001769 const xmlChar *in;
1770 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001771 int count = 0;
1772
1773 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001774
1775 /*
1776 * Accelerator for simple ASCII names
1777 */
1778 in = ctxt->input->cur;
1779 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1780 ((*in >= 0x41) && (*in <= 0x5A)) ||
1781 (*in == '_') || (*in == ':')) {
1782 in++;
1783 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1784 ((*in >= 0x41) && (*in <= 0x5A)) ||
1785 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001786 (*in == '_') || (*in == '-') ||
1787 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001788 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001789 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001790 count = in - ctxt->input->cur;
1791 ret = xmlStrndup(ctxt->input->cur, count);
1792 ctxt->input->cur = in;
1793 return(ret);
1794 }
1795 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001796 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001797}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001798
Daniel Veillard46de64e2002-05-29 08:21:33 +00001799/**
1800 * xmlParseNameAndCompare:
1801 * @ctxt: an XML parser context
1802 *
1803 * parse an XML name and compares for match
1804 * (specialized for endtag parsing)
1805 *
1806 *
1807 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1808 * and the name for mismatch
1809 */
1810
1811xmlChar *
1812xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1813 const xmlChar *cmp = other;
1814 const xmlChar *in;
1815 xmlChar *ret;
1816 int count = 0;
1817
1818 GROW;
1819
1820 in = ctxt->input->cur;
1821 while (*in != 0 && *in == *cmp) {
1822 ++in;
1823 ++cmp;
1824 }
1825 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1826 /* success */
1827 ctxt->input->cur = in;
1828 return (xmlChar*) 1;
1829 }
1830 /* failure (or end of input buffer), check with full function */
1831 ret = xmlParseName (ctxt);
1832 if (ret != 0 && xmlStrEqual (ret, other)) {
1833 xmlFree (ret);
1834 return (xmlChar*) 1;
1835 }
1836 return ret;
1837}
1838
Daniel Veillard76d66f42001-05-16 21:05:17 +00001839static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001840xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1841 xmlChar buf[XML_MAX_NAMELEN + 5];
1842 int len = 0, l;
1843 int c;
1844 int count = 0;
1845
1846 /*
1847 * Handler for more complex cases
1848 */
1849 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001850 c = CUR_CHAR(l);
1851 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1852 (!IS_LETTER(c) && (c != '_') &&
1853 (c != ':'))) {
1854 return(NULL);
1855 }
1856
1857 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1858 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1859 (c == '.') || (c == '-') ||
1860 (c == '_') || (c == ':') ||
1861 (IS_COMBINING(c)) ||
1862 (IS_EXTENDER(c)))) {
1863 if (count++ > 100) {
1864 count = 0;
1865 GROW;
1866 }
1867 COPY_BUF(l,buf,len,c);
1868 NEXTL(l);
1869 c = CUR_CHAR(l);
1870 if (len >= XML_MAX_NAMELEN) {
1871 /*
1872 * Okay someone managed to make a huge name, so he's ready to pay
1873 * for the processing speed.
1874 */
1875 xmlChar *buffer;
1876 int max = len * 2;
1877
1878 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1879 if (buffer == NULL) {
1880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1881 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001882 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001883 return(NULL);
1884 }
1885 memcpy(buffer, buf, len);
1886 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1887 (c == '.') || (c == '-') ||
1888 (c == '_') || (c == ':') ||
1889 (IS_COMBINING(c)) ||
1890 (IS_EXTENDER(c))) {
1891 if (count++ > 100) {
1892 count = 0;
1893 GROW;
1894 }
1895 if (len + 10 > max) {
1896 max *= 2;
1897 buffer = (xmlChar *) xmlRealloc(buffer,
1898 max * sizeof(xmlChar));
1899 if (buffer == NULL) {
1900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1901 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001902 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001903 return(NULL);
1904 }
1905 }
1906 COPY_BUF(l,buffer,len,c);
1907 NEXTL(l);
1908 c = CUR_CHAR(l);
1909 }
1910 buffer[len] = 0;
1911 return(buffer);
1912 }
1913 }
1914 return(xmlStrndup(buf, len));
1915}
1916
1917/**
1918 * xmlParseStringName:
1919 * @ctxt: an XML parser context
1920 * @str: a pointer to the string pointer (IN/OUT)
1921 *
1922 * parse an XML name.
1923 *
1924 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1925 * CombiningChar | Extender
1926 *
1927 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1928 *
1929 * [6] Names ::= Name (S Name)*
1930 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001931 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001932 * is updated to the current location in the string.
1933 */
1934
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001935static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001936xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1937 xmlChar buf[XML_MAX_NAMELEN + 5];
1938 const xmlChar *cur = *str;
1939 int len = 0, l;
1940 int c;
1941
1942 c = CUR_SCHAR(cur, l);
1943 if (!IS_LETTER(c) && (c != '_') &&
1944 (c != ':')) {
1945 return(NULL);
1946 }
1947
1948 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1949 (c == '.') || (c == '-') ||
1950 (c == '_') || (c == ':') ||
1951 (IS_COMBINING(c)) ||
1952 (IS_EXTENDER(c))) {
1953 COPY_BUF(l,buf,len,c);
1954 cur += l;
1955 c = CUR_SCHAR(cur, l);
1956 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1957 /*
1958 * Okay someone managed to make a huge name, so he's ready to pay
1959 * for the processing speed.
1960 */
1961 xmlChar *buffer;
1962 int max = len * 2;
1963
1964 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1965 if (buffer == NULL) {
1966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1967 ctxt->sax->error(ctxt->userData,
1968 "xmlParseStringName: out of memory\n");
1969 return(NULL);
1970 }
1971 memcpy(buffer, buf, len);
1972 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1973 (c == '.') || (c == '-') ||
1974 (c == '_') || (c == ':') ||
1975 (IS_COMBINING(c)) ||
1976 (IS_EXTENDER(c))) {
1977 if (len + 10 > max) {
1978 max *= 2;
1979 buffer = (xmlChar *) xmlRealloc(buffer,
1980 max * sizeof(xmlChar));
1981 if (buffer == NULL) {
1982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1983 ctxt->sax->error(ctxt->userData,
1984 "xmlParseStringName: out of memory\n");
1985 return(NULL);
1986 }
1987 }
1988 COPY_BUF(l,buffer,len,c);
1989 cur += l;
1990 c = CUR_SCHAR(cur, l);
1991 }
1992 buffer[len] = 0;
1993 *str = cur;
1994 return(buffer);
1995 }
1996 }
1997 *str = cur;
1998 return(xmlStrndup(buf, len));
1999}
2000
2001/**
2002 * xmlParseNmtoken:
2003 * @ctxt: an XML parser context
2004 *
2005 * parse an XML Nmtoken.
2006 *
2007 * [7] Nmtoken ::= (NameChar)+
2008 *
2009 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2010 *
2011 * Returns the Nmtoken parsed or NULL
2012 */
2013
2014xmlChar *
2015xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2016 xmlChar buf[XML_MAX_NAMELEN + 5];
2017 int len = 0, l;
2018 int c;
2019 int count = 0;
2020
2021 GROW;
2022 c = CUR_CHAR(l);
2023
2024 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2025 (c == '.') || (c == '-') ||
2026 (c == '_') || (c == ':') ||
2027 (IS_COMBINING(c)) ||
2028 (IS_EXTENDER(c))) {
2029 if (count++ > 100) {
2030 count = 0;
2031 GROW;
2032 }
2033 COPY_BUF(l,buf,len,c);
2034 NEXTL(l);
2035 c = CUR_CHAR(l);
2036 if (len >= XML_MAX_NAMELEN) {
2037 /*
2038 * Okay someone managed to make a huge token, so he's ready to pay
2039 * for the processing speed.
2040 */
2041 xmlChar *buffer;
2042 int max = len * 2;
2043
2044 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2045 if (buffer == NULL) {
2046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2047 ctxt->sax->error(ctxt->userData,
2048 "xmlParseNmtoken: out of memory\n");
2049 return(NULL);
2050 }
2051 memcpy(buffer, buf, len);
2052 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2053 (c == '.') || (c == '-') ||
2054 (c == '_') || (c == ':') ||
2055 (IS_COMBINING(c)) ||
2056 (IS_EXTENDER(c))) {
2057 if (count++ > 100) {
2058 count = 0;
2059 GROW;
2060 }
2061 if (len + 10 > max) {
2062 max *= 2;
2063 buffer = (xmlChar *) xmlRealloc(buffer,
2064 max * sizeof(xmlChar));
2065 if (buffer == NULL) {
2066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2067 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002068 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002069 return(NULL);
2070 }
2071 }
2072 COPY_BUF(l,buffer,len,c);
2073 NEXTL(l);
2074 c = CUR_CHAR(l);
2075 }
2076 buffer[len] = 0;
2077 return(buffer);
2078 }
2079 }
2080 if (len == 0)
2081 return(NULL);
2082 return(xmlStrndup(buf, len));
2083}
2084
2085/**
2086 * xmlParseEntityValue:
2087 * @ctxt: an XML parser context
2088 * @orig: if non-NULL store a copy of the original entity value
2089 *
2090 * parse a value for ENTITY declarations
2091 *
2092 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2093 * "'" ([^%&'] | PEReference | Reference)* "'"
2094 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002095 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002096 */
2097
2098xmlChar *
2099xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2100 xmlChar *buf = NULL;
2101 int len = 0;
2102 int size = XML_PARSER_BUFFER_SIZE;
2103 int c, l;
2104 xmlChar stop;
2105 xmlChar *ret = NULL;
2106 const xmlChar *cur = NULL;
2107 xmlParserInputPtr input;
2108
2109 if (RAW == '"') stop = '"';
2110 else if (RAW == '\'') stop = '\'';
2111 else {
2112 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2114 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2115 ctxt->wellFormed = 0;
2116 ctxt->disableSAX = 1;
2117 return(NULL);
2118 }
2119 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2120 if (buf == NULL) {
2121 xmlGenericError(xmlGenericErrorContext,
2122 "malloc of %d byte failed\n", size);
2123 return(NULL);
2124 }
2125
2126 /*
2127 * The content of the entity definition is copied in a buffer.
2128 */
2129
2130 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2131 input = ctxt->input;
2132 GROW;
2133 NEXT;
2134 c = CUR_CHAR(l);
2135 /*
2136 * NOTE: 4.4.5 Included in Literal
2137 * When a parameter entity reference appears in a literal entity
2138 * value, ... a single or double quote character in the replacement
2139 * text is always treated as a normal data character and will not
2140 * terminate the literal.
2141 * In practice it means we stop the loop only when back at parsing
2142 * the initial entity and the quote is found
2143 */
2144 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2145 (ctxt->input != input))) {
2146 if (len + 5 >= size) {
2147 size *= 2;
2148 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2149 if (buf == NULL) {
2150 xmlGenericError(xmlGenericErrorContext,
2151 "realloc of %d byte failed\n", size);
2152 return(NULL);
2153 }
2154 }
2155 COPY_BUF(l,buf,len,c);
2156 NEXTL(l);
2157 /*
2158 * Pop-up of finished entities.
2159 */
2160 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2161 xmlPopInput(ctxt);
2162
2163 GROW;
2164 c = CUR_CHAR(l);
2165 if (c == 0) {
2166 GROW;
2167 c = CUR_CHAR(l);
2168 }
2169 }
2170 buf[len] = 0;
2171
2172 /*
2173 * Raise problem w.r.t. '&' and '%' being used in non-entities
2174 * reference constructs. Note Charref will be handled in
2175 * xmlStringDecodeEntities()
2176 */
2177 cur = buf;
2178 while (*cur != 0) { /* non input consuming */
2179 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2180 xmlChar *name;
2181 xmlChar tmp = *cur;
2182
2183 cur++;
2184 name = xmlParseStringName(ctxt, &cur);
2185 if ((name == NULL) || (*cur != ';')) {
2186 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2188 ctxt->sax->error(ctxt->userData,
2189 "EntityValue: '%c' forbidden except for entities references\n",
2190 tmp);
2191 ctxt->wellFormed = 0;
2192 ctxt->disableSAX = 1;
2193 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002194 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2195 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002196 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2198 ctxt->sax->error(ctxt->userData,
2199 "EntityValue: PEReferences forbidden in internal subset\n",
2200 tmp);
2201 ctxt->wellFormed = 0;
2202 ctxt->disableSAX = 1;
2203 }
2204 if (name != NULL)
2205 xmlFree(name);
2206 }
2207 cur++;
2208 }
2209
2210 /*
2211 * Then PEReference entities are substituted.
2212 */
2213 if (c != stop) {
2214 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2216 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2217 ctxt->wellFormed = 0;
2218 ctxt->disableSAX = 1;
2219 xmlFree(buf);
2220 } else {
2221 NEXT;
2222 /*
2223 * NOTE: 4.4.7 Bypassed
2224 * When a general entity reference appears in the EntityValue in
2225 * an entity declaration, it is bypassed and left as is.
2226 * so XML_SUBSTITUTE_REF is not set here.
2227 */
2228 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2229 0, 0, 0);
2230 if (orig != NULL)
2231 *orig = buf;
2232 else
2233 xmlFree(buf);
2234 }
2235
2236 return(ret);
2237}
2238
2239/**
2240 * xmlParseAttValue:
2241 * @ctxt: an XML parser context
2242 *
2243 * parse a value for an attribute
2244 * Note: the parser won't do substitution of entities here, this
2245 * will be handled later in xmlStringGetNodeList
2246 *
2247 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2248 * "'" ([^<&'] | Reference)* "'"
2249 *
2250 * 3.3.3 Attribute-Value Normalization:
2251 * Before the value of an attribute is passed to the application or
2252 * checked for validity, the XML processor must normalize it as follows:
2253 * - a character reference is processed by appending the referenced
2254 * character to the attribute value
2255 * - an entity reference is processed by recursively processing the
2256 * replacement text of the entity
2257 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2258 * appending #x20 to the normalized value, except that only a single
2259 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2260 * parsed entity or the literal entity value of an internal parsed entity
2261 * - other characters are processed by appending them to the normalized value
2262 * If the declared value is not CDATA, then the XML processor must further
2263 * process the normalized attribute value by discarding any leading and
2264 * trailing space (#x20) characters, and by replacing sequences of space
2265 * (#x20) characters by a single space (#x20) character.
2266 * All attributes for which no declaration has been read should be treated
2267 * by a non-validating parser as if declared CDATA.
2268 *
2269 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2270 */
2271
2272xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002273xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2274
2275xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002276xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2277 xmlChar limit = 0;
2278 xmlChar *buf = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002279 xmlChar *in = NULL;
2280 xmlChar *ret = NULL;
2281 SHRINK;
2282 GROW;
2283 in = CUR_PTR;
2284 if (*in != '"' && *in != '\'') {
2285 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2287 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2288 ctxt->wellFormed = 0;
2289 ctxt->disableSAX = 1;
2290 return(NULL);
2291 }
2292 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2293 limit = *in;
2294 ++in;
2295
2296 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2297 *in != '&' && *in != '<'
2298 ) {
2299 ++in;
2300 }
2301 if (*in != limit) {
2302 return xmlParseAttValueComplex(ctxt);
2303 }
2304 ++in;
2305 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2306 CUR_PTR = in;
2307 return ret;
2308}
2309
2310xmlChar *
2311xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2312 xmlChar limit = 0;
2313 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002314 int len = 0;
2315 int buf_size = 0;
2316 int c, l;
2317 xmlChar *current = NULL;
2318 xmlEntityPtr ent;
2319
2320
2321 SHRINK;
2322 if (NXT(0) == '"') {
2323 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2324 limit = '"';
2325 NEXT;
2326 } else if (NXT(0) == '\'') {
2327 limit = '\'';
2328 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2329 NEXT;
2330 } else {
2331 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2333 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2334 ctxt->wellFormed = 0;
2335 ctxt->disableSAX = 1;
2336 return(NULL);
2337 }
2338
2339 /*
2340 * allocate a translation buffer.
2341 */
2342 buf_size = XML_PARSER_BUFFER_SIZE;
2343 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2344 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002345 xmlGenericError(xmlGenericErrorContext,
2346 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002347 return(NULL);
2348 }
2349
2350 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002351 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002352 */
2353 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002354 while ((NXT(0) != limit) && /* checked */
2355 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002356 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002357 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002358 if (NXT(1) == '#') {
2359 int val = xmlParseCharRef(ctxt);
2360 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002361 if (ctxt->replaceEntities) {
2362 if (len > buf_size - 10) {
2363 growBuffer(buf);
2364 }
2365 buf[len++] = '&';
2366 } else {
2367 /*
2368 * The reparsing will be done in xmlStringGetNodeList()
2369 * called by the attribute() function in SAX.c
2370 */
2371 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002372
Daniel Veillard319a7422001-09-11 09:27:09 +00002373 if (len > buf_size - 10) {
2374 growBuffer(buf);
2375 }
2376 current = &buffer[0];
2377 while (*current != 0) { /* non input consuming */
2378 buf[len++] = *current++;
2379 }
Owen Taylor3473f882001-02-23 17:55:21 +00002380 }
2381 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002382 if (len > buf_size - 10) {
2383 growBuffer(buf);
2384 }
Owen Taylor3473f882001-02-23 17:55:21 +00002385 len += xmlCopyChar(0, &buf[len], val);
2386 }
2387 } else {
2388 ent = xmlParseEntityRef(ctxt);
2389 if ((ent != NULL) &&
2390 (ctxt->replaceEntities != 0)) {
2391 xmlChar *rep;
2392
2393 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2394 rep = xmlStringDecodeEntities(ctxt, ent->content,
2395 XML_SUBSTITUTE_REF, 0, 0, 0);
2396 if (rep != NULL) {
2397 current = rep;
2398 while (*current != 0) { /* non input consuming */
2399 buf[len++] = *current++;
2400 if (len > buf_size - 10) {
2401 growBuffer(buf);
2402 }
2403 }
2404 xmlFree(rep);
2405 }
2406 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002407 if (len > buf_size - 10) {
2408 growBuffer(buf);
2409 }
Owen Taylor3473f882001-02-23 17:55:21 +00002410 if (ent->content != NULL)
2411 buf[len++] = ent->content[0];
2412 }
2413 } else if (ent != NULL) {
2414 int i = xmlStrlen(ent->name);
2415 const xmlChar *cur = ent->name;
2416
2417 /*
2418 * This may look absurd but is needed to detect
2419 * entities problems
2420 */
2421 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2422 (ent->content != NULL)) {
2423 xmlChar *rep;
2424 rep = xmlStringDecodeEntities(ctxt, ent->content,
2425 XML_SUBSTITUTE_REF, 0, 0, 0);
2426 if (rep != NULL)
2427 xmlFree(rep);
2428 }
2429
2430 /*
2431 * Just output the reference
2432 */
2433 buf[len++] = '&';
2434 if (len > buf_size - i - 10) {
2435 growBuffer(buf);
2436 }
2437 for (;i > 0;i--)
2438 buf[len++] = *cur++;
2439 buf[len++] = ';';
2440 }
2441 }
2442 } else {
2443 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2444 COPY_BUF(l,buf,len,0x20);
2445 if (len > buf_size - 10) {
2446 growBuffer(buf);
2447 }
2448 } else {
2449 COPY_BUF(l,buf,len,c);
2450 if (len > buf_size - 10) {
2451 growBuffer(buf);
2452 }
2453 }
2454 NEXTL(l);
2455 }
2456 GROW;
2457 c = CUR_CHAR(l);
2458 }
2459 buf[len++] = 0;
2460 if (RAW == '<') {
2461 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2463 ctxt->sax->error(ctxt->userData,
2464 "Unescaped '<' not allowed in attributes values\n");
2465 ctxt->wellFormed = 0;
2466 ctxt->disableSAX = 1;
2467 } else if (RAW != limit) {
2468 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2470 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2471 ctxt->wellFormed = 0;
2472 ctxt->disableSAX = 1;
2473 } else
2474 NEXT;
2475 return(buf);
2476}
2477
2478/**
2479 * xmlParseSystemLiteral:
2480 * @ctxt: an XML parser context
2481 *
2482 * parse an XML Literal
2483 *
2484 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2485 *
2486 * Returns the SystemLiteral parsed or NULL
2487 */
2488
2489xmlChar *
2490xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2491 xmlChar *buf = NULL;
2492 int len = 0;
2493 int size = XML_PARSER_BUFFER_SIZE;
2494 int cur, l;
2495 xmlChar stop;
2496 int state = ctxt->instate;
2497 int count = 0;
2498
2499 SHRINK;
2500 if (RAW == '"') {
2501 NEXT;
2502 stop = '"';
2503 } else if (RAW == '\'') {
2504 NEXT;
2505 stop = '\'';
2506 } else {
2507 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2509 ctxt->sax->error(ctxt->userData,
2510 "SystemLiteral \" or ' expected\n");
2511 ctxt->wellFormed = 0;
2512 ctxt->disableSAX = 1;
2513 return(NULL);
2514 }
2515
2516 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2517 if (buf == NULL) {
2518 xmlGenericError(xmlGenericErrorContext,
2519 "malloc of %d byte failed\n", size);
2520 return(NULL);
2521 }
2522 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2523 cur = CUR_CHAR(l);
2524 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2525 if (len + 5 >= size) {
2526 size *= 2;
2527 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2528 if (buf == NULL) {
2529 xmlGenericError(xmlGenericErrorContext,
2530 "realloc of %d byte failed\n", size);
2531 ctxt->instate = (xmlParserInputState) state;
2532 return(NULL);
2533 }
2534 }
2535 count++;
2536 if (count > 50) {
2537 GROW;
2538 count = 0;
2539 }
2540 COPY_BUF(l,buf,len,cur);
2541 NEXTL(l);
2542 cur = CUR_CHAR(l);
2543 if (cur == 0) {
2544 GROW;
2545 SHRINK;
2546 cur = CUR_CHAR(l);
2547 }
2548 }
2549 buf[len] = 0;
2550 ctxt->instate = (xmlParserInputState) state;
2551 if (!IS_CHAR(cur)) {
2552 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2554 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2555 ctxt->wellFormed = 0;
2556 ctxt->disableSAX = 1;
2557 } else {
2558 NEXT;
2559 }
2560 return(buf);
2561}
2562
2563/**
2564 * xmlParsePubidLiteral:
2565 * @ctxt: an XML parser context
2566 *
2567 * parse an XML public literal
2568 *
2569 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2570 *
2571 * Returns the PubidLiteral parsed or NULL.
2572 */
2573
2574xmlChar *
2575xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2576 xmlChar *buf = NULL;
2577 int len = 0;
2578 int size = XML_PARSER_BUFFER_SIZE;
2579 xmlChar cur;
2580 xmlChar stop;
2581 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002582 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002583
2584 SHRINK;
2585 if (RAW == '"') {
2586 NEXT;
2587 stop = '"';
2588 } else if (RAW == '\'') {
2589 NEXT;
2590 stop = '\'';
2591 } else {
2592 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2594 ctxt->sax->error(ctxt->userData,
2595 "SystemLiteral \" or ' expected\n");
2596 ctxt->wellFormed = 0;
2597 ctxt->disableSAX = 1;
2598 return(NULL);
2599 }
2600 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2601 if (buf == NULL) {
2602 xmlGenericError(xmlGenericErrorContext,
2603 "malloc of %d byte failed\n", size);
2604 return(NULL);
2605 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002606 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002607 cur = CUR;
2608 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2609 if (len + 1 >= size) {
2610 size *= 2;
2611 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2612 if (buf == NULL) {
2613 xmlGenericError(xmlGenericErrorContext,
2614 "realloc of %d byte failed\n", size);
2615 return(NULL);
2616 }
2617 }
2618 buf[len++] = cur;
2619 count++;
2620 if (count > 50) {
2621 GROW;
2622 count = 0;
2623 }
2624 NEXT;
2625 cur = CUR;
2626 if (cur == 0) {
2627 GROW;
2628 SHRINK;
2629 cur = CUR;
2630 }
2631 }
2632 buf[len] = 0;
2633 if (cur != stop) {
2634 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2636 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2637 ctxt->wellFormed = 0;
2638 ctxt->disableSAX = 1;
2639 } else {
2640 NEXT;
2641 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002642 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002643 return(buf);
2644}
2645
Daniel Veillard48b2f892001-02-25 16:11:03 +00002646void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002647/**
2648 * xmlParseCharData:
2649 * @ctxt: an XML parser context
2650 * @cdata: int indicating whether we are within a CDATA section
2651 *
2652 * parse a CharData section.
2653 * if we are within a CDATA section ']]>' marks an end of section.
2654 *
2655 * The right angle bracket (>) may be represented using the string "&gt;",
2656 * and must, for compatibility, be escaped using "&gt;" or a character
2657 * reference when it appears in the string "]]>" in content, when that
2658 * string is not marking the end of a CDATA section.
2659 *
2660 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2661 */
2662
2663void
2664xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002665 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002666 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002667 int line = ctxt->input->line;
2668 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002669
2670 SHRINK;
2671 GROW;
2672 /*
2673 * Accelerated common case where input don't need to be
2674 * modified before passing it to the handler.
2675 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002676 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002677 in = ctxt->input->cur;
2678 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002679get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002680 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2681 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002682 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002683 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002684 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002685 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002686 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002687 ctxt->input->line++;
2688 in++;
2689 }
2690 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002691 }
2692 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002693 if ((in[1] == ']') && (in[2] == '>')) {
2694 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2696 ctxt->sax->error(ctxt->userData,
2697 "Sequence ']]>' not allowed in content\n");
2698 ctxt->input->cur = in;
2699 ctxt->wellFormed = 0;
2700 ctxt->disableSAX = 1;
2701 return;
2702 }
2703 in++;
2704 goto get_more;
2705 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002706 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002707 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002708 if (IS_BLANK(*ctxt->input->cur)) {
2709 const xmlChar *tmp = ctxt->input->cur;
2710 ctxt->input->cur = in;
2711 if (areBlanks(ctxt, tmp, nbchar)) {
2712 if (ctxt->sax->ignorableWhitespace != NULL)
2713 ctxt->sax->ignorableWhitespace(ctxt->userData,
2714 tmp, nbchar);
2715 } else {
2716 if (ctxt->sax->characters != NULL)
2717 ctxt->sax->characters(ctxt->userData,
2718 tmp, nbchar);
2719 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002720 line = ctxt->input->line;
2721 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002722 } else {
2723 if (ctxt->sax->characters != NULL)
2724 ctxt->sax->characters(ctxt->userData,
2725 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002726 line = ctxt->input->line;
2727 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002728 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002729 }
2730 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002731 if (*in == 0xD) {
2732 in++;
2733 if (*in == 0xA) {
2734 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002735 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002736 ctxt->input->line++;
2737 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002738 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002739 in--;
2740 }
2741 if (*in == '<') {
2742 return;
2743 }
2744 if (*in == '&') {
2745 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002746 }
2747 SHRINK;
2748 GROW;
2749 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002750 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002751 nbchar = 0;
2752 }
Daniel Veillard50582112001-03-26 22:52:16 +00002753 ctxt->input->line = line;
2754 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002755 xmlParseCharDataComplex(ctxt, cdata);
2756}
2757
2758void
2759xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002760 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2761 int nbchar = 0;
2762 int cur, l;
2763 int count = 0;
2764
2765 SHRINK;
2766 GROW;
2767 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002768 while ((cur != '<') && /* checked */
2769 (cur != '&') &&
2770 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002771 if ((cur == ']') && (NXT(1) == ']') &&
2772 (NXT(2) == '>')) {
2773 if (cdata) break;
2774 else {
2775 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2777 ctxt->sax->error(ctxt->userData,
2778 "Sequence ']]>' not allowed in content\n");
2779 /* Should this be relaxed ??? I see a "must here */
2780 ctxt->wellFormed = 0;
2781 ctxt->disableSAX = 1;
2782 }
2783 }
2784 COPY_BUF(l,buf,nbchar,cur);
2785 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2786 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002787 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002788 */
2789 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2790 if (areBlanks(ctxt, buf, nbchar)) {
2791 if (ctxt->sax->ignorableWhitespace != NULL)
2792 ctxt->sax->ignorableWhitespace(ctxt->userData,
2793 buf, nbchar);
2794 } else {
2795 if (ctxt->sax->characters != NULL)
2796 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2797 }
2798 }
2799 nbchar = 0;
2800 }
2801 count++;
2802 if (count > 50) {
2803 GROW;
2804 count = 0;
2805 }
2806 NEXTL(l);
2807 cur = CUR_CHAR(l);
2808 }
2809 if (nbchar != 0) {
2810 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002811 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002812 */
2813 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2814 if (areBlanks(ctxt, buf, nbchar)) {
2815 if (ctxt->sax->ignorableWhitespace != NULL)
2816 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2817 } else {
2818 if (ctxt->sax->characters != NULL)
2819 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2820 }
2821 }
2822 }
2823}
2824
2825/**
2826 * xmlParseExternalID:
2827 * @ctxt: an XML parser context
2828 * @publicID: a xmlChar** receiving PubidLiteral
2829 * @strict: indicate whether we should restrict parsing to only
2830 * production [75], see NOTE below
2831 *
2832 * Parse an External ID or a Public ID
2833 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002834 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002835 * 'PUBLIC' S PubidLiteral S SystemLiteral
2836 *
2837 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2838 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2839 *
2840 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2841 *
2842 * Returns the function returns SystemLiteral and in the second
2843 * case publicID receives PubidLiteral, is strict is off
2844 * it is possible to return NULL and have publicID set.
2845 */
2846
2847xmlChar *
2848xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2849 xmlChar *URI = NULL;
2850
2851 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002852
2853 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002854 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2855 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2856 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2857 SKIP(6);
2858 if (!IS_BLANK(CUR)) {
2859 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2861 ctxt->sax->error(ctxt->userData,
2862 "Space required after 'SYSTEM'\n");
2863 ctxt->wellFormed = 0;
2864 ctxt->disableSAX = 1;
2865 }
2866 SKIP_BLANKS;
2867 URI = xmlParseSystemLiteral(ctxt);
2868 if (URI == NULL) {
2869 ctxt->errNo = XML_ERR_URI_REQUIRED;
2870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2871 ctxt->sax->error(ctxt->userData,
2872 "xmlParseExternalID: SYSTEM, no URI\n");
2873 ctxt->wellFormed = 0;
2874 ctxt->disableSAX = 1;
2875 }
2876 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2877 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2878 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2879 SKIP(6);
2880 if (!IS_BLANK(CUR)) {
2881 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2883 ctxt->sax->error(ctxt->userData,
2884 "Space required after 'PUBLIC'\n");
2885 ctxt->wellFormed = 0;
2886 ctxt->disableSAX = 1;
2887 }
2888 SKIP_BLANKS;
2889 *publicID = xmlParsePubidLiteral(ctxt);
2890 if (*publicID == NULL) {
2891 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2893 ctxt->sax->error(ctxt->userData,
2894 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2895 ctxt->wellFormed = 0;
2896 ctxt->disableSAX = 1;
2897 }
2898 if (strict) {
2899 /*
2900 * We don't handle [83] so "S SystemLiteral" is required.
2901 */
2902 if (!IS_BLANK(CUR)) {
2903 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2905 ctxt->sax->error(ctxt->userData,
2906 "Space required after the Public Identifier\n");
2907 ctxt->wellFormed = 0;
2908 ctxt->disableSAX = 1;
2909 }
2910 } else {
2911 /*
2912 * We handle [83] so we return immediately, if
2913 * "S SystemLiteral" is not detected. From a purely parsing
2914 * point of view that's a nice mess.
2915 */
2916 const xmlChar *ptr;
2917 GROW;
2918
2919 ptr = CUR_PTR;
2920 if (!IS_BLANK(*ptr)) return(NULL);
2921
2922 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2923 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2924 }
2925 SKIP_BLANKS;
2926 URI = xmlParseSystemLiteral(ctxt);
2927 if (URI == NULL) {
2928 ctxt->errNo = XML_ERR_URI_REQUIRED;
2929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2930 ctxt->sax->error(ctxt->userData,
2931 "xmlParseExternalID: PUBLIC, no URI\n");
2932 ctxt->wellFormed = 0;
2933 ctxt->disableSAX = 1;
2934 }
2935 }
2936 return(URI);
2937}
2938
2939/**
2940 * xmlParseComment:
2941 * @ctxt: an XML parser context
2942 *
2943 * Skip an XML (SGML) comment <!-- .... -->
2944 * The spec says that "For compatibility, the string "--" (double-hyphen)
2945 * must not occur within comments. "
2946 *
2947 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2948 */
2949void
2950xmlParseComment(xmlParserCtxtPtr ctxt) {
2951 xmlChar *buf = NULL;
2952 int len;
2953 int size = XML_PARSER_BUFFER_SIZE;
2954 int q, ql;
2955 int r, rl;
2956 int cur, l;
2957 xmlParserInputState state;
2958 xmlParserInputPtr input = ctxt->input;
2959 int count = 0;
2960
2961 /*
2962 * Check that there is a comment right here.
2963 */
2964 if ((RAW != '<') || (NXT(1) != '!') ||
2965 (NXT(2) != '-') || (NXT(3) != '-')) return;
2966
2967 state = ctxt->instate;
2968 ctxt->instate = XML_PARSER_COMMENT;
2969 SHRINK;
2970 SKIP(4);
2971 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2972 if (buf == NULL) {
2973 xmlGenericError(xmlGenericErrorContext,
2974 "malloc of %d byte failed\n", size);
2975 ctxt->instate = state;
2976 return;
2977 }
2978 q = CUR_CHAR(ql);
2979 NEXTL(ql);
2980 r = CUR_CHAR(rl);
2981 NEXTL(rl);
2982 cur = CUR_CHAR(l);
2983 len = 0;
2984 while (IS_CHAR(cur) && /* checked */
2985 ((cur != '>') ||
2986 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002987 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002988 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2990 ctxt->sax->error(ctxt->userData,
2991 "Comment must not contain '--' (double-hyphen)`\n");
2992 ctxt->wellFormed = 0;
2993 ctxt->disableSAX = 1;
2994 }
2995 if (len + 5 >= size) {
2996 size *= 2;
2997 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2998 if (buf == NULL) {
2999 xmlGenericError(xmlGenericErrorContext,
3000 "realloc of %d byte failed\n", size);
3001 ctxt->instate = state;
3002 return;
3003 }
3004 }
3005 COPY_BUF(ql,buf,len,q);
3006 q = r;
3007 ql = rl;
3008 r = cur;
3009 rl = l;
3010
3011 count++;
3012 if (count > 50) {
3013 GROW;
3014 count = 0;
3015 }
3016 NEXTL(l);
3017 cur = CUR_CHAR(l);
3018 if (cur == 0) {
3019 SHRINK;
3020 GROW;
3021 cur = CUR_CHAR(l);
3022 }
3023 }
3024 buf[len] = 0;
3025 if (!IS_CHAR(cur)) {
3026 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3028 ctxt->sax->error(ctxt->userData,
3029 "Comment not terminated \n<!--%.50s\n", buf);
3030 ctxt->wellFormed = 0;
3031 ctxt->disableSAX = 1;
3032 xmlFree(buf);
3033 } else {
3034 if (input != ctxt->input) {
3035 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3037 ctxt->sax->error(ctxt->userData,
3038"Comment doesn't start and stop in the same entity\n");
3039 ctxt->wellFormed = 0;
3040 ctxt->disableSAX = 1;
3041 }
3042 NEXT;
3043 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3044 (!ctxt->disableSAX))
3045 ctxt->sax->comment(ctxt->userData, buf);
3046 xmlFree(buf);
3047 }
3048 ctxt->instate = state;
3049}
3050
3051/**
3052 * xmlParsePITarget:
3053 * @ctxt: an XML parser context
3054 *
3055 * parse the name of a PI
3056 *
3057 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3058 *
3059 * Returns the PITarget name or NULL
3060 */
3061
3062xmlChar *
3063xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3064 xmlChar *name;
3065
3066 name = xmlParseName(ctxt);
3067 if ((name != NULL) &&
3068 ((name[0] == 'x') || (name[0] == 'X')) &&
3069 ((name[1] == 'm') || (name[1] == 'M')) &&
3070 ((name[2] == 'l') || (name[2] == 'L'))) {
3071 int i;
3072 if ((name[0] == 'x') && (name[1] == 'm') &&
3073 (name[2] == 'l') && (name[3] == 0)) {
3074 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3076 ctxt->sax->error(ctxt->userData,
3077 "XML declaration allowed only at the start of the document\n");
3078 ctxt->wellFormed = 0;
3079 ctxt->disableSAX = 1;
3080 return(name);
3081 } else if (name[3] == 0) {
3082 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3084 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3085 ctxt->wellFormed = 0;
3086 ctxt->disableSAX = 1;
3087 return(name);
3088 }
3089 for (i = 0;;i++) {
3090 if (xmlW3CPIs[i] == NULL) break;
3091 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3092 return(name);
3093 }
3094 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3095 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3096 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003097 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003098 }
3099 }
3100 return(name);
3101}
3102
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The expected content is: catalog = "URL" (or 'URL'), with optional
 * blanks around each token and nothing but blanks after the closing
 * quote. On success the URL is added to the catalogs of the parsing
 * context; on a syntax error a SAX warning is raised instead.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;

    for (p += 7; IS_BLANK(*p); p++)
        ;
    /*
     * NOTE(review): a missing '=' leaves silently, unlike every other
     * syntax problem below which raises a warning - confirm whether
     * this is intended.
     */
    if (*p != '=')
        return;

    /* opening quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* quoted value, up to the matching quote */
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3165
Owen Taylor3473f882001-02-23 17:55:21 +00003166/**
3167 * xmlParsePI:
3168 * @ctxt: an XML parser context
3169 *
3170 * parse an XML Processing Instruction.
3171 *
3172 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3173 *
3174 * The processing is transfered to SAX once parsed.
3175 */
3176
3177void
3178xmlParsePI(xmlParserCtxtPtr ctxt) {
3179 xmlChar *buf = NULL;
3180 int len = 0;
3181 int size = XML_PARSER_BUFFER_SIZE;
3182 int cur, l;
3183 xmlChar *target;
3184 xmlParserInputState state;
3185 int count = 0;
3186
3187 if ((RAW == '<') && (NXT(1) == '?')) {
3188 xmlParserInputPtr input = ctxt->input;
3189 state = ctxt->instate;
3190 ctxt->instate = XML_PARSER_PI;
3191 /*
3192 * this is a Processing Instruction.
3193 */
3194 SKIP(2);
3195 SHRINK;
3196
3197 /*
3198 * Parse the target name and check for special support like
3199 * namespace.
3200 */
3201 target = xmlParsePITarget(ctxt);
3202 if (target != NULL) {
3203 if ((RAW == '?') && (NXT(1) == '>')) {
3204 if (input != ctxt->input) {
3205 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3207 ctxt->sax->error(ctxt->userData,
3208 "PI declaration doesn't start and stop in the same entity\n");
3209 ctxt->wellFormed = 0;
3210 ctxt->disableSAX = 1;
3211 }
3212 SKIP(2);
3213
3214 /*
3215 * SAX: PI detected.
3216 */
3217 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3218 (ctxt->sax->processingInstruction != NULL))
3219 ctxt->sax->processingInstruction(ctxt->userData,
3220 target, NULL);
3221 ctxt->instate = state;
3222 xmlFree(target);
3223 return;
3224 }
3225 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3226 if (buf == NULL) {
3227 xmlGenericError(xmlGenericErrorContext,
3228 "malloc of %d byte failed\n", size);
3229 ctxt->instate = state;
3230 return;
3231 }
3232 cur = CUR;
3233 if (!IS_BLANK(cur)) {
3234 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3236 ctxt->sax->error(ctxt->userData,
3237 "xmlParsePI: PI %s space expected\n", target);
3238 ctxt->wellFormed = 0;
3239 ctxt->disableSAX = 1;
3240 }
3241 SKIP_BLANKS;
3242 cur = CUR_CHAR(l);
3243 while (IS_CHAR(cur) && /* checked */
3244 ((cur != '?') || (NXT(1) != '>'))) {
3245 if (len + 5 >= size) {
3246 size *= 2;
3247 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3248 if (buf == NULL) {
3249 xmlGenericError(xmlGenericErrorContext,
3250 "realloc of %d byte failed\n", size);
3251 ctxt->instate = state;
3252 return;
3253 }
3254 }
3255 count++;
3256 if (count > 50) {
3257 GROW;
3258 count = 0;
3259 }
3260 COPY_BUF(l,buf,len,cur);
3261 NEXTL(l);
3262 cur = CUR_CHAR(l);
3263 if (cur == 0) {
3264 SHRINK;
3265 GROW;
3266 cur = CUR_CHAR(l);
3267 }
3268 }
3269 buf[len] = 0;
3270 if (cur != '?') {
3271 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3273 ctxt->sax->error(ctxt->userData,
3274 "xmlParsePI: PI %s never end ...\n", target);
3275 ctxt->wellFormed = 0;
3276 ctxt->disableSAX = 1;
3277 } else {
3278 if (input != ctxt->input) {
3279 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3281 ctxt->sax->error(ctxt->userData,
3282 "PI declaration doesn't start and stop in the same entity\n");
3283 ctxt->wellFormed = 0;
3284 ctxt->disableSAX = 1;
3285 }
3286 SKIP(2);
3287
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003288#ifdef LIBXML_CATALOG_ENABLED
3289 if (((state == XML_PARSER_MISC) ||
3290 (state == XML_PARSER_START)) &&
3291 (xmlStrEqual(target, XML_CATALOG_PI))) {
3292 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3293 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3294 (allow == XML_CATA_ALLOW_ALL))
3295 xmlParseCatalogPI(ctxt, buf);
3296 }
3297#endif
3298
3299
Owen Taylor3473f882001-02-23 17:55:21 +00003300 /*
3301 * SAX: PI detected.
3302 */
3303 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3304 (ctxt->sax->processingInstruction != NULL))
3305 ctxt->sax->processingInstruction(ctxt->userData,
3306 target, buf);
3307 }
3308 xmlFree(buf);
3309 xmlFree(target);
3310 } else {
3311 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3313 ctxt->sax->error(ctxt->userData,
3314 "xmlParsePI : no target name\n");
3315 ctxt->wellFormed = 0;
3316 ctxt->disableSAX = 1;
3317 }
3318 ctxt->instate = state;
3319 }
3320}
3321
3322/**
3323 * xmlParseNotationDecl:
3324 * @ctxt: an XML parser context
3325 *
3326 * parse a notation declaration
3327 *
3328 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3329 *
3330 * Hence there is actually 3 choices:
3331 * 'PUBLIC' S PubidLiteral
3332 * 'PUBLIC' S PubidLiteral S SystemLiteral
3333 * and 'SYSTEM' S SystemLiteral
3334 *
3335 * See the NOTE on xmlParseExternalID().
3336 */
3337
3338void
3339xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3340 xmlChar *name;
3341 xmlChar *Pubid;
3342 xmlChar *Systemid;
3343
3344 if ((RAW == '<') && (NXT(1) == '!') &&
3345 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3346 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3347 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3348 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3349 xmlParserInputPtr input = ctxt->input;
3350 SHRINK;
3351 SKIP(10);
3352 if (!IS_BLANK(CUR)) {
3353 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3355 ctxt->sax->error(ctxt->userData,
3356 "Space required after '<!NOTATION'\n");
3357 ctxt->wellFormed = 0;
3358 ctxt->disableSAX = 1;
3359 return;
3360 }
3361 SKIP_BLANKS;
3362
Daniel Veillard76d66f42001-05-16 21:05:17 +00003363 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003364 if (name == NULL) {
3365 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3367 ctxt->sax->error(ctxt->userData,
3368 "NOTATION: Name expected here\n");
3369 ctxt->wellFormed = 0;
3370 ctxt->disableSAX = 1;
3371 return;
3372 }
3373 if (!IS_BLANK(CUR)) {
3374 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3376 ctxt->sax->error(ctxt->userData,
3377 "Space required after the NOTATION name'\n");
3378 ctxt->wellFormed = 0;
3379 ctxt->disableSAX = 1;
3380 return;
3381 }
3382 SKIP_BLANKS;
3383
3384 /*
3385 * Parse the IDs.
3386 */
3387 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3388 SKIP_BLANKS;
3389
3390 if (RAW == '>') {
3391 if (input != ctxt->input) {
3392 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3394 ctxt->sax->error(ctxt->userData,
3395"Notation declaration doesn't start and stop in the same entity\n");
3396 ctxt->wellFormed = 0;
3397 ctxt->disableSAX = 1;
3398 }
3399 NEXT;
3400 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3401 (ctxt->sax->notationDecl != NULL))
3402 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3403 } else {
3404 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3406 ctxt->sax->error(ctxt->userData,
3407 "'>' required to close NOTATION declaration\n");
3408 ctxt->wellFormed = 0;
3409 ctxt->disableSAX = 1;
3410 }
3411 xmlFree(name);
3412 if (Systemid != NULL) xmlFree(Systemid);
3413 if (Pubid != NULL) xmlFree(Pubid);
3414 }
3415}
3416
3417/**
3418 * xmlParseEntityDecl:
3419 * @ctxt: an XML parser context
3420 *
3421 * parse <!ENTITY declarations
3422 *
3423 * [70] EntityDecl ::= GEDecl | PEDecl
3424 *
3425 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3426 *
3427 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3428 *
3429 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3430 *
3431 * [74] PEDef ::= EntityValue | ExternalID
3432 *
3433 * [76] NDataDecl ::= S 'NDATA' S Name
3434 *
3435 * [ VC: Notation Declared ]
3436 * The Name must match the declared name of a notation.
3437 */
3438
3439void
3440xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3441 xmlChar *name = NULL;
3442 xmlChar *value = NULL;
3443 xmlChar *URI = NULL, *literal = NULL;
3444 xmlChar *ndata = NULL;
3445 int isParameter = 0;
3446 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003447 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003448
3449 GROW;
3450 if ((RAW == '<') && (NXT(1) == '!') &&
3451 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3452 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3453 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3454 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003455 SHRINK;
3456 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003457 skipped = SKIP_BLANKS;
3458 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003459 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3461 ctxt->sax->error(ctxt->userData,
3462 "Space required after '<!ENTITY'\n");
3463 ctxt->wellFormed = 0;
3464 ctxt->disableSAX = 1;
3465 }
Owen Taylor3473f882001-02-23 17:55:21 +00003466
3467 if (RAW == '%') {
3468 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003469 skipped = SKIP_BLANKS;
3470 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003471 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3473 ctxt->sax->error(ctxt->userData,
3474 "Space required after '%'\n");
3475 ctxt->wellFormed = 0;
3476 ctxt->disableSAX = 1;
3477 }
Owen Taylor3473f882001-02-23 17:55:21 +00003478 isParameter = 1;
3479 }
3480
Daniel Veillard76d66f42001-05-16 21:05:17 +00003481 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003482 if (name == NULL) {
3483 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3485 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3486 ctxt->wellFormed = 0;
3487 ctxt->disableSAX = 1;
3488 return;
3489 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003490 skipped = SKIP_BLANKS;
3491 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003492 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3494 ctxt->sax->error(ctxt->userData,
3495 "Space required after the entity name\n");
3496 ctxt->wellFormed = 0;
3497 ctxt->disableSAX = 1;
3498 }
Owen Taylor3473f882001-02-23 17:55:21 +00003499
Daniel Veillardf5582f12002-06-11 10:08:16 +00003500 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003501 /*
3502 * handle the various case of definitions...
3503 */
3504 if (isParameter) {
3505 if ((RAW == '"') || (RAW == '\'')) {
3506 value = xmlParseEntityValue(ctxt, &orig);
3507 if (value) {
3508 if ((ctxt->sax != NULL) &&
3509 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3510 ctxt->sax->entityDecl(ctxt->userData, name,
3511 XML_INTERNAL_PARAMETER_ENTITY,
3512 NULL, NULL, value);
3513 }
3514 } else {
3515 URI = xmlParseExternalID(ctxt, &literal, 1);
3516 if ((URI == NULL) && (literal == NULL)) {
3517 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3519 ctxt->sax->error(ctxt->userData,
3520 "Entity value required\n");
3521 ctxt->wellFormed = 0;
3522 ctxt->disableSAX = 1;
3523 }
3524 if (URI) {
3525 xmlURIPtr uri;
3526
3527 uri = xmlParseURI((const char *) URI);
3528 if (uri == NULL) {
3529 ctxt->errNo = XML_ERR_INVALID_URI;
3530 if ((ctxt->sax != NULL) &&
3531 (!ctxt->disableSAX) &&
3532 (ctxt->sax->error != NULL))
3533 ctxt->sax->error(ctxt->userData,
3534 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003535 /*
3536 * This really ought to be a well formedness error
3537 * but the XML Core WG decided otherwise c.f. issue
3538 * E26 of the XML erratas.
3539 */
Owen Taylor3473f882001-02-23 17:55:21 +00003540 } else {
3541 if (uri->fragment != NULL) {
3542 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3543 if ((ctxt->sax != NULL) &&
3544 (!ctxt->disableSAX) &&
3545 (ctxt->sax->error != NULL))
3546 ctxt->sax->error(ctxt->userData,
3547 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003548 /*
3549 * Okay this is foolish to block those but not
3550 * invalid URIs.
3551 */
Owen Taylor3473f882001-02-23 17:55:21 +00003552 ctxt->wellFormed = 0;
3553 } else {
3554 if ((ctxt->sax != NULL) &&
3555 (!ctxt->disableSAX) &&
3556 (ctxt->sax->entityDecl != NULL))
3557 ctxt->sax->entityDecl(ctxt->userData, name,
3558 XML_EXTERNAL_PARAMETER_ENTITY,
3559 literal, URI, NULL);
3560 }
3561 xmlFreeURI(uri);
3562 }
3563 }
3564 }
3565 } else {
3566 if ((RAW == '"') || (RAW == '\'')) {
3567 value = xmlParseEntityValue(ctxt, &orig);
3568 if ((ctxt->sax != NULL) &&
3569 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3570 ctxt->sax->entityDecl(ctxt->userData, name,
3571 XML_INTERNAL_GENERAL_ENTITY,
3572 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003573 /*
3574 * For expat compatibility in SAX mode.
3575 */
3576 if ((ctxt->myDoc == NULL) ||
3577 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3578 if (ctxt->myDoc == NULL) {
3579 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3580 }
3581 if (ctxt->myDoc->intSubset == NULL)
3582 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3583 BAD_CAST "fake", NULL, NULL);
3584
3585 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3586 NULL, NULL, value);
3587 }
Owen Taylor3473f882001-02-23 17:55:21 +00003588 } else {
3589 URI = xmlParseExternalID(ctxt, &literal, 1);
3590 if ((URI == NULL) && (literal == NULL)) {
3591 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3593 ctxt->sax->error(ctxt->userData,
3594 "Entity value required\n");
3595 ctxt->wellFormed = 0;
3596 ctxt->disableSAX = 1;
3597 }
3598 if (URI) {
3599 xmlURIPtr uri;
3600
3601 uri = xmlParseURI((const char *)URI);
3602 if (uri == NULL) {
3603 ctxt->errNo = XML_ERR_INVALID_URI;
3604 if ((ctxt->sax != NULL) &&
3605 (!ctxt->disableSAX) &&
3606 (ctxt->sax->error != NULL))
3607 ctxt->sax->error(ctxt->userData,
3608 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003609 /*
3610 * This really ought to be a well formedness error
3611 * but the XML Core WG decided otherwise c.f. issue
3612 * E26 of the XML erratas.
3613 */
Owen Taylor3473f882001-02-23 17:55:21 +00003614 } else {
3615 if (uri->fragment != NULL) {
3616 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3617 if ((ctxt->sax != NULL) &&
3618 (!ctxt->disableSAX) &&
3619 (ctxt->sax->error != NULL))
3620 ctxt->sax->error(ctxt->userData,
3621 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003622 /*
3623 * Okay this is foolish to block those but not
3624 * invalid URIs.
3625 */
Owen Taylor3473f882001-02-23 17:55:21 +00003626 ctxt->wellFormed = 0;
3627 }
3628 xmlFreeURI(uri);
3629 }
3630 }
3631 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3632 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3634 ctxt->sax->error(ctxt->userData,
3635 "Space required before 'NDATA'\n");
3636 ctxt->wellFormed = 0;
3637 ctxt->disableSAX = 1;
3638 }
3639 SKIP_BLANKS;
3640 if ((RAW == 'N') && (NXT(1) == 'D') &&
3641 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3642 (NXT(4) == 'A')) {
3643 SKIP(5);
3644 if (!IS_BLANK(CUR)) {
3645 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3647 ctxt->sax->error(ctxt->userData,
3648 "Space required after 'NDATA'\n");
3649 ctxt->wellFormed = 0;
3650 ctxt->disableSAX = 1;
3651 }
3652 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003653 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003654 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3655 (ctxt->sax->unparsedEntityDecl != NULL))
3656 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3657 literal, URI, ndata);
3658 } else {
3659 if ((ctxt->sax != NULL) &&
3660 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3661 ctxt->sax->entityDecl(ctxt->userData, name,
3662 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3663 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003664 /*
3665 * For expat compatibility in SAX mode.
3666 * assuming the entity repalcement was asked for
3667 */
3668 if ((ctxt->replaceEntities != 0) &&
3669 ((ctxt->myDoc == NULL) ||
3670 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3671 if (ctxt->myDoc == NULL) {
3672 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3673 }
3674
3675 if (ctxt->myDoc->intSubset == NULL)
3676 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3677 BAD_CAST "fake", NULL, NULL);
3678 entityDecl(ctxt, name,
3679 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3680 literal, URI, NULL);
3681 }
Owen Taylor3473f882001-02-23 17:55:21 +00003682 }
3683 }
3684 }
3685 SKIP_BLANKS;
3686 if (RAW != '>') {
3687 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3689 ctxt->sax->error(ctxt->userData,
3690 "xmlParseEntityDecl: entity %s not terminated\n", name);
3691 ctxt->wellFormed = 0;
3692 ctxt->disableSAX = 1;
3693 } else {
3694 if (input != ctxt->input) {
3695 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3697 ctxt->sax->error(ctxt->userData,
3698"Entity declaration doesn't start and stop in the same entity\n");
3699 ctxt->wellFormed = 0;
3700 ctxt->disableSAX = 1;
3701 }
3702 NEXT;
3703 }
3704 if (orig != NULL) {
3705 /*
3706 * Ugly mechanism to save the raw entity value.
3707 */
3708 xmlEntityPtr cur = NULL;
3709
3710 if (isParameter) {
3711 if ((ctxt->sax != NULL) &&
3712 (ctxt->sax->getParameterEntity != NULL))
3713 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3714 } else {
3715 if ((ctxt->sax != NULL) &&
3716 (ctxt->sax->getEntity != NULL))
3717 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003718 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3719 cur = getEntity(ctxt, name);
3720 }
Owen Taylor3473f882001-02-23 17:55:21 +00003721 }
3722 if (cur != NULL) {
3723 if (cur->orig != NULL)
3724 xmlFree(orig);
3725 else
3726 cur->orig = orig;
3727 } else
3728 xmlFree(orig);
3729 }
3730 if (name != NULL) xmlFree(name);
3731 if (value != NULL) xmlFree(value);
3732 if (URI != NULL) xmlFree(URI);
3733 if (literal != NULL) xmlFree(literal);
3734 if (ndata != NULL) xmlFree(ndata);
3735 }
3736}
3737
3738/**
3739 * xmlParseDefaultDecl:
3740 * @ctxt: an XML parser context
3741 * @value: Receive a possible fixed default value for the attribute
3742 *
3743 * Parse an attribute default declaration
3744 *
3745 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3746 *
3747 * [ VC: Required Attribute ]
3748 * if the default declaration is the keyword #REQUIRED, then the
3749 * attribute must be specified for all elements of the type in the
3750 * attribute-list declaration.
3751 *
3752 * [ VC: Attribute Default Legal ]
3753 * The declared default value must meet the lexical constraints of
3754 * the declared attribute type c.f. xmlValidateAttributeDecl()
3755 *
3756 * [ VC: Fixed Attribute Default ]
3757 * if an attribute has a default value declared with the #FIXED
3758 * keyword, instances of that attribute must match the default value.
3759 *
3760 * [ WFC: No < in Attribute Values ]
3761 * handled in xmlParseAttValue()
3762 *
3763 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3764 * or XML_ATTRIBUTE_FIXED.
3765 */
3766
3767int
3768xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3769 int val;
3770 xmlChar *ret;
3771
3772 *value = NULL;
3773 if ((RAW == '#') && (NXT(1) == 'R') &&
3774 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3775 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3776 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3777 (NXT(8) == 'D')) {
3778 SKIP(9);
3779 return(XML_ATTRIBUTE_REQUIRED);
3780 }
3781 if ((RAW == '#') && (NXT(1) == 'I') &&
3782 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3783 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3784 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3785 SKIP(8);
3786 return(XML_ATTRIBUTE_IMPLIED);
3787 }
3788 val = XML_ATTRIBUTE_NONE;
3789 if ((RAW == '#') && (NXT(1) == 'F') &&
3790 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3791 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3792 SKIP(6);
3793 val = XML_ATTRIBUTE_FIXED;
3794 if (!IS_BLANK(CUR)) {
3795 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3797 ctxt->sax->error(ctxt->userData,
3798 "Space required after '#FIXED'\n");
3799 ctxt->wellFormed = 0;
3800 ctxt->disableSAX = 1;
3801 }
3802 SKIP_BLANKS;
3803 }
3804 ret = xmlParseAttValue(ctxt);
3805 ctxt->instate = XML_PARSER_DTD;
3806 if (ret == NULL) {
3807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3808 ctxt->sax->error(ctxt->userData,
3809 "Attribute default value declaration error\n");
3810 ctxt->wellFormed = 0;
3811 ctxt->disableSAX = 1;
3812 } else
3813 *value = ret;
3814 return(val);
3815}
3816
3817/**
3818 * xmlParseNotationType:
3819 * @ctxt: an XML parser context
3820 *
3821 * parse an Notation attribute type.
3822 *
3823 * Note: the leading 'NOTATION' S part has already being parsed...
3824 *
3825 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3826 *
3827 * [ VC: Notation Attributes ]
3828 * Values of this type must match one of the notation names included
3829 * in the declaration; all notation names in the declaration must be declared.
3830 *
3831 * Returns: the notation attribute tree built while parsing
3832 */
3833
3834xmlEnumerationPtr
3835xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3836 xmlChar *name;
3837 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3838
3839 if (RAW != '(') {
3840 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3842 ctxt->sax->error(ctxt->userData,
3843 "'(' required to start 'NOTATION'\n");
3844 ctxt->wellFormed = 0;
3845 ctxt->disableSAX = 1;
3846 return(NULL);
3847 }
3848 SHRINK;
3849 do {
3850 NEXT;
3851 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003852 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003853 if (name == NULL) {
3854 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3856 ctxt->sax->error(ctxt->userData,
3857 "Name expected in NOTATION declaration\n");
3858 ctxt->wellFormed = 0;
3859 ctxt->disableSAX = 1;
3860 return(ret);
3861 }
3862 cur = xmlCreateEnumeration(name);
3863 xmlFree(name);
3864 if (cur == NULL) return(ret);
3865 if (last == NULL) ret = last = cur;
3866 else {
3867 last->next = cur;
3868 last = cur;
3869 }
3870 SKIP_BLANKS;
3871 } while (RAW == '|');
3872 if (RAW != ')') {
3873 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3875 ctxt->sax->error(ctxt->userData,
3876 "')' required to finish NOTATION declaration\n");
3877 ctxt->wellFormed = 0;
3878 ctxt->disableSAX = 1;
3879 if ((last != NULL) && (last != ret))
3880 xmlFreeEnumeration(last);
3881 return(ret);
3882 }
3883 NEXT;
3884 return(ret);
3885}
3886
3887/**
3888 * xmlParseEnumerationType:
3889 * @ctxt: an XML parser context
3890 *
3891 * parse an Enumeration attribute type.
3892 *
3893 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3894 *
3895 * [ VC: Enumeration ]
3896 * Values of this type must match one of the Nmtoken tokens in
3897 * the declaration
3898 *
3899 * Returns: the enumeration attribute tree built while parsing
3900 */
3901
3902xmlEnumerationPtr
3903xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3904 xmlChar *name;
3905 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3906
3907 if (RAW != '(') {
3908 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3910 ctxt->sax->error(ctxt->userData,
3911 "'(' required to start ATTLIST enumeration\n");
3912 ctxt->wellFormed = 0;
3913 ctxt->disableSAX = 1;
3914 return(NULL);
3915 }
3916 SHRINK;
3917 do {
3918 NEXT;
3919 SKIP_BLANKS;
3920 name = xmlParseNmtoken(ctxt);
3921 if (name == NULL) {
3922 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3924 ctxt->sax->error(ctxt->userData,
3925 "NmToken expected in ATTLIST enumeration\n");
3926 ctxt->wellFormed = 0;
3927 ctxt->disableSAX = 1;
3928 return(ret);
3929 }
3930 cur = xmlCreateEnumeration(name);
3931 xmlFree(name);
3932 if (cur == NULL) return(ret);
3933 if (last == NULL) ret = last = cur;
3934 else {
3935 last->next = cur;
3936 last = cur;
3937 }
3938 SKIP_BLANKS;
3939 } while (RAW == '|');
3940 if (RAW != ')') {
3941 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt->userData,
3944 "')' required to finish ATTLIST enumeration\n");
3945 ctxt->wellFormed = 0;
3946 ctxt->disableSAX = 1;
3947 return(ret);
3948 }
3949 NEXT;
3950 return(ret);
3951}
3952
3953/**
3954 * xmlParseEnumeratedType:
3955 * @ctxt: an XML parser context
3956 * @tree: the enumeration tree built while parsing
3957 *
3958 * parse an Enumerated attribute type.
3959 *
3960 * [57] EnumeratedType ::= NotationType | Enumeration
3961 *
3962 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3963 *
3964 *
3965 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3966 */
3967
3968int
3969xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3970 if ((RAW == 'N') && (NXT(1) == 'O') &&
3971 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3972 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3973 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3974 SKIP(8);
3975 if (!IS_BLANK(CUR)) {
3976 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3978 ctxt->sax->error(ctxt->userData,
3979 "Space required after 'NOTATION'\n");
3980 ctxt->wellFormed = 0;
3981 ctxt->disableSAX = 1;
3982 return(0);
3983 }
3984 SKIP_BLANKS;
3985 *tree = xmlParseNotationType(ctxt);
3986 if (*tree == NULL) return(0);
3987 return(XML_ATTRIBUTE_NOTATION);
3988 }
3989 *tree = xmlParseEnumerationType(ctxt);
3990 if (*tree == NULL) return(0);
3991 return(XML_ATTRIBUTE_ENUMERATION);
3992}
3993
3994/**
3995 * xmlParseAttributeType:
3996 * @ctxt: an XML parser context
3997 * @tree: the enumeration tree built while parsing
3998 *
3999 * parse the Attribute list def for an element
4000 *
4001 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4002 *
4003 * [55] StringType ::= 'CDATA'
4004 *
4005 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4006 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4007 *
4008 * Validity constraints for attribute values syntax are checked in
4009 * xmlValidateAttributeValue()
4010 *
4011 * [ VC: ID ]
4012 * Values of type ID must match the Name production. A name must not
4013 * appear more than once in an XML document as a value of this type;
4014 * i.e., ID values must uniquely identify the elements which bear them.
4015 *
4016 * [ VC: One ID per Element Type ]
4017 * No element type may have more than one ID attribute specified.
4018 *
4019 * [ VC: ID Attribute Default ]
4020 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4021 *
4022 * [ VC: IDREF ]
4023 * Values of type IDREF must match the Name production, and values
4024 * of type IDREFS must match Names; each IDREF Name must match the value
4025 * of an ID attribute on some element in the XML document; i.e. IDREF
4026 * values must match the value of some ID attribute.
4027 *
4028 * [ VC: Entity Name ]
4029 * Values of type ENTITY must match the Name production, values
4030 * of type ENTITIES must match Names; each Entity Name must match the
4031 * name of an unparsed entity declared in the DTD.
4032 *
4033 * [ VC: Name Token ]
4034 * Values of type NMTOKEN must match the Nmtoken production; values
4035 * of type NMTOKENS must match Nmtokens.
4036 *
4037 * Returns the attribute type
4038 */
4039int
4040xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4041 SHRINK;
4042 if ((RAW == 'C') && (NXT(1) == 'D') &&
4043 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4044 (NXT(4) == 'A')) {
4045 SKIP(5);
4046 return(XML_ATTRIBUTE_CDATA);
4047 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4048 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4049 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4050 SKIP(6);
4051 return(XML_ATTRIBUTE_IDREFS);
4052 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4053 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4054 (NXT(4) == 'F')) {
4055 SKIP(5);
4056 return(XML_ATTRIBUTE_IDREF);
4057 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4058 SKIP(2);
4059 return(XML_ATTRIBUTE_ID);
4060 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4061 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4062 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4063 SKIP(6);
4064 return(XML_ATTRIBUTE_ENTITY);
4065 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4066 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4067 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4068 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4069 SKIP(8);
4070 return(XML_ATTRIBUTE_ENTITIES);
4071 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4072 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4073 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4074 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4075 SKIP(8);
4076 return(XML_ATTRIBUTE_NMTOKENS);
4077 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4078 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4079 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4080 (NXT(6) == 'N')) {
4081 SKIP(7);
4082 return(XML_ATTRIBUTE_NMTOKEN);
4083 }
4084 return(xmlParseEnumeratedType(ctxt, tree));
4085}
4086
4087/**
4088 * xmlParseAttributeListDecl:
4089 * @ctxt: an XML parser context
4090 *
4091 * : parse the Attribute list def for an element
4092 *
4093 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4094 *
4095 * [53] AttDef ::= S Name S AttType S DefaultDecl
4096 *
4097 */
4098void
4099xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4100 xmlChar *elemName;
4101 xmlChar *attrName;
4102 xmlEnumerationPtr tree;
4103
4104 if ((RAW == '<') && (NXT(1) == '!') &&
4105 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4106 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4107 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4108 (NXT(8) == 'T')) {
4109 xmlParserInputPtr input = ctxt->input;
4110
4111 SKIP(9);
4112 if (!IS_BLANK(CUR)) {
4113 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4115 ctxt->sax->error(ctxt->userData,
4116 "Space required after '<!ATTLIST'\n");
4117 ctxt->wellFormed = 0;
4118 ctxt->disableSAX = 1;
4119 }
4120 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004121 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004122 if (elemName == NULL) {
4123 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4125 ctxt->sax->error(ctxt->userData,
4126 "ATTLIST: no name for Element\n");
4127 ctxt->wellFormed = 0;
4128 ctxt->disableSAX = 1;
4129 return;
4130 }
4131 SKIP_BLANKS;
4132 GROW;
4133 while (RAW != '>') {
4134 const xmlChar *check = CUR_PTR;
4135 int type;
4136 int def;
4137 xmlChar *defaultValue = NULL;
4138
4139 GROW;
4140 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004141 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 if (attrName == NULL) {
4143 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4145 ctxt->sax->error(ctxt->userData,
4146 "ATTLIST: no name for Attribute\n");
4147 ctxt->wellFormed = 0;
4148 ctxt->disableSAX = 1;
4149 break;
4150 }
4151 GROW;
4152 if (!IS_BLANK(CUR)) {
4153 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4155 ctxt->sax->error(ctxt->userData,
4156 "Space required after the attribute name\n");
4157 ctxt->wellFormed = 0;
4158 ctxt->disableSAX = 1;
4159 if (attrName != NULL)
4160 xmlFree(attrName);
4161 if (defaultValue != NULL)
4162 xmlFree(defaultValue);
4163 break;
4164 }
4165 SKIP_BLANKS;
4166
4167 type = xmlParseAttributeType(ctxt, &tree);
4168 if (type <= 0) {
4169 if (attrName != NULL)
4170 xmlFree(attrName);
4171 if (defaultValue != NULL)
4172 xmlFree(defaultValue);
4173 break;
4174 }
4175
4176 GROW;
4177 if (!IS_BLANK(CUR)) {
4178 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4180 ctxt->sax->error(ctxt->userData,
4181 "Space required after the attribute type\n");
4182 ctxt->wellFormed = 0;
4183 ctxt->disableSAX = 1;
4184 if (attrName != NULL)
4185 xmlFree(attrName);
4186 if (defaultValue != NULL)
4187 xmlFree(defaultValue);
4188 if (tree != NULL)
4189 xmlFreeEnumeration(tree);
4190 break;
4191 }
4192 SKIP_BLANKS;
4193
4194 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4195 if (def <= 0) {
4196 if (attrName != NULL)
4197 xmlFree(attrName);
4198 if (defaultValue != NULL)
4199 xmlFree(defaultValue);
4200 if (tree != NULL)
4201 xmlFreeEnumeration(tree);
4202 break;
4203 }
4204
4205 GROW;
4206 if (RAW != '>') {
4207 if (!IS_BLANK(CUR)) {
4208 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4210 ctxt->sax->error(ctxt->userData,
4211 "Space required after the attribute default value\n");
4212 ctxt->wellFormed = 0;
4213 ctxt->disableSAX = 1;
4214 if (attrName != NULL)
4215 xmlFree(attrName);
4216 if (defaultValue != NULL)
4217 xmlFree(defaultValue);
4218 if (tree != NULL)
4219 xmlFreeEnumeration(tree);
4220 break;
4221 }
4222 SKIP_BLANKS;
4223 }
4224 if (check == CUR_PTR) {
4225 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4227 ctxt->sax->error(ctxt->userData,
4228 "xmlParseAttributeListDecl: detected internal error\n");
4229 if (attrName != NULL)
4230 xmlFree(attrName);
4231 if (defaultValue != NULL)
4232 xmlFree(defaultValue);
4233 if (tree != NULL)
4234 xmlFreeEnumeration(tree);
4235 break;
4236 }
4237 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4238 (ctxt->sax->attributeDecl != NULL))
4239 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4240 type, def, defaultValue, tree);
4241 if (attrName != NULL)
4242 xmlFree(attrName);
4243 if (defaultValue != NULL)
4244 xmlFree(defaultValue);
4245 GROW;
4246 }
4247 if (RAW == '>') {
4248 if (input != ctxt->input) {
4249 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4251 ctxt->sax->error(ctxt->userData,
4252"Attribute list declaration doesn't start and stop in the same entity\n");
4253 ctxt->wellFormed = 0;
4254 ctxt->disableSAX = 1;
4255 }
4256 NEXT;
4257 }
4258
4259 xmlFree(elemName);
4260 }
4261}
4262
4263/**
4264 * xmlParseElementMixedContentDecl:
4265 * @ctxt: an XML parser context
4266 *
4267 * parse the declaration for a Mixed Element content
4268 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4269 *
4270 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4271 * '(' S? '#PCDATA' S? ')'
4272 *
4273 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4274 *
4275 * [ VC: No Duplicate Types ]
4276 * The same name must not appear more than once in a single
4277 * mixed-content declaration.
4278 *
4279 * returns: the list of the xmlElementContentPtr describing the element choices
4280 */
4281xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004282xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004283 xmlElementContentPtr ret = NULL, cur = NULL, n;
4284 xmlChar *elem = NULL;
4285
4286 GROW;
4287 if ((RAW == '#') && (NXT(1) == 'P') &&
4288 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4289 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4290 (NXT(6) == 'A')) {
4291 SKIP(7);
4292 SKIP_BLANKS;
4293 SHRINK;
4294 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004295 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4296 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4297 if (ctxt->vctxt.error != NULL)
4298 ctxt->vctxt.error(ctxt->vctxt.userData,
4299"Element content declaration doesn't start and stop in the same entity\n");
4300 ctxt->valid = 0;
4301 }
Owen Taylor3473f882001-02-23 17:55:21 +00004302 NEXT;
4303 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4304 if (RAW == '*') {
4305 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4306 NEXT;
4307 }
4308 return(ret);
4309 }
4310 if ((RAW == '(') || (RAW == '|')) {
4311 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4312 if (ret == NULL) return(NULL);
4313 }
4314 while (RAW == '|') {
4315 NEXT;
4316 if (elem == NULL) {
4317 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4318 if (ret == NULL) return(NULL);
4319 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004320 if (cur != NULL)
4321 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004322 cur = ret;
4323 } else {
4324 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4325 if (n == NULL) return(NULL);
4326 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004327 if (n->c1 != NULL)
4328 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004329 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004330 if (n != NULL)
4331 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004332 cur = n;
4333 xmlFree(elem);
4334 }
4335 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004336 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004337 if (elem == NULL) {
4338 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4340 ctxt->sax->error(ctxt->userData,
4341 "xmlParseElementMixedContentDecl : Name expected\n");
4342 ctxt->wellFormed = 0;
4343 ctxt->disableSAX = 1;
4344 xmlFreeElementContent(cur);
4345 return(NULL);
4346 }
4347 SKIP_BLANKS;
4348 GROW;
4349 }
4350 if ((RAW == ')') && (NXT(1) == '*')) {
4351 if (elem != NULL) {
4352 cur->c2 = xmlNewElementContent(elem,
4353 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004354 if (cur->c2 != NULL)
4355 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004356 xmlFree(elem);
4357 }
4358 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004359 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4360 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4361 if (ctxt->vctxt.error != NULL)
4362 ctxt->vctxt.error(ctxt->vctxt.userData,
4363"Element content declaration doesn't start and stop in the same entity\n");
4364 ctxt->valid = 0;
4365 }
Owen Taylor3473f882001-02-23 17:55:21 +00004366 SKIP(2);
4367 } else {
4368 if (elem != NULL) xmlFree(elem);
4369 xmlFreeElementContent(ret);
4370 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4372 ctxt->sax->error(ctxt->userData,
4373 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4374 ctxt->wellFormed = 0;
4375 ctxt->disableSAX = 1;
4376 return(NULL);
4377 }
4378
4379 } else {
4380 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4382 ctxt->sax->error(ctxt->userData,
4383 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4384 ctxt->wellFormed = 0;
4385 ctxt->disableSAX = 1;
4386 }
4387 return(ret);
4388}
4389
4390/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004391 * xmlParseElementChildrenContentD:
4392 * @ctxt: an XML parser context
4393 *
4394 * VMS version of xmlParseElementChildrenContentDecl()
4395 *
4396 * Returns the tree of xmlElementContentPtr describing the element
4397 * hierarchy.
4398 */
4399/**
Owen Taylor3473f882001-02-23 17:55:21 +00004400 * xmlParseElementChildrenContentDecl:
4401 * @ctxt: an XML parser context
4402 *
4403 * parse the declaration for a Mixed Element content
4404 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4405 *
4406 *
4407 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4408 *
4409 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4410 *
4411 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4412 *
4413 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4414 *
4415 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4416 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004417 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004418 * opening or closing parentheses in a choice, seq, or Mixed
4419 * construct is contained in the replacement text for a parameter
4420 * entity, both must be contained in the same replacement text. For
4421 * interoperability, if a parameter-entity reference appears in a
4422 * choice, seq, or Mixed construct, its replacement text should not
4423 * be empty, and neither the first nor last non-blank character of
4424 * the replacement text should be a connector (| or ,).
4425 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004426 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004427 * hierarchy.
4428 */
4429xmlElementContentPtr
4430#ifdef VMS
4431xmlParseElementChildrenContentD
4432#else
4433xmlParseElementChildrenContentDecl
4434#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004435(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004436 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4437 xmlChar *elem;
4438 xmlChar type = 0;
4439
4440 SKIP_BLANKS;
4441 GROW;
4442 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004443 xmlParserInputPtr input = ctxt->input;
4444
Owen Taylor3473f882001-02-23 17:55:21 +00004445 /* Recurse on first child */
4446 NEXT;
4447 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004448 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004449 SKIP_BLANKS;
4450 GROW;
4451 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004452 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004453 if (elem == NULL) {
4454 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4456 ctxt->sax->error(ctxt->userData,
4457 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4458 ctxt->wellFormed = 0;
4459 ctxt->disableSAX = 1;
4460 return(NULL);
4461 }
4462 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4463 GROW;
4464 if (RAW == '?') {
4465 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4466 NEXT;
4467 } else if (RAW == '*') {
4468 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4469 NEXT;
4470 } else if (RAW == '+') {
4471 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4472 NEXT;
4473 } else {
4474 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4475 }
4476 xmlFree(elem);
4477 GROW;
4478 }
4479 SKIP_BLANKS;
4480 SHRINK;
4481 while (RAW != ')') {
4482 /*
4483 * Each loop we parse one separator and one element.
4484 */
4485 if (RAW == ',') {
4486 if (type == 0) type = CUR;
4487
4488 /*
4489 * Detect "Name | Name , Name" error
4490 */
4491 else if (type != CUR) {
4492 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4494 ctxt->sax->error(ctxt->userData,
4495 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4496 type);
4497 ctxt->wellFormed = 0;
4498 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004499 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004500 xmlFreeElementContent(last);
4501 if (ret != NULL)
4502 xmlFreeElementContent(ret);
4503 return(NULL);
4504 }
4505 NEXT;
4506
4507 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4508 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004509 if ((last != NULL) && (last != ret))
4510 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004511 xmlFreeElementContent(ret);
4512 return(NULL);
4513 }
4514 if (last == NULL) {
4515 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004516 if (ret != NULL)
4517 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004518 ret = cur = op;
4519 } else {
4520 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004521 if (op != NULL)
4522 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004523 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004524 if (last != NULL)
4525 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004526 cur =op;
4527 last = NULL;
4528 }
4529 } else if (RAW == '|') {
4530 if (type == 0) type = CUR;
4531
4532 /*
4533 * Detect "Name , Name | Name" error
4534 */
4535 else if (type != CUR) {
4536 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4538 ctxt->sax->error(ctxt->userData,
4539 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4540 type);
4541 ctxt->wellFormed = 0;
4542 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004543 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004544 xmlFreeElementContent(last);
4545 if (ret != NULL)
4546 xmlFreeElementContent(ret);
4547 return(NULL);
4548 }
4549 NEXT;
4550
4551 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4552 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004553 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004554 xmlFreeElementContent(last);
4555 if (ret != NULL)
4556 xmlFreeElementContent(ret);
4557 return(NULL);
4558 }
4559 if (last == NULL) {
4560 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004561 if (ret != NULL)
4562 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004563 ret = cur = op;
4564 } else {
4565 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004566 if (op != NULL)
4567 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004568 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004569 if (last != NULL)
4570 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004571 cur =op;
4572 last = NULL;
4573 }
4574 } else {
4575 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4577 ctxt->sax->error(ctxt->userData,
4578 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4579 ctxt->wellFormed = 0;
4580 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004581 if (ret != NULL)
4582 xmlFreeElementContent(ret);
4583 return(NULL);
4584 }
4585 GROW;
4586 SKIP_BLANKS;
4587 GROW;
4588 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004589 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004590 /* Recurse on second child */
4591 NEXT;
4592 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004593 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004594 SKIP_BLANKS;
4595 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004596 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004597 if (elem == NULL) {
4598 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4600 ctxt->sax->error(ctxt->userData,
4601 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4602 ctxt->wellFormed = 0;
4603 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004604 if (ret != NULL)
4605 xmlFreeElementContent(ret);
4606 return(NULL);
4607 }
4608 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4609 xmlFree(elem);
4610 if (RAW == '?') {
4611 last->ocur = XML_ELEMENT_CONTENT_OPT;
4612 NEXT;
4613 } else if (RAW == '*') {
4614 last->ocur = XML_ELEMENT_CONTENT_MULT;
4615 NEXT;
4616 } else if (RAW == '+') {
4617 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4618 NEXT;
4619 } else {
4620 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4621 }
4622 }
4623 SKIP_BLANKS;
4624 GROW;
4625 }
4626 if ((cur != NULL) && (last != NULL)) {
4627 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004628 if (last != NULL)
4629 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004630 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004631 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4632 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4633 if (ctxt->vctxt.error != NULL)
4634 ctxt->vctxt.error(ctxt->vctxt.userData,
4635"Element content declaration doesn't start and stop in the same entity\n");
4636 ctxt->valid = 0;
4637 }
Owen Taylor3473f882001-02-23 17:55:21 +00004638 NEXT;
4639 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004640 if (ret != NULL)
4641 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004642 NEXT;
4643 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004644 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004645 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004646 cur = ret;
4647 /*
4648 * Some normalization:
4649 * (a | b* | c?)* == (a | b | c)*
4650 */
4651 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4652 if ((cur->c1 != NULL) &&
4653 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4654 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4655 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4656 if ((cur->c2 != NULL) &&
4657 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4658 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4659 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4660 cur = cur->c2;
4661 }
4662 }
Owen Taylor3473f882001-02-23 17:55:21 +00004663 NEXT;
4664 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004665 if (ret != NULL) {
4666 int found = 0;
4667
Daniel Veillarde470df72001-04-18 21:41:07 +00004668 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004669 /*
4670 * Some normalization:
4671 * (a | b*)+ == (a | b)*
4672 * (a | b?)+ == (a | b)*
4673 */
4674 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4675 if ((cur->c1 != NULL) &&
4676 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4677 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4678 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4679 found = 1;
4680 }
4681 if ((cur->c2 != NULL) &&
4682 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4683 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4684 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4685 found = 1;
4686 }
4687 cur = cur->c2;
4688 }
4689 if (found)
4690 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4691 }
Owen Taylor3473f882001-02-23 17:55:21 +00004692 NEXT;
4693 }
4694 return(ret);
4695}
4696
4697/**
4698 * xmlParseElementContentDecl:
4699 * @ctxt: an XML parser context
4700 * @name: the name of the element being defined.
4701 * @result: the Element Content pointer will be stored here if any
4702 *
4703 * parse the declaration for an Element content either Mixed or Children,
4704 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4705 *
4706 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4707 *
4708 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4709 */
4710
4711int
4712xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4713 xmlElementContentPtr *result) {
4714
4715 xmlElementContentPtr tree = NULL;
4716 xmlParserInputPtr input = ctxt->input;
4717 int res;
4718
4719 *result = NULL;
4720
4721 if (RAW != '(') {
4722 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4724 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004725 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004726 ctxt->wellFormed = 0;
4727 ctxt->disableSAX = 1;
4728 return(-1);
4729 }
4730 NEXT;
4731 GROW;
4732 SKIP_BLANKS;
4733 if ((RAW == '#') && (NXT(1) == 'P') &&
4734 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4735 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4736 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004737 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004738 res = XML_ELEMENT_TYPE_MIXED;
4739 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004740 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004741 res = XML_ELEMENT_TYPE_ELEMENT;
4742 }
Owen Taylor3473f882001-02-23 17:55:21 +00004743 SKIP_BLANKS;
4744 *result = tree;
4745 return(res);
4746}
4747
4748/**
4749 * xmlParseElementDecl:
4750 * @ctxt: an XML parser context
4751 *
4752 * parse an Element declaration.
4753 *
4754 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4755 *
4756 * [ VC: Unique Element Type Declaration ]
4757 * No element type may be declared more than once
4758 *
4759 * Returns the type of the element, or -1 in case of error
4760 */
4761int
4762xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4763 xmlChar *name;
4764 int ret = -1;
4765 xmlElementContentPtr content = NULL;
4766
4767 GROW;
4768 if ((RAW == '<') && (NXT(1) == '!') &&
4769 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4770 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4771 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4772 (NXT(8) == 'T')) {
4773 xmlParserInputPtr input = ctxt->input;
4774
4775 SKIP(9);
4776 if (!IS_BLANK(CUR)) {
4777 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4779 ctxt->sax->error(ctxt->userData,
4780 "Space required after 'ELEMENT'\n");
4781 ctxt->wellFormed = 0;
4782 ctxt->disableSAX = 1;
4783 }
4784 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004785 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004786 if (name == NULL) {
4787 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4789 ctxt->sax->error(ctxt->userData,
4790 "xmlParseElementDecl: no name for Element\n");
4791 ctxt->wellFormed = 0;
4792 ctxt->disableSAX = 1;
4793 return(-1);
4794 }
4795 while ((RAW == 0) && (ctxt->inputNr > 1))
4796 xmlPopInput(ctxt);
4797 if (!IS_BLANK(CUR)) {
4798 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4800 ctxt->sax->error(ctxt->userData,
4801 "Space required after the element name\n");
4802 ctxt->wellFormed = 0;
4803 ctxt->disableSAX = 1;
4804 }
4805 SKIP_BLANKS;
4806 if ((RAW == 'E') && (NXT(1) == 'M') &&
4807 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4808 (NXT(4) == 'Y')) {
4809 SKIP(5);
4810 /*
4811 * Element must always be empty.
4812 */
4813 ret = XML_ELEMENT_TYPE_EMPTY;
4814 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4815 (NXT(2) == 'Y')) {
4816 SKIP(3);
4817 /*
4818 * Element is a generic container.
4819 */
4820 ret = XML_ELEMENT_TYPE_ANY;
4821 } else if (RAW == '(') {
4822 ret = xmlParseElementContentDecl(ctxt, name, &content);
4823 } else {
4824 /*
4825 * [ WFC: PEs in Internal Subset ] error handling.
4826 */
4827 if ((RAW == '%') && (ctxt->external == 0) &&
4828 (ctxt->inputNr == 1)) {
4829 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4831 ctxt->sax->error(ctxt->userData,
4832 "PEReference: forbidden within markup decl in internal subset\n");
4833 } else {
4834 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4836 ctxt->sax->error(ctxt->userData,
4837 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4838 }
4839 ctxt->wellFormed = 0;
4840 ctxt->disableSAX = 1;
4841 if (name != NULL) xmlFree(name);
4842 return(-1);
4843 }
4844
4845 SKIP_BLANKS;
4846 /*
4847 * Pop-up of finished entities.
4848 */
4849 while ((RAW == 0) && (ctxt->inputNr > 1))
4850 xmlPopInput(ctxt);
4851 SKIP_BLANKS;
4852
4853 if (RAW != '>') {
4854 ctxt->errNo = XML_ERR_GT_REQUIRED;
4855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4856 ctxt->sax->error(ctxt->userData,
4857 "xmlParseElementDecl: expected '>' at the end\n");
4858 ctxt->wellFormed = 0;
4859 ctxt->disableSAX = 1;
4860 } else {
4861 if (input != ctxt->input) {
4862 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4864 ctxt->sax->error(ctxt->userData,
4865"Element declaration doesn't start and stop in the same entity\n");
4866 ctxt->wellFormed = 0;
4867 ctxt->disableSAX = 1;
4868 }
4869
4870 NEXT;
4871 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4872 (ctxt->sax->elementDecl != NULL))
4873 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4874 content);
4875 }
4876 if (content != NULL) {
4877 xmlFreeElementContent(content);
4878 }
4879 if (name != NULL) {
4880 xmlFree(name);
4881 }
4882 }
4883 return(ret);
4884}
4885
4886/**
Owen Taylor3473f882001-02-23 17:55:21 +00004887 * xmlParseConditionalSections
4888 * @ctxt: an XML parser context
4889 *
4890 * [61] conditionalSect ::= includeSect | ignoreSect
4891 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4892 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4893 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4894 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4895 */
4896
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004897static void
Owen Taylor3473f882001-02-23 17:55:21 +00004898xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4899 SKIP(3);
4900 SKIP_BLANKS;
4901 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4902 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4903 (NXT(6) == 'E')) {
4904 SKIP(7);
4905 SKIP_BLANKS;
4906 if (RAW != '[') {
4907 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4909 ctxt->sax->error(ctxt->userData,
4910 "XML conditional section '[' expected\n");
4911 ctxt->wellFormed = 0;
4912 ctxt->disableSAX = 1;
4913 } else {
4914 NEXT;
4915 }
4916 if (xmlParserDebugEntities) {
4917 if ((ctxt->input != NULL) && (ctxt->input->filename))
4918 xmlGenericError(xmlGenericErrorContext,
4919 "%s(%d): ", ctxt->input->filename,
4920 ctxt->input->line);
4921 xmlGenericError(xmlGenericErrorContext,
4922 "Entering INCLUDE Conditional Section\n");
4923 }
4924
4925 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4926 (NXT(2) != '>'))) {
4927 const xmlChar *check = CUR_PTR;
4928 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004929
4930 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4931 xmlParseConditionalSections(ctxt);
4932 } else if (IS_BLANK(CUR)) {
4933 NEXT;
4934 } else if (RAW == '%') {
4935 xmlParsePEReference(ctxt);
4936 } else
4937 xmlParseMarkupDecl(ctxt);
4938
4939 /*
4940 * Pop-up of finished entities.
4941 */
4942 while ((RAW == 0) && (ctxt->inputNr > 1))
4943 xmlPopInput(ctxt);
4944
Daniel Veillardfdc91562002-07-01 21:52:03 +00004945 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004946 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4948 ctxt->sax->error(ctxt->userData,
4949 "Content error in the external subset\n");
4950 ctxt->wellFormed = 0;
4951 ctxt->disableSAX = 1;
4952 break;
4953 }
4954 }
4955 if (xmlParserDebugEntities) {
4956 if ((ctxt->input != NULL) && (ctxt->input->filename))
4957 xmlGenericError(xmlGenericErrorContext,
4958 "%s(%d): ", ctxt->input->filename,
4959 ctxt->input->line);
4960 xmlGenericError(xmlGenericErrorContext,
4961 "Leaving INCLUDE Conditional Section\n");
4962 }
4963
4964 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4965 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4966 int state;
4967 int instate;
4968 int depth = 0;
4969
4970 SKIP(6);
4971 SKIP_BLANKS;
4972 if (RAW != '[') {
4973 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4975 ctxt->sax->error(ctxt->userData,
4976 "XML conditional section '[' expected\n");
4977 ctxt->wellFormed = 0;
4978 ctxt->disableSAX = 1;
4979 } else {
4980 NEXT;
4981 }
4982 if (xmlParserDebugEntities) {
4983 if ((ctxt->input != NULL) && (ctxt->input->filename))
4984 xmlGenericError(xmlGenericErrorContext,
4985 "%s(%d): ", ctxt->input->filename,
4986 ctxt->input->line);
4987 xmlGenericError(xmlGenericErrorContext,
4988 "Entering IGNORE Conditional Section\n");
4989 }
4990
4991 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004992 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004993 * But disable SAX event generating DTD building in the meantime
4994 */
4995 state = ctxt->disableSAX;
4996 instate = ctxt->instate;
4997 ctxt->disableSAX = 1;
4998 ctxt->instate = XML_PARSER_IGNORE;
4999
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005000 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005001 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5002 depth++;
5003 SKIP(3);
5004 continue;
5005 }
5006 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5007 if (--depth >= 0) SKIP(3);
5008 continue;
5009 }
5010 NEXT;
5011 continue;
5012 }
5013
5014 ctxt->disableSAX = state;
5015 ctxt->instate = instate;
5016
5017 if (xmlParserDebugEntities) {
5018 if ((ctxt->input != NULL) && (ctxt->input->filename))
5019 xmlGenericError(xmlGenericErrorContext,
5020 "%s(%d): ", ctxt->input->filename,
5021 ctxt->input->line);
5022 xmlGenericError(xmlGenericErrorContext,
5023 "Leaving IGNORE Conditional Section\n");
5024 }
5025
5026 } else {
5027 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5029 ctxt->sax->error(ctxt->userData,
5030 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5031 ctxt->wellFormed = 0;
5032 ctxt->disableSAX = 1;
5033 }
5034
5035 if (RAW == 0)
5036 SHRINK;
5037
5038 if (RAW == 0) {
5039 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5041 ctxt->sax->error(ctxt->userData,
5042 "XML conditional section not closed\n");
5043 ctxt->wellFormed = 0;
5044 ctxt->disableSAX = 1;
5045 } else {
5046 SKIP(3);
5047 }
5048}
5049
5050/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005051 * xmlParseMarkupDecl:
5052 * @ctxt: an XML parser context
5053 *
5054 * parse Markup declarations
5055 *
5056 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5057 * NotationDecl | PI | Comment
5058 *
5059 * [ VC: Proper Declaration/PE Nesting ]
5060 * Parameter-entity replacement text must be properly nested with
5061 * markup declarations. That is to say, if either the first character
5062 * or the last character of a markup declaration (markupdecl above) is
5063 * contained in the replacement text for a parameter-entity reference,
5064 * both must be contained in the same replacement text.
5065 *
5066 * [ WFC: PEs in Internal Subset ]
5067 * In the internal DTD subset, parameter-entity references can occur
5068 * only where markup declarations can occur, not within markup declarations.
5069 * (This does not apply to references that occur in external parameter
5070 * entities or to the external subset.)
5071 */
5072void
5073xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5074 GROW;
5075 xmlParseElementDecl(ctxt);
5076 xmlParseAttributeListDecl(ctxt);
5077 xmlParseEntityDecl(ctxt);
5078 xmlParseNotationDecl(ctxt);
5079 xmlParsePI(ctxt);
5080 xmlParseComment(ctxt);
5081 /*
5082 * This is only for internal subset. On external entities,
5083 * the replacement is done before parsing stage
5084 */
5085 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5086 xmlParsePEReference(ctxt);
5087
5088 /*
5089 * Conditional sections are allowed from entities included
5090 * by PE References in the internal subset.
5091 */
5092 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5093 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5094 xmlParseConditionalSections(ctxt);
5095 }
5096 }
5097
5098 ctxt->instate = XML_PARSER_DTD;
5099}
5100
5101/**
5102 * xmlParseTextDecl:
5103 * @ctxt: an XML parser context
5104 *
5105 * parse an XML declaration header for external entities
5106 *
5107 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5108 *
5109 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5110 */
5111
5112void
5113xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5114 xmlChar *version;
5115
5116 /*
5117 * We know that '<?xml' is here.
5118 */
5119 if ((RAW == '<') && (NXT(1) == '?') &&
5120 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5121 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5122 SKIP(5);
5123 } else {
5124 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5126 ctxt->sax->error(ctxt->userData,
5127 "Text declaration '<?xml' required\n");
5128 ctxt->wellFormed = 0;
5129 ctxt->disableSAX = 1;
5130
5131 return;
5132 }
5133
5134 if (!IS_BLANK(CUR)) {
5135 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5137 ctxt->sax->error(ctxt->userData,
5138 "Space needed after '<?xml'\n");
5139 ctxt->wellFormed = 0;
5140 ctxt->disableSAX = 1;
5141 }
5142 SKIP_BLANKS;
5143
5144 /*
5145 * We may have the VersionInfo here.
5146 */
5147 version = xmlParseVersionInfo(ctxt);
5148 if (version == NULL)
5149 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005150 else {
5151 if (!IS_BLANK(CUR)) {
5152 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5154 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5155 ctxt->wellFormed = 0;
5156 ctxt->disableSAX = 1;
5157 }
5158 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005159 ctxt->input->version = version;
5160
5161 /*
5162 * We must have the encoding declaration
5163 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005164 xmlParseEncodingDecl(ctxt);
5165 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5166 /*
5167 * The XML REC instructs us to stop parsing right here
5168 */
5169 return;
5170 }
5171
5172 SKIP_BLANKS;
5173 if ((RAW == '?') && (NXT(1) == '>')) {
5174 SKIP(2);
5175 } else if (RAW == '>') {
5176 /* Deprecated old WD ... */
5177 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5179 ctxt->sax->error(ctxt->userData,
5180 "XML declaration must end-up with '?>'\n");
5181 ctxt->wellFormed = 0;
5182 ctxt->disableSAX = 1;
5183 NEXT;
5184 } else {
5185 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5187 ctxt->sax->error(ctxt->userData,
5188 "parsing XML declaration: '?>' expected\n");
5189 ctxt->wellFormed = 0;
5190 ctxt->disableSAX = 1;
5191 MOVETO_ENDTAG(CUR_PTR);
5192 NEXT;
5193 }
5194}
5195
5196/**
Owen Taylor3473f882001-02-23 17:55:21 +00005197 * xmlParseExternalSubset:
5198 * @ctxt: an XML parser context
5199 * @ExternalID: the external identifier
5200 * @SystemID: the system identifier (or URL)
5201 *
5202 * parse Markup declarations from an external subset
5203 *
5204 * [30] extSubset ::= textDecl? extSubsetDecl
5205 *
5206 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5207 */
5208void
5209xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5210 const xmlChar *SystemID) {
5211 GROW;
5212 if ((RAW == '<') && (NXT(1) == '?') &&
5213 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5214 (NXT(4) == 'l')) {
5215 xmlParseTextDecl(ctxt);
5216 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5217 /*
5218 * The XML REC instructs us to stop parsing right here
5219 */
5220 ctxt->instate = XML_PARSER_EOF;
5221 return;
5222 }
5223 }
5224 if (ctxt->myDoc == NULL) {
5225 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5226 }
5227 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5228 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5229
5230 ctxt->instate = XML_PARSER_DTD;
5231 ctxt->external = 1;
5232 while (((RAW == '<') && (NXT(1) == '?')) ||
5233 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005234 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005235 const xmlChar *check = CUR_PTR;
5236 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005237
5238 GROW;
5239 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5240 xmlParseConditionalSections(ctxt);
5241 } else if (IS_BLANK(CUR)) {
5242 NEXT;
5243 } else if (RAW == '%') {
5244 xmlParsePEReference(ctxt);
5245 } else
5246 xmlParseMarkupDecl(ctxt);
5247
5248 /*
5249 * Pop-up of finished entities.
5250 */
5251 while ((RAW == 0) && (ctxt->inputNr > 1))
5252 xmlPopInput(ctxt);
5253
Daniel Veillardfdc91562002-07-01 21:52:03 +00005254 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005255 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5257 ctxt->sax->error(ctxt->userData,
5258 "Content error in the external subset\n");
5259 ctxt->wellFormed = 0;
5260 ctxt->disableSAX = 1;
5261 break;
5262 }
5263 }
5264
5265 if (RAW != 0) {
5266 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5268 ctxt->sax->error(ctxt->userData,
5269 "Extra content at the end of the document\n");
5270 ctxt->wellFormed = 0;
5271 ctxt->disableSAX = 1;
5272 }
5273
5274}
5275
5276/**
5277 * xmlParseReference:
5278 * @ctxt: an XML parser context
5279 *
5280 * parse and handle entity references in content, depending on the SAX
5281 * interface, this may end-up in a call to character() if this is a
5282 * CharRef, a predefined entity, if there is no reference() callback.
5283 * or if the parser was asked to switch to that mode.
5284 *
5285 * [67] Reference ::= EntityRef | CharRef
5286 */
5287void
5288xmlParseReference(xmlParserCtxtPtr ctxt) {
5289 xmlEntityPtr ent;
5290 xmlChar *val;
5291 if (RAW != '&') return;
5292
5293 if (NXT(1) == '#') {
5294 int i = 0;
5295 xmlChar out[10];
5296 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005297 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005298
5299 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5300 /*
5301 * So we are using non-UTF-8 buffers
5302 * Check that the char fit on 8bits, if not
5303 * generate a CharRef.
5304 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005305 if (value <= 0xFF) {
5306 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005307 out[1] = 0;
5308 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5309 (!ctxt->disableSAX))
5310 ctxt->sax->characters(ctxt->userData, out, 1);
5311 } else {
5312 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005313 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005314 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005315 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5317 (!ctxt->disableSAX))
5318 ctxt->sax->reference(ctxt->userData, out);
5319 }
5320 } else {
5321 /*
5322 * Just encode the value in UTF-8
5323 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005324 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005325 out[i] = 0;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5327 (!ctxt->disableSAX))
5328 ctxt->sax->characters(ctxt->userData, out, i);
5329 }
5330 } else {
5331 ent = xmlParseEntityRef(ctxt);
5332 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005333 if (!ctxt->wellFormed)
5334 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005335 if ((ent->name != NULL) &&
5336 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5337 xmlNodePtr list = NULL;
5338 int ret;
5339
5340
5341 /*
5342 * The first reference to the entity trigger a parsing phase
5343 * where the ent->children is filled with the result from
5344 * the parsing.
5345 */
5346 if (ent->children == NULL) {
5347 xmlChar *value;
5348 value = ent->content;
5349
5350 /*
5351 * Check that this entity is well formed
5352 */
5353 if ((value != NULL) &&
5354 (value[1] == 0) && (value[0] == '<') &&
5355 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5356 /*
5357 * DONE: get definite answer on this !!!
5358 * Lots of entity decls are used to declare a single
5359 * char
5360 * <!ENTITY lt "<">
5361 * Which seems to be valid since
5362 * 2.4: The ampersand character (&) and the left angle
5363 * bracket (<) may appear in their literal form only
5364 * when used ... They are also legal within the literal
5365 * entity value of an internal entity declaration;i
5366 * see "4.3.2 Well-Formed Parsed Entities".
5367 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5368 * Looking at the OASIS test suite and James Clark
5369 * tests, this is broken. However the XML REC uses
5370 * it. Is the XML REC not well-formed ????
5371 * This is a hack to avoid this problem
5372 *
5373 * ANSWER: since lt gt amp .. are already defined,
5374 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005375 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005376 * is lousy but acceptable.
5377 */
5378 list = xmlNewDocText(ctxt->myDoc, value);
5379 if (list != NULL) {
5380 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5381 (ent->children == NULL)) {
5382 ent->children = list;
5383 ent->last = list;
5384 list->parent = (xmlNodePtr) ent;
5385 } else {
5386 xmlFreeNodeList(list);
5387 }
5388 } else if (list != NULL) {
5389 xmlFreeNodeList(list);
5390 }
5391 } else {
5392 /*
5393 * 4.3.2: An internal general parsed entity is well-formed
5394 * if its replacement text matches the production labeled
5395 * content.
5396 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005397
5398 void *user_data;
5399 /*
5400 * This is a bit hackish but this seems the best
5401 * way to make sure both SAX and DOM entity support
5402 * behaves okay.
5403 */
5404 if (ctxt->userData == ctxt)
5405 user_data = NULL;
5406 else
5407 user_data = ctxt->userData;
5408
Owen Taylor3473f882001-02-23 17:55:21 +00005409 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5410 ctxt->depth++;
5411 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005412 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005413 value, &list);
5414 ctxt->depth--;
5415 } else if (ent->etype ==
5416 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5417 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005418 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005419 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005420 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005421 ctxt->depth--;
5422 } else {
5423 ret = -1;
5424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5425 ctxt->sax->error(ctxt->userData,
5426 "Internal: invalid entity type\n");
5427 }
5428 if (ret == XML_ERR_ENTITY_LOOP) {
5429 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5431 ctxt->sax->error(ctxt->userData,
5432 "Detected entity reference loop\n");
5433 ctxt->wellFormed = 0;
5434 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005435 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005436 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005437 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5438 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005439 (ent->children == NULL)) {
5440 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005441 if (ctxt->replaceEntities) {
5442 /*
5443 * Prune it directly in the generated document
5444 * except for single text nodes.
5445 */
5446 if ((list->type == XML_TEXT_NODE) &&
5447 (list->next == NULL)) {
5448 list->parent = (xmlNodePtr) ent;
5449 list = NULL;
5450 } else {
5451 while (list != NULL) {
5452 list->parent = (xmlNodePtr) ctxt->node;
5453 if (list->next == NULL)
5454 ent->last = list;
5455 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005456 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005457 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005458 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5459 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005460 }
5461 } else {
5462 while (list != NULL) {
5463 list->parent = (xmlNodePtr) ent;
5464 if (list->next == NULL)
5465 ent->last = list;
5466 list = list->next;
5467 }
Owen Taylor3473f882001-02-23 17:55:21 +00005468 }
5469 } else {
5470 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005471 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005472 }
5473 } else if (ret > 0) {
5474 ctxt->errNo = ret;
5475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5476 ctxt->sax->error(ctxt->userData,
5477 "Entity value required\n");
5478 ctxt->wellFormed = 0;
5479 ctxt->disableSAX = 1;
5480 } else if (list != NULL) {
5481 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005482 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005483 }
5484 }
5485 }
5486 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5487 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5488 /*
5489 * Create a node.
5490 */
5491 ctxt->sax->reference(ctxt->userData, ent->name);
5492 return;
5493 } else if (ctxt->replaceEntities) {
5494 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5495 /*
5496 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005497 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005498 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005499 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005500 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005501 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005502 cur = ent->children;
5503 while (cur != NULL) {
5504 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005505 if (firstChild == NULL){
5506 firstChild = new;
5507 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005508 xmlAddChild(ctxt->node, new);
5509 if (cur == ent->last)
5510 break;
5511 cur = cur->next;
5512 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005513 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5514 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005515 } else {
5516 /*
5517 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005518 * node with a possible previous text one which
5519 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005520 */
5521 if (ent->children->type == XML_TEXT_NODE)
5522 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5523 if ((ent->last != ent->children) &&
5524 (ent->last->type == XML_TEXT_NODE))
5525 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5526 xmlAddChildList(ctxt->node, ent->children);
5527 }
5528
Owen Taylor3473f882001-02-23 17:55:21 +00005529 /*
5530 * This is to avoid a nasty side effect, see
5531 * characters() in SAX.c
5532 */
5533 ctxt->nodemem = 0;
5534 ctxt->nodelen = 0;
5535 return;
5536 } else {
5537 /*
5538 * Probably running in SAX mode
5539 */
5540 xmlParserInputPtr input;
5541
5542 input = xmlNewEntityInputStream(ctxt, ent);
5543 xmlPushInput(ctxt, input);
5544 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5545 (RAW == '<') && (NXT(1) == '?') &&
5546 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5547 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5548 xmlParseTextDecl(ctxt);
5549 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5550 /*
5551 * The XML REC instructs us to stop parsing right here
5552 */
5553 ctxt->instate = XML_PARSER_EOF;
5554 return;
5555 }
5556 if (input->standalone == 1) {
5557 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5559 ctxt->sax->error(ctxt->userData,
5560 "external parsed entities cannot be standalone\n");
5561 ctxt->wellFormed = 0;
5562 ctxt->disableSAX = 1;
5563 }
5564 }
5565 return;
5566 }
5567 }
5568 } else {
5569 val = ent->content;
5570 if (val == NULL) return;
5571 /*
5572 * inline the entity.
5573 */
5574 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5575 (!ctxt->disableSAX))
5576 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5577 }
5578 }
5579}
5580
5581/**
5582 * xmlParseEntityRef:
5583 * @ctxt: an XML parser context
5584 *
5585 * parse ENTITY references declarations
5586 *
5587 * [68] EntityRef ::= '&' Name ';'
5588 *
5589 * [ WFC: Entity Declared ]
5590 * In a document without any DTD, a document with only an internal DTD
5591 * subset which contains no parameter entity references, or a document
5592 * with "standalone='yes'", the Name given in the entity reference
5593 * must match that in an entity declaration, except that well-formed
5594 * documents need not declare any of the following entities: amp, lt,
5595 * gt, apos, quot. The declaration of a parameter entity must precede
5596 * any reference to it. Similarly, the declaration of a general entity
5597 * must precede any reference to it which appears in a default value in an
5598 * attribute-list declaration. Note that if entities are declared in the
5599 * external subset or in external parameter entities, a non-validating
5600 * processor is not obligated to read and process their declarations;
5601 * for such documents, the rule that an entity must be declared is a
5602 * well-formedness constraint only if standalone='yes'.
5603 *
5604 * [ WFC: Parsed Entity ]
5605 * An entity reference must not contain the name of an unparsed entity
5606 *
5607 * Returns the xmlEntityPtr if found, or NULL otherwise.
5608 */
5609xmlEntityPtr
5610xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5611 xmlChar *name;
5612 xmlEntityPtr ent = NULL;
5613
5614 GROW;
5615
5616 if (RAW == '&') {
5617 NEXT;
5618 name = xmlParseName(ctxt);
5619 if (name == NULL) {
5620 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5622 ctxt->sax->error(ctxt->userData,
5623 "xmlParseEntityRef: no name\n");
5624 ctxt->wellFormed = 0;
5625 ctxt->disableSAX = 1;
5626 } else {
5627 if (RAW == ';') {
5628 NEXT;
5629 /*
5630 * Ask first SAX for entity resolution, otherwise try the
5631 * predefined set.
5632 */
5633 if (ctxt->sax != NULL) {
5634 if (ctxt->sax->getEntity != NULL)
5635 ent = ctxt->sax->getEntity(ctxt->userData, name);
5636 if (ent == NULL)
5637 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005638 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5639 ent = getEntity(ctxt, name);
5640 }
Owen Taylor3473f882001-02-23 17:55:21 +00005641 }
5642 /*
5643 * [ WFC: Entity Declared ]
5644 * In a document without any DTD, a document with only an
5645 * internal DTD subset which contains no parameter entity
5646 * references, or a document with "standalone='yes'", the
5647 * Name given in the entity reference must match that in an
5648 * entity declaration, except that well-formed documents
5649 * need not declare any of the following entities: amp, lt,
5650 * gt, apos, quot.
5651 * The declaration of a parameter entity must precede any
5652 * reference to it.
5653 * Similarly, the declaration of a general entity must
5654 * precede any reference to it which appears in a default
5655 * value in an attribute-list declaration. Note that if
5656 * entities are declared in the external subset or in
5657 * external parameter entities, a non-validating processor
5658 * is not obligated to read and process their declarations;
5659 * for such documents, the rule that an entity must be
5660 * declared is a well-formedness constraint only if
5661 * standalone='yes'.
5662 */
5663 if (ent == NULL) {
5664 if ((ctxt->standalone == 1) ||
5665 ((ctxt->hasExternalSubset == 0) &&
5666 (ctxt->hasPErefs == 0))) {
5667 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5669 ctxt->sax->error(ctxt->userData,
5670 "Entity '%s' not defined\n", name);
5671 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005672 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005673 ctxt->disableSAX = 1;
5674 } else {
5675 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005677 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005678 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005679 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005680 }
5681 }
5682
5683 /*
5684 * [ WFC: Parsed Entity ]
5685 * An entity reference must not contain the name of an
5686 * unparsed entity
5687 */
5688 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5689 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5691 ctxt->sax->error(ctxt->userData,
5692 "Entity reference to unparsed entity %s\n", name);
5693 ctxt->wellFormed = 0;
5694 ctxt->disableSAX = 1;
5695 }
5696
5697 /*
5698 * [ WFC: No External Entity References ]
5699 * Attribute values cannot contain direct or indirect
5700 * entity references to external entities.
5701 */
5702 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5703 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5704 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5706 ctxt->sax->error(ctxt->userData,
5707 "Attribute references external entity '%s'\n", name);
5708 ctxt->wellFormed = 0;
5709 ctxt->disableSAX = 1;
5710 }
5711 /*
5712 * [ WFC: No < in Attribute Values ]
5713 * The replacement text of any entity referred to directly or
5714 * indirectly in an attribute value (other than "&lt;") must
5715 * not contain a <.
5716 */
5717 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5718 (ent != NULL) &&
5719 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5720 (ent->content != NULL) &&
5721 (xmlStrchr(ent->content, '<'))) {
5722 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5724 ctxt->sax->error(ctxt->userData,
5725 "'<' in entity '%s' is not allowed in attributes values\n", name);
5726 ctxt->wellFormed = 0;
5727 ctxt->disableSAX = 1;
5728 }
5729
5730 /*
5731 * Internal check, no parameter entities here ...
5732 */
5733 else {
5734 switch (ent->etype) {
5735 case XML_INTERNAL_PARAMETER_ENTITY:
5736 case XML_EXTERNAL_PARAMETER_ENTITY:
5737 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5739 ctxt->sax->error(ctxt->userData,
5740 "Attempt to reference the parameter entity '%s'\n", name);
5741 ctxt->wellFormed = 0;
5742 ctxt->disableSAX = 1;
5743 break;
5744 default:
5745 break;
5746 }
5747 }
5748
5749 /*
5750 * [ WFC: No Recursion ]
5751 * A parsed entity must not contain a recursive reference
5752 * to itself, either directly or indirectly.
5753 * Done somewhere else
5754 */
5755
5756 } else {
5757 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5759 ctxt->sax->error(ctxt->userData,
5760 "xmlParseEntityRef: expecting ';'\n");
5761 ctxt->wellFormed = 0;
5762 ctxt->disableSAX = 1;
5763 }
5764 xmlFree(name);
5765 }
5766 }
5767 return(ent);
5768}
5769
5770/**
5771 * xmlParseStringEntityRef:
5772 * @ctxt: an XML parser context
5773 * @str: a pointer to an index in the string
5774 *
5775 * parse ENTITY references declarations, but this version parses it from
5776 * a string value.
5777 *
5778 * [68] EntityRef ::= '&' Name ';'
5779 *
5780 * [ WFC: Entity Declared ]
5781 * In a document without any DTD, a document with only an internal DTD
5782 * subset which contains no parameter entity references, or a document
5783 * with "standalone='yes'", the Name given in the entity reference
5784 * must match that in an entity declaration, except that well-formed
5785 * documents need not declare any of the following entities: amp, lt,
5786 * gt, apos, quot. The declaration of a parameter entity must precede
5787 * any reference to it. Similarly, the declaration of a general entity
5788 * must precede any reference to it which appears in a default value in an
5789 * attribute-list declaration. Note that if entities are declared in the
5790 * external subset or in external parameter entities, a non-validating
5791 * processor is not obligated to read and process their declarations;
5792 * for such documents, the rule that an entity must be declared is a
5793 * well-formedness constraint only if standalone='yes'.
5794 *
5795 * [ WFC: Parsed Entity ]
5796 * An entity reference must not contain the name of an unparsed entity
5797 *
5798 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5799 * is updated to the current location in the string.
5800 */
5801xmlEntityPtr
5802xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5803 xmlChar *name;
5804 const xmlChar *ptr;
5805 xmlChar cur;
5806 xmlEntityPtr ent = NULL;
5807
5808 if ((str == NULL) || (*str == NULL))
5809 return(NULL);
5810 ptr = *str;
5811 cur = *ptr;
5812 if (cur == '&') {
5813 ptr++;
5814 cur = *ptr;
5815 name = xmlParseStringName(ctxt, &ptr);
5816 if (name == NULL) {
5817 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5819 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005820 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005821 ctxt->wellFormed = 0;
5822 ctxt->disableSAX = 1;
5823 } else {
5824 if (*ptr == ';') {
5825 ptr++;
5826 /*
5827 * Ask first SAX for entity resolution, otherwise try the
5828 * predefined set.
5829 */
5830 if (ctxt->sax != NULL) {
5831 if (ctxt->sax->getEntity != NULL)
5832 ent = ctxt->sax->getEntity(ctxt->userData, name);
5833 if (ent == NULL)
5834 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005835 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5836 ent = getEntity(ctxt, name);
5837 }
Owen Taylor3473f882001-02-23 17:55:21 +00005838 }
5839 /*
5840 * [ WFC: Entity Declared ]
5841 * In a document without any DTD, a document with only an
5842 * internal DTD subset which contains no parameter entity
5843 * references, or a document with "standalone='yes'", the
5844 * Name given in the entity reference must match that in an
5845 * entity declaration, except that well-formed documents
5846 * need not declare any of the following entities: amp, lt,
5847 * gt, apos, quot.
5848 * The declaration of a parameter entity must precede any
5849 * reference to it.
5850 * Similarly, the declaration of a general entity must
5851 * precede any reference to it which appears in a default
5852 * value in an attribute-list declaration. Note that if
5853 * entities are declared in the external subset or in
5854 * external parameter entities, a non-validating processor
5855 * is not obligated to read and process their declarations;
5856 * for such documents, the rule that an entity must be
5857 * declared is a well-formedness constraint only if
5858 * standalone='yes'.
5859 */
5860 if (ent == NULL) {
5861 if ((ctxt->standalone == 1) ||
5862 ((ctxt->hasExternalSubset == 0) &&
5863 (ctxt->hasPErefs == 0))) {
5864 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5866 ctxt->sax->error(ctxt->userData,
5867 "Entity '%s' not defined\n", name);
5868 ctxt->wellFormed = 0;
5869 ctxt->disableSAX = 1;
5870 } else {
5871 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5872 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5873 ctxt->sax->warning(ctxt->userData,
5874 "Entity '%s' not defined\n", name);
5875 }
5876 }
5877
5878 /*
5879 * [ WFC: Parsed Entity ]
5880 * An entity reference must not contain the name of an
5881 * unparsed entity
5882 */
5883 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5884 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5886 ctxt->sax->error(ctxt->userData,
5887 "Entity reference to unparsed entity %s\n", name);
5888 ctxt->wellFormed = 0;
5889 ctxt->disableSAX = 1;
5890 }
5891
5892 /*
5893 * [ WFC: No External Entity References ]
5894 * Attribute values cannot contain direct or indirect
5895 * entity references to external entities.
5896 */
5897 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5898 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5899 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5901 ctxt->sax->error(ctxt->userData,
5902 "Attribute references external entity '%s'\n", name);
5903 ctxt->wellFormed = 0;
5904 ctxt->disableSAX = 1;
5905 }
5906 /*
5907 * [ WFC: No < in Attribute Values ]
5908 * The replacement text of any entity referred to directly or
5909 * indirectly in an attribute value (other than "&lt;") must
5910 * not contain a <.
5911 */
5912 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5913 (ent != NULL) &&
5914 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5915 (ent->content != NULL) &&
5916 (xmlStrchr(ent->content, '<'))) {
5917 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5919 ctxt->sax->error(ctxt->userData,
5920 "'<' in entity '%s' is not allowed in attributes values\n", name);
5921 ctxt->wellFormed = 0;
5922 ctxt->disableSAX = 1;
5923 }
5924
5925 /*
5926 * Internal check, no parameter entities here ...
5927 */
5928 else {
5929 switch (ent->etype) {
5930 case XML_INTERNAL_PARAMETER_ENTITY:
5931 case XML_EXTERNAL_PARAMETER_ENTITY:
5932 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5934 ctxt->sax->error(ctxt->userData,
5935 "Attempt to reference the parameter entity '%s'\n", name);
5936 ctxt->wellFormed = 0;
5937 ctxt->disableSAX = 1;
5938 break;
5939 default:
5940 break;
5941 }
5942 }
5943
5944 /*
5945 * [ WFC: No Recursion ]
5946 * A parsed entity must not contain a recursive reference
5947 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005948 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005949 */
5950
5951 } else {
5952 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5954 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005955 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005956 ctxt->wellFormed = 0;
5957 ctxt->disableSAX = 1;
5958 }
5959 xmlFree(name);
5960 }
5961 }
5962 *str = ptr;
5963 return(ent);
5964}
5965
5966/**
5967 * xmlParsePEReference:
5968 * @ctxt: an XML parser context
5969 *
5970 * parse PEReference declarations
5971 * The entity content is handled directly by pushing it's content as
5972 * a new input stream.
5973 *
5974 * [69] PEReference ::= '%' Name ';'
5975 *
5976 * [ WFC: No Recursion ]
5977 * A parsed entity must not contain a recursive
5978 * reference to itself, either directly or indirectly.
5979 *
5980 * [ WFC: Entity Declared ]
5981 * In a document without any DTD, a document with only an internal DTD
5982 * subset which contains no parameter entity references, or a document
5983 * with "standalone='yes'", ... ... The declaration of a parameter
5984 * entity must precede any reference to it...
5985 *
5986 * [ VC: Entity Declared ]
5987 * In a document with an external subset or external parameter entities
5988 * with "standalone='no'", ... ... The declaration of a parameter entity
5989 * must precede any reference to it...
5990 *
5991 * [ WFC: In DTD ]
5992 * Parameter-entity references may only appear in the DTD.
5993 * NOTE: misleading but this is handled.
5994 */
5995void
5996xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5997 xmlChar *name;
5998 xmlEntityPtr entity = NULL;
5999 xmlParserInputPtr input;
6000
6001 if (RAW == '%') {
6002 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006003 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006004 if (name == NULL) {
6005 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6007 ctxt->sax->error(ctxt->userData,
6008 "xmlParsePEReference: no name\n");
6009 ctxt->wellFormed = 0;
6010 ctxt->disableSAX = 1;
6011 } else {
6012 if (RAW == ';') {
6013 NEXT;
6014 if ((ctxt->sax != NULL) &&
6015 (ctxt->sax->getParameterEntity != NULL))
6016 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6017 name);
6018 if (entity == NULL) {
6019 /*
6020 * [ WFC: Entity Declared ]
6021 * In a document without any DTD, a document with only an
6022 * internal DTD subset which contains no parameter entity
6023 * references, or a document with "standalone='yes'", ...
6024 * ... The declaration of a parameter entity must precede
6025 * any reference to it...
6026 */
6027 if ((ctxt->standalone == 1) ||
6028 ((ctxt->hasExternalSubset == 0) &&
6029 (ctxt->hasPErefs == 0))) {
6030 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6031 if ((!ctxt->disableSAX) &&
6032 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6033 ctxt->sax->error(ctxt->userData,
6034 "PEReference: %%%s; not found\n", name);
6035 ctxt->wellFormed = 0;
6036 ctxt->disableSAX = 1;
6037 } else {
6038 /*
6039 * [ VC: Entity Declared ]
6040 * In a document with an external subset or external
6041 * parameter entities with "standalone='no'", ...
6042 * ... The declaration of a parameter entity must precede
6043 * any reference to it...
6044 */
6045 if ((!ctxt->disableSAX) &&
6046 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6047 ctxt->sax->warning(ctxt->userData,
6048 "PEReference: %%%s; not found\n", name);
6049 ctxt->valid = 0;
6050 }
6051 } else {
6052 /*
6053 * Internal checking in case the entity quest barfed
6054 */
6055 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6056 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6057 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6058 ctxt->sax->warning(ctxt->userData,
6059 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006060 } else if (ctxt->input->free != deallocblankswrapper) {
6061 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6062 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006063 } else {
6064 /*
6065 * TODO !!!
6066 * handle the extra spaces added before and after
6067 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6068 */
6069 input = xmlNewEntityInputStream(ctxt, entity);
6070 xmlPushInput(ctxt, input);
6071 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6072 (RAW == '<') && (NXT(1) == '?') &&
6073 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6074 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6075 xmlParseTextDecl(ctxt);
6076 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6077 /*
6078 * The XML REC instructs us to stop parsing
6079 * right here
6080 */
6081 ctxt->instate = XML_PARSER_EOF;
6082 xmlFree(name);
6083 return;
6084 }
6085 }
Owen Taylor3473f882001-02-23 17:55:21 +00006086 }
6087 }
6088 ctxt->hasPErefs = 1;
6089 } else {
6090 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6092 ctxt->sax->error(ctxt->userData,
6093 "xmlParsePEReference: expecting ';'\n");
6094 ctxt->wellFormed = 0;
6095 ctxt->disableSAX = 1;
6096 }
6097 xmlFree(name);
6098 }
6099 }
6100}
6101
6102/**
6103 * xmlParseStringPEReference:
6104 * @ctxt: an XML parser context
6105 * @str: a pointer to an index in the string
6106 *
6107 * parse PEReference declarations
6108 *
6109 * [69] PEReference ::= '%' Name ';'
6110 *
6111 * [ WFC: No Recursion ]
6112 * A parsed entity must not contain a recursive
6113 * reference to itself, either directly or indirectly.
6114 *
6115 * [ WFC: Entity Declared ]
6116 * In a document without any DTD, a document with only an internal DTD
6117 * subset which contains no parameter entity references, or a document
6118 * with "standalone='yes'", ... ... The declaration of a parameter
6119 * entity must precede any reference to it...
6120 *
6121 * [ VC: Entity Declared ]
6122 * In a document with an external subset or external parameter entities
6123 * with "standalone='no'", ... ... The declaration of a parameter entity
6124 * must precede any reference to it...
6125 *
6126 * [ WFC: In DTD ]
6127 * Parameter-entity references may only appear in the DTD.
6128 * NOTE: misleading but this is handled.
6129 *
6130 * Returns the string of the entity content.
6131 * str is updated to the current value of the index
6132 */
6133xmlEntityPtr
6134xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6135 const xmlChar *ptr;
6136 xmlChar cur;
6137 xmlChar *name;
6138 xmlEntityPtr entity = NULL;
6139
6140 if ((str == NULL) || (*str == NULL)) return(NULL);
6141 ptr = *str;
6142 cur = *ptr;
6143 if (cur == '%') {
6144 ptr++;
6145 cur = *ptr;
6146 name = xmlParseStringName(ctxt, &ptr);
6147 if (name == NULL) {
6148 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6150 ctxt->sax->error(ctxt->userData,
6151 "xmlParseStringPEReference: no name\n");
6152 ctxt->wellFormed = 0;
6153 ctxt->disableSAX = 1;
6154 } else {
6155 cur = *ptr;
6156 if (cur == ';') {
6157 ptr++;
6158 cur = *ptr;
6159 if ((ctxt->sax != NULL) &&
6160 (ctxt->sax->getParameterEntity != NULL))
6161 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6162 name);
6163 if (entity == NULL) {
6164 /*
6165 * [ WFC: Entity Declared ]
6166 * In a document without any DTD, a document with only an
6167 * internal DTD subset which contains no parameter entity
6168 * references, or a document with "standalone='yes'", ...
6169 * ... The declaration of a parameter entity must precede
6170 * any reference to it...
6171 */
6172 if ((ctxt->standalone == 1) ||
6173 ((ctxt->hasExternalSubset == 0) &&
6174 (ctxt->hasPErefs == 0))) {
6175 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6177 ctxt->sax->error(ctxt->userData,
6178 "PEReference: %%%s; not found\n", name);
6179 ctxt->wellFormed = 0;
6180 ctxt->disableSAX = 1;
6181 } else {
6182 /*
6183 * [ VC: Entity Declared ]
6184 * In a document with an external subset or external
6185 * parameter entities with "standalone='no'", ...
6186 * ... The declaration of a parameter entity must
6187 * precede any reference to it...
6188 */
6189 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6190 ctxt->sax->warning(ctxt->userData,
6191 "PEReference: %%%s; not found\n", name);
6192 ctxt->valid = 0;
6193 }
6194 } else {
6195 /*
6196 * Internal checking in case the entity quest barfed
6197 */
6198 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6199 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6200 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6201 ctxt->sax->warning(ctxt->userData,
6202 "Internal: %%%s; is not a parameter entity\n", name);
6203 }
6204 }
6205 ctxt->hasPErefs = 1;
6206 } else {
6207 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6209 ctxt->sax->error(ctxt->userData,
6210 "xmlParseStringPEReference: expecting ';'\n");
6211 ctxt->wellFormed = 0;
6212 ctxt->disableSAX = 1;
6213 }
6214 xmlFree(name);
6215 }
6216 }
6217 *str = ptr;
6218 return(entity);
6219}
6220
6221/**
6222 * xmlParseDocTypeDecl:
6223 * @ctxt: an XML parser context
6224 *
6225 * parse a DOCTYPE declaration
6226 *
6227 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6228 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6229 *
6230 * [ VC: Root Element Type ]
6231 * The Name in the document type declaration must match the element
6232 * type of the root element.
6233 */
6234
6235void
6236xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6237 xmlChar *name = NULL;
6238 xmlChar *ExternalID = NULL;
6239 xmlChar *URI = NULL;
6240
6241 /*
6242 * We know that '<!DOCTYPE' has been detected.
6243 */
6244 SKIP(9);
6245
6246 SKIP_BLANKS;
6247
6248 /*
6249 * Parse the DOCTYPE name.
6250 */
6251 name = xmlParseName(ctxt);
6252 if (name == NULL) {
6253 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6255 ctxt->sax->error(ctxt->userData,
6256 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6257 ctxt->wellFormed = 0;
6258 ctxt->disableSAX = 1;
6259 }
6260 ctxt->intSubName = name;
6261
6262 SKIP_BLANKS;
6263
6264 /*
6265 * Check for SystemID and ExternalID
6266 */
6267 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6268
6269 if ((URI != NULL) || (ExternalID != NULL)) {
6270 ctxt->hasExternalSubset = 1;
6271 }
6272 ctxt->extSubURI = URI;
6273 ctxt->extSubSystem = ExternalID;
6274
6275 SKIP_BLANKS;
6276
6277 /*
6278 * Create and update the internal subset.
6279 */
6280 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6281 (!ctxt->disableSAX))
6282 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6283
6284 /*
6285 * Is there any internal subset declarations ?
6286 * they are handled separately in xmlParseInternalSubset()
6287 */
6288 if (RAW == '[')
6289 return;
6290
6291 /*
6292 * We should be at the end of the DOCTYPE declaration.
6293 */
6294 if (RAW != '>') {
6295 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006297 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006298 ctxt->wellFormed = 0;
6299 ctxt->disableSAX = 1;
6300 }
6301 NEXT;
6302}
6303
6304/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006305 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006306 * @ctxt: an XML parser context
6307 *
6308 * parse the internal subset declaration
6309 *
6310 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6311 */
6312
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006313static void
Owen Taylor3473f882001-02-23 17:55:21 +00006314xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6315 /*
6316 * Is there any DTD definition ?
6317 */
6318 if (RAW == '[') {
6319 ctxt->instate = XML_PARSER_DTD;
6320 NEXT;
6321 /*
6322 * Parse the succession of Markup declarations and
6323 * PEReferences.
6324 * Subsequence (markupdecl | PEReference | S)*
6325 */
6326 while (RAW != ']') {
6327 const xmlChar *check = CUR_PTR;
6328 int cons = ctxt->input->consumed;
6329
6330 SKIP_BLANKS;
6331 xmlParseMarkupDecl(ctxt);
6332 xmlParsePEReference(ctxt);
6333
6334 /*
6335 * Pop-up of finished entities.
6336 */
6337 while ((RAW == 0) && (ctxt->inputNr > 1))
6338 xmlPopInput(ctxt);
6339
6340 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6341 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6343 ctxt->sax->error(ctxt->userData,
6344 "xmlParseInternalSubset: error detected in Markup declaration\n");
6345 ctxt->wellFormed = 0;
6346 ctxt->disableSAX = 1;
6347 break;
6348 }
6349 }
6350 if (RAW == ']') {
6351 NEXT;
6352 SKIP_BLANKS;
6353 }
6354 }
6355
6356 /*
6357 * We should be at the end of the DOCTYPE declaration.
6358 */
6359 if (RAW != '>') {
6360 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006362 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006363 ctxt->wellFormed = 0;
6364 ctxt->disableSAX = 1;
6365 }
6366 NEXT;
6367}
6368
6369/**
6370 * xmlParseAttribute:
6371 * @ctxt: an XML parser context
6372 * @value: a xmlChar ** used to store the value of the attribute
6373 *
6374 * parse an attribute
6375 *
6376 * [41] Attribute ::= Name Eq AttValue
6377 *
6378 * [ WFC: No External Entity References ]
6379 * Attribute values cannot contain direct or indirect entity references
6380 * to external entities.
6381 *
6382 * [ WFC: No < in Attribute Values ]
6383 * The replacement text of any entity referred to directly or indirectly in
6384 * an attribute value (other than "&lt;") must not contain a <.
6385 *
6386 * [ VC: Attribute Value Type ]
6387 * The attribute must have been declared; the value must be of the type
6388 * declared for it.
6389 *
6390 * [25] Eq ::= S? '=' S?
6391 *
6392 * With namespace:
6393 *
6394 * [NS 11] Attribute ::= QName Eq AttValue
6395 *
6396 * Also the case QName == xmlns:??? is handled independently as a namespace
6397 * definition.
6398 *
6399 * Returns the attribute name, and the value in *value.
6400 */
6401
6402xmlChar *
6403xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6404 xmlChar *name, *val;
6405
6406 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006407 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006408 name = xmlParseName(ctxt);
6409 if (name == NULL) {
6410 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6412 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6413 ctxt->wellFormed = 0;
6414 ctxt->disableSAX = 1;
6415 return(NULL);
6416 }
6417
6418 /*
6419 * read the value
6420 */
6421 SKIP_BLANKS;
6422 if (RAW == '=') {
6423 NEXT;
6424 SKIP_BLANKS;
6425 val = xmlParseAttValue(ctxt);
6426 ctxt->instate = XML_PARSER_CONTENT;
6427 } else {
6428 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6430 ctxt->sax->error(ctxt->userData,
6431 "Specification mandate value for attribute %s\n", name);
6432 ctxt->wellFormed = 0;
6433 ctxt->disableSAX = 1;
6434 xmlFree(name);
6435 return(NULL);
6436 }
6437
6438 /*
6439 * Check that xml:lang conforms to the specification
6440 * No more registered as an error, just generate a warning now
6441 * since this was deprecated in XML second edition
6442 */
6443 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6444 if (!xmlCheckLanguageID(val)) {
6445 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6446 ctxt->sax->warning(ctxt->userData,
6447 "Malformed value for xml:lang : %s\n", val);
6448 }
6449 }
6450
6451 /*
6452 * Check that xml:space conforms to the specification
6453 */
6454 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6455 if (xmlStrEqual(val, BAD_CAST "default"))
6456 *(ctxt->space) = 0;
6457 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6458 *(ctxt->space) = 1;
6459 else {
6460 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6462 ctxt->sax->error(ctxt->userData,
6463"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6464 val);
6465 ctxt->wellFormed = 0;
6466 ctxt->disableSAX = 1;
6467 }
6468 }
6469
6470 *value = val;
6471 return(name);
6472}
6473
6474/**
6475 * xmlParseStartTag:
6476 * @ctxt: an XML parser context
6477 *
6478 * parse a start of tag either for rule element or
6479 * EmptyElement. In both case we don't parse the tag closing chars.
6480 *
6481 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6482 *
6483 * [ WFC: Unique Att Spec ]
6484 * No attribute name may appear more than once in the same start-tag or
6485 * empty-element tag.
6486 *
6487 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6488 *
6489 * [ WFC: Unique Att Spec ]
6490 * No attribute name may appear more than once in the same start-tag or
6491 * empty-element tag.
6492 *
6493 * With namespace:
6494 *
6495 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6496 *
6497 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6498 *
6499 * Returns the element name parsed
6500 */
6501
6502xmlChar *
6503xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6504 xmlChar *name;
6505 xmlChar *attname;
6506 xmlChar *attvalue;
6507 const xmlChar **atts = NULL;
6508 int nbatts = 0;
6509 int maxatts = 0;
6510 int i;
6511
6512 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006513 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006514
6515 name = xmlParseName(ctxt);
6516 if (name == NULL) {
6517 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6519 ctxt->sax->error(ctxt->userData,
6520 "xmlParseStartTag: invalid element name\n");
6521 ctxt->wellFormed = 0;
6522 ctxt->disableSAX = 1;
6523 return(NULL);
6524 }
6525
6526 /*
6527 * Now parse the attributes, it ends up with the ending
6528 *
6529 * (S Attribute)* S?
6530 */
6531 SKIP_BLANKS;
6532 GROW;
6533
Daniel Veillard21a0f912001-02-25 19:54:14 +00006534 while ((RAW != '>') &&
6535 ((RAW != '/') || (NXT(1) != '>')) &&
6536 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006537 const xmlChar *q = CUR_PTR;
6538 int cons = ctxt->input->consumed;
6539
6540 attname = xmlParseAttribute(ctxt, &attvalue);
6541 if ((attname != NULL) && (attvalue != NULL)) {
6542 /*
6543 * [ WFC: Unique Att Spec ]
6544 * No attribute name may appear more than once in the same
6545 * start-tag or empty-element tag.
6546 */
6547 for (i = 0; i < nbatts;i += 2) {
6548 if (xmlStrEqual(atts[i], attname)) {
6549 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6551 ctxt->sax->error(ctxt->userData,
6552 "Attribute %s redefined\n",
6553 attname);
6554 ctxt->wellFormed = 0;
6555 ctxt->disableSAX = 1;
6556 xmlFree(attname);
6557 xmlFree(attvalue);
6558 goto failed;
6559 }
6560 }
6561
6562 /*
6563 * Add the pair to atts
6564 */
6565 if (atts == NULL) {
6566 maxatts = 10;
6567 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6568 if (atts == NULL) {
6569 xmlGenericError(xmlGenericErrorContext,
6570 "malloc of %ld byte failed\n",
6571 maxatts * (long)sizeof(xmlChar *));
6572 return(NULL);
6573 }
6574 } else if (nbatts + 4 > maxatts) {
6575 maxatts *= 2;
6576 atts = (const xmlChar **) xmlRealloc((void *) atts,
6577 maxatts * sizeof(xmlChar *));
6578 if (atts == NULL) {
6579 xmlGenericError(xmlGenericErrorContext,
6580 "realloc of %ld byte failed\n",
6581 maxatts * (long)sizeof(xmlChar *));
6582 return(NULL);
6583 }
6584 }
6585 atts[nbatts++] = attname;
6586 atts[nbatts++] = attvalue;
6587 atts[nbatts] = NULL;
6588 atts[nbatts + 1] = NULL;
6589 } else {
6590 if (attname != NULL)
6591 xmlFree(attname);
6592 if (attvalue != NULL)
6593 xmlFree(attvalue);
6594 }
6595
6596failed:
6597
6598 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6599 break;
6600 if (!IS_BLANK(RAW)) {
6601 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6603 ctxt->sax->error(ctxt->userData,
6604 "attributes construct error\n");
6605 ctxt->wellFormed = 0;
6606 ctxt->disableSAX = 1;
6607 }
6608 SKIP_BLANKS;
6609 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6610 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6612 ctxt->sax->error(ctxt->userData,
6613 "xmlParseStartTag: problem parsing attributes\n");
6614 ctxt->wellFormed = 0;
6615 ctxt->disableSAX = 1;
6616 break;
6617 }
6618 GROW;
6619 }
6620
6621 /*
6622 * SAX: Start of Element !
6623 */
6624 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6625 (!ctxt->disableSAX))
6626 ctxt->sax->startElement(ctxt->userData, name, atts);
6627
6628 if (atts != NULL) {
6629 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6630 xmlFree((void *) atts);
6631 }
6632 return(name);
6633}
6634
6635/**
6636 * xmlParseEndTag:
6637 * @ctxt: an XML parser context
6638 *
6639 * parse an end of tag
6640 *
6641 * [42] ETag ::= '</' Name S? '>'
6642 *
6643 * With namespace
6644 *
6645 * [NS 9] ETag ::= '</' QName S? '>'
6646 */
6647
6648void
6649xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6650 xmlChar *name;
6651 xmlChar *oldname;
6652
6653 GROW;
6654 if ((RAW != '<') || (NXT(1) != '/')) {
6655 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6657 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6658 ctxt->wellFormed = 0;
6659 ctxt->disableSAX = 1;
6660 return;
6661 }
6662 SKIP(2);
6663
Daniel Veillard46de64e2002-05-29 08:21:33 +00006664 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006665
6666 /*
6667 * We should definitely be at the ending "S? '>'" part
6668 */
6669 GROW;
6670 SKIP_BLANKS;
6671 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6672 ctxt->errNo = XML_ERR_GT_REQUIRED;
6673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6674 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6675 ctxt->wellFormed = 0;
6676 ctxt->disableSAX = 1;
6677 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006678 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006679
6680 /*
6681 * [ WFC: Element Type Match ]
6682 * The Name in an element's end-tag must match the element type in the
6683 * start-tag.
6684 *
6685 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006686 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006687 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006689 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006690 ctxt->sax->error(ctxt->userData,
6691 "Opening and ending tag mismatch: %s and %s\n",
6692 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006693 xmlFree(name);
6694 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006695 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006696 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006697 }
6698
6699 }
6700 ctxt->wellFormed = 0;
6701 ctxt->disableSAX = 1;
6702 }
6703
6704 /*
6705 * SAX: End of Tag
6706 */
6707 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6708 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006709 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006710
Owen Taylor3473f882001-02-23 17:55:21 +00006711 oldname = namePop(ctxt);
6712 spacePop(ctxt);
6713 if (oldname != NULL) {
6714#ifdef DEBUG_STACK
6715 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6716#endif
6717 xmlFree(oldname);
6718 }
6719 return;
6720}
6721
6722/**
6723 * xmlParseCDSect:
6724 * @ctxt: an XML parser context
6725 *
6726 * Parse escaped pure raw content.
6727 *
6728 * [18] CDSect ::= CDStart CData CDEnd
6729 *
6730 * [19] CDStart ::= '<![CDATA['
6731 *
6732 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6733 *
6734 * [21] CDEnd ::= ']]>'
6735 */
6736void
6737xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6738 xmlChar *buf = NULL;
6739 int len = 0;
6740 int size = XML_PARSER_BUFFER_SIZE;
6741 int r, rl;
6742 int s, sl;
6743 int cur, l;
6744 int count = 0;
6745
6746 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6747 (NXT(2) == '[') && (NXT(3) == 'C') &&
6748 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6749 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6750 (NXT(8) == '[')) {
6751 SKIP(9);
6752 } else
6753 return;
6754
6755 ctxt->instate = XML_PARSER_CDATA_SECTION;
6756 r = CUR_CHAR(rl);
6757 if (!IS_CHAR(r)) {
6758 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6759 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6760 ctxt->sax->error(ctxt->userData,
6761 "CData section not finished\n");
6762 ctxt->wellFormed = 0;
6763 ctxt->disableSAX = 1;
6764 ctxt->instate = XML_PARSER_CONTENT;
6765 return;
6766 }
6767 NEXTL(rl);
6768 s = CUR_CHAR(sl);
6769 if (!IS_CHAR(s)) {
6770 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6772 ctxt->sax->error(ctxt->userData,
6773 "CData section not finished\n");
6774 ctxt->wellFormed = 0;
6775 ctxt->disableSAX = 1;
6776 ctxt->instate = XML_PARSER_CONTENT;
6777 return;
6778 }
6779 NEXTL(sl);
6780 cur = CUR_CHAR(l);
6781 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6782 if (buf == NULL) {
6783 xmlGenericError(xmlGenericErrorContext,
6784 "malloc of %d byte failed\n", size);
6785 return;
6786 }
6787 while (IS_CHAR(cur) &&
6788 ((r != ']') || (s != ']') || (cur != '>'))) {
6789 if (len + 5 >= size) {
6790 size *= 2;
6791 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6792 if (buf == NULL) {
6793 xmlGenericError(xmlGenericErrorContext,
6794 "realloc of %d byte failed\n", size);
6795 return;
6796 }
6797 }
6798 COPY_BUF(rl,buf,len,r);
6799 r = s;
6800 rl = sl;
6801 s = cur;
6802 sl = l;
6803 count++;
6804 if (count > 50) {
6805 GROW;
6806 count = 0;
6807 }
6808 NEXTL(l);
6809 cur = CUR_CHAR(l);
6810 }
6811 buf[len] = 0;
6812 ctxt->instate = XML_PARSER_CONTENT;
6813 if (cur != '>') {
6814 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6816 ctxt->sax->error(ctxt->userData,
6817 "CData section not finished\n%.50s\n", buf);
6818 ctxt->wellFormed = 0;
6819 ctxt->disableSAX = 1;
6820 xmlFree(buf);
6821 return;
6822 }
6823 NEXTL(l);
6824
6825 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006826 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006827 */
6828 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6829 if (ctxt->sax->cdataBlock != NULL)
6830 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006831 else if (ctxt->sax->characters != NULL)
6832 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006833 }
6834 xmlFree(buf);
6835}
6836
6837/**
6838 * xmlParseContent:
6839 * @ctxt: an XML parser context
6840 *
6841 * Parse a content:
6842 *
6843 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6844 */
6845
6846void
6847xmlParseContent(xmlParserCtxtPtr ctxt) {
6848 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006849 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006850 ((RAW != '<') || (NXT(1) != '/'))) {
6851 const xmlChar *test = CUR_PTR;
6852 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006853 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006854
6855 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006856 * First case : a Processing Instruction.
6857 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006858 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006859 xmlParsePI(ctxt);
6860 }
6861
6862 /*
6863 * Second case : a CDSection
6864 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006865 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006866 (NXT(2) == '[') && (NXT(3) == 'C') &&
6867 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6868 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6869 (NXT(8) == '[')) {
6870 xmlParseCDSect(ctxt);
6871 }
6872
6873 /*
6874 * Third case : a comment
6875 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006876 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006877 (NXT(2) == '-') && (NXT(3) == '-')) {
6878 xmlParseComment(ctxt);
6879 ctxt->instate = XML_PARSER_CONTENT;
6880 }
6881
6882 /*
6883 * Fourth case : a sub-element.
6884 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006885 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006886 xmlParseElement(ctxt);
6887 }
6888
6889 /*
6890 * Fifth case : a reference. If if has not been resolved,
6891 * parsing returns it's Name, create the node
6892 */
6893
Daniel Veillard21a0f912001-02-25 19:54:14 +00006894 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006895 xmlParseReference(ctxt);
6896 }
6897
6898 /*
6899 * Last case, text. Note that References are handled directly.
6900 */
6901 else {
6902 xmlParseCharData(ctxt, 0);
6903 }
6904
6905 GROW;
6906 /*
6907 * Pop-up of finished entities.
6908 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006909 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006910 xmlPopInput(ctxt);
6911 SHRINK;
6912
Daniel Veillardfdc91562002-07-01 21:52:03 +00006913 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006914 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6916 ctxt->sax->error(ctxt->userData,
6917 "detected an error in element content\n");
6918 ctxt->wellFormed = 0;
6919 ctxt->disableSAX = 1;
6920 ctxt->instate = XML_PARSER_EOF;
6921 break;
6922 }
6923 }
6924}
6925
6926/**
6927 * xmlParseElement:
6928 * @ctxt: an XML parser context
6929 *
6930 * parse an XML element, this is highly recursive
6931 *
6932 * [39] element ::= EmptyElemTag | STag content ETag
6933 *
6934 * [ WFC: Element Type Match ]
6935 * The Name in an element's end-tag must match the element type in the
6936 * start-tag.
6937 *
6938 * [ VC: Element Valid ]
6939 * An element is valid if there is a declaration matching elementdecl
6940 * where the Name matches the element type and one of the following holds:
6941 * - The declaration matches EMPTY and the element has no content.
6942 * - The declaration matches children and the sequence of child elements
6943 * belongs to the language generated by the regular expression in the
6944 * content model, with optional white space (characters matching the
6945 * nonterminal S) between each pair of child elements.
6946 * - The declaration matches Mixed and the content consists of character
6947 * data and child elements whose types match names in the content model.
6948 * - The declaration matches ANY, and the types of any child elements have
6949 * been declared.
6950 */
6951
6952void
6953xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00006954 xmlChar *name;
6955 xmlChar *oldname;
6956 xmlParserNodeInfo node_info;
6957 xmlNodePtr ret;
6958
6959 /* Capture start position */
6960 if (ctxt->record_info) {
6961 node_info.begin_pos = ctxt->input->consumed +
6962 (CUR_PTR - ctxt->input->base);
6963 node_info.begin_line = ctxt->input->line;
6964 }
6965
6966 if (ctxt->spaceNr == 0)
6967 spacePush(ctxt, -1);
6968 else
6969 spacePush(ctxt, *ctxt->space);
6970
6971 name = xmlParseStartTag(ctxt);
6972 if (name == NULL) {
6973 spacePop(ctxt);
6974 return;
6975 }
6976 namePush(ctxt, name);
6977 ret = ctxt->node;
6978
6979 /*
6980 * [ VC: Root Element Type ]
6981 * The Name in the document type declaration must match the element
6982 * type of the root element.
6983 */
6984 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6985 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6986 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6987
6988 /*
6989 * Check for an Empty Element.
6990 */
6991 if ((RAW == '/') && (NXT(1) == '>')) {
6992 SKIP(2);
6993 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6994 (!ctxt->disableSAX))
6995 ctxt->sax->endElement(ctxt->userData, name);
6996 oldname = namePop(ctxt);
6997 spacePop(ctxt);
6998 if (oldname != NULL) {
6999#ifdef DEBUG_STACK
7000 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7001#endif
7002 xmlFree(oldname);
7003 }
7004 if ( ret != NULL && ctxt->record_info ) {
7005 node_info.end_pos = ctxt->input->consumed +
7006 (CUR_PTR - ctxt->input->base);
7007 node_info.end_line = ctxt->input->line;
7008 node_info.node = ret;
7009 xmlParserAddNodeInfo(ctxt, &node_info);
7010 }
7011 return;
7012 }
7013 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007014 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007015 } else {
7016 ctxt->errNo = XML_ERR_GT_REQUIRED;
7017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7018 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007019 "Couldn't find end of Start Tag %s\n",
7020 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007021 ctxt->wellFormed = 0;
7022 ctxt->disableSAX = 1;
7023
7024 /*
7025 * end of parsing of this node.
7026 */
7027 nodePop(ctxt);
7028 oldname = namePop(ctxt);
7029 spacePop(ctxt);
7030 if (oldname != NULL) {
7031#ifdef DEBUG_STACK
7032 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7033#endif
7034 xmlFree(oldname);
7035 }
7036
7037 /*
7038 * Capture end position and add node
7039 */
7040 if ( ret != NULL && ctxt->record_info ) {
7041 node_info.end_pos = ctxt->input->consumed +
7042 (CUR_PTR - ctxt->input->base);
7043 node_info.end_line = ctxt->input->line;
7044 node_info.node = ret;
7045 xmlParserAddNodeInfo(ctxt, &node_info);
7046 }
7047 return;
7048 }
7049
7050 /*
7051 * Parse the content of the element:
7052 */
7053 xmlParseContent(ctxt);
7054 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007055 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7057 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007058 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007059 ctxt->wellFormed = 0;
7060 ctxt->disableSAX = 1;
7061
7062 /*
7063 * end of parsing of this node.
7064 */
7065 nodePop(ctxt);
7066 oldname = namePop(ctxt);
7067 spacePop(ctxt);
7068 if (oldname != NULL) {
7069#ifdef DEBUG_STACK
7070 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7071#endif
7072 xmlFree(oldname);
7073 }
7074 return;
7075 }
7076
7077 /*
7078 * parse the end of tag: '</' should be here.
7079 */
7080 xmlParseEndTag(ctxt);
7081
7082 /*
7083 * Capture end position and add node
7084 */
7085 if ( ret != NULL && ctxt->record_info ) {
7086 node_info.end_pos = ctxt->input->consumed +
7087 (CUR_PTR - ctxt->input->base);
7088 node_info.end_line = ctxt->input->line;
7089 node_info.node = ret;
7090 xmlParserAddNodeInfo(ctxt, &node_info);
7091 }
7092}
7093
7094/**
7095 * xmlParseVersionNum:
7096 * @ctxt: an XML parser context
7097 *
7098 * parse the XML version value.
7099 *
7100 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7101 *
7102 * Returns the string giving the XML version number, or NULL
7103 */
7104xmlChar *
7105xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7106 xmlChar *buf = NULL;
7107 int len = 0;
7108 int size = 10;
7109 xmlChar cur;
7110
7111 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7112 if (buf == NULL) {
7113 xmlGenericError(xmlGenericErrorContext,
7114 "malloc of %d byte failed\n", size);
7115 return(NULL);
7116 }
7117 cur = CUR;
7118 while (((cur >= 'a') && (cur <= 'z')) ||
7119 ((cur >= 'A') && (cur <= 'Z')) ||
7120 ((cur >= '0') && (cur <= '9')) ||
7121 (cur == '_') || (cur == '.') ||
7122 (cur == ':') || (cur == '-')) {
7123 if (len + 1 >= size) {
7124 size *= 2;
7125 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7126 if (buf == NULL) {
7127 xmlGenericError(xmlGenericErrorContext,
7128 "realloc of %d byte failed\n", size);
7129 return(NULL);
7130 }
7131 }
7132 buf[len++] = cur;
7133 NEXT;
7134 cur=CUR;
7135 }
7136 buf[len] = 0;
7137 return(buf);
7138}
7139
7140/**
7141 * xmlParseVersionInfo:
7142 * @ctxt: an XML parser context
7143 *
7144 * parse the XML version.
7145 *
7146 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7147 *
7148 * [25] Eq ::= S? '=' S?
7149 *
7150 * Returns the version string, e.g. "1.0"
7151 */
7152
7153xmlChar *
7154xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7155 xmlChar *version = NULL;
7156 const xmlChar *q;
7157
7158 if ((RAW == 'v') && (NXT(1) == 'e') &&
7159 (NXT(2) == 'r') && (NXT(3) == 's') &&
7160 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7161 (NXT(6) == 'n')) {
7162 SKIP(7);
7163 SKIP_BLANKS;
7164 if (RAW != '=') {
7165 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7167 ctxt->sax->error(ctxt->userData,
7168 "xmlParseVersionInfo : expected '='\n");
7169 ctxt->wellFormed = 0;
7170 ctxt->disableSAX = 1;
7171 return(NULL);
7172 }
7173 NEXT;
7174 SKIP_BLANKS;
7175 if (RAW == '"') {
7176 NEXT;
7177 q = CUR_PTR;
7178 version = xmlParseVersionNum(ctxt);
7179 if (RAW != '"') {
7180 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7181 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7182 ctxt->sax->error(ctxt->userData,
7183 "String not closed\n%.50s\n", q);
7184 ctxt->wellFormed = 0;
7185 ctxt->disableSAX = 1;
7186 } else
7187 NEXT;
7188 } else if (RAW == '\''){
7189 NEXT;
7190 q = CUR_PTR;
7191 version = xmlParseVersionNum(ctxt);
7192 if (RAW != '\'') {
7193 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7194 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7195 ctxt->sax->error(ctxt->userData,
7196 "String not closed\n%.50s\n", q);
7197 ctxt->wellFormed = 0;
7198 ctxt->disableSAX = 1;
7199 } else
7200 NEXT;
7201 } else {
7202 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7204 ctxt->sax->error(ctxt->userData,
7205 "xmlParseVersionInfo : expected ' or \"\n");
7206 ctxt->wellFormed = 0;
7207 ctxt->disableSAX = 1;
7208 }
7209 }
7210 return(version);
7211}
7212
7213/**
7214 * xmlParseEncName:
7215 * @ctxt: an XML parser context
7216 *
7217 * parse the XML encoding name
7218 *
7219 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7220 *
7221 * Returns the encoding name value or NULL
7222 */
7223xmlChar *
7224xmlParseEncName(xmlParserCtxtPtr ctxt) {
7225 xmlChar *buf = NULL;
7226 int len = 0;
7227 int size = 10;
7228 xmlChar cur;
7229
7230 cur = CUR;
7231 if (((cur >= 'a') && (cur <= 'z')) ||
7232 ((cur >= 'A') && (cur <= 'Z'))) {
7233 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7234 if (buf == NULL) {
7235 xmlGenericError(xmlGenericErrorContext,
7236 "malloc of %d byte failed\n", size);
7237 return(NULL);
7238 }
7239
7240 buf[len++] = cur;
7241 NEXT;
7242 cur = CUR;
7243 while (((cur >= 'a') && (cur <= 'z')) ||
7244 ((cur >= 'A') && (cur <= 'Z')) ||
7245 ((cur >= '0') && (cur <= '9')) ||
7246 (cur == '.') || (cur == '_') ||
7247 (cur == '-')) {
7248 if (len + 1 >= size) {
7249 size *= 2;
7250 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7251 if (buf == NULL) {
7252 xmlGenericError(xmlGenericErrorContext,
7253 "realloc of %d byte failed\n", size);
7254 return(NULL);
7255 }
7256 }
7257 buf[len++] = cur;
7258 NEXT;
7259 cur = CUR;
7260 if (cur == 0) {
7261 SHRINK;
7262 GROW;
7263 cur = CUR;
7264 }
7265 }
7266 buf[len] = 0;
7267 } else {
7268 ctxt->errNo = XML_ERR_ENCODING_NAME;
7269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7270 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7271 ctxt->wellFormed = 0;
7272 ctxt->disableSAX = 1;
7273 }
7274 return(buf);
7275}
7276
7277/**
7278 * xmlParseEncodingDecl:
7279 * @ctxt: an XML parser context
7280 *
7281 * parse the XML encoding declaration
7282 *
7283 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7284 *
7285 * this setups the conversion filters.
7286 *
7287 * Returns the encoding value or NULL
7288 */
7289
7290xmlChar *
7291xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7292 xmlChar *encoding = NULL;
7293 const xmlChar *q;
7294
7295 SKIP_BLANKS;
7296 if ((RAW == 'e') && (NXT(1) == 'n') &&
7297 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7298 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7299 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7300 SKIP(8);
7301 SKIP_BLANKS;
7302 if (RAW != '=') {
7303 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7305 ctxt->sax->error(ctxt->userData,
7306 "xmlParseEncodingDecl : expected '='\n");
7307 ctxt->wellFormed = 0;
7308 ctxt->disableSAX = 1;
7309 return(NULL);
7310 }
7311 NEXT;
7312 SKIP_BLANKS;
7313 if (RAW == '"') {
7314 NEXT;
7315 q = CUR_PTR;
7316 encoding = xmlParseEncName(ctxt);
7317 if (RAW != '"') {
7318 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7320 ctxt->sax->error(ctxt->userData,
7321 "String not closed\n%.50s\n", q);
7322 ctxt->wellFormed = 0;
7323 ctxt->disableSAX = 1;
7324 } else
7325 NEXT;
7326 } else if (RAW == '\''){
7327 NEXT;
7328 q = CUR_PTR;
7329 encoding = xmlParseEncName(ctxt);
7330 if (RAW != '\'') {
7331 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7333 ctxt->sax->error(ctxt->userData,
7334 "String not closed\n%.50s\n", q);
7335 ctxt->wellFormed = 0;
7336 ctxt->disableSAX = 1;
7337 } else
7338 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007339 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007340 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7342 ctxt->sax->error(ctxt->userData,
7343 "xmlParseEncodingDecl : expected ' or \"\n");
7344 ctxt->wellFormed = 0;
7345 ctxt->disableSAX = 1;
7346 }
7347 if (encoding != NULL) {
7348 xmlCharEncoding enc;
7349 xmlCharEncodingHandlerPtr handler;
7350
7351 if (ctxt->input->encoding != NULL)
7352 xmlFree((xmlChar *) ctxt->input->encoding);
7353 ctxt->input->encoding = encoding;
7354
7355 enc = xmlParseCharEncoding((const char *) encoding);
7356 /*
7357 * registered set of known encodings
7358 */
7359 if (enc != XML_CHAR_ENCODING_ERROR) {
7360 xmlSwitchEncoding(ctxt, enc);
7361 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007362 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007363 xmlFree(encoding);
7364 return(NULL);
7365 }
7366 } else {
7367 /*
7368 * fallback for unknown encodings
7369 */
7370 handler = xmlFindCharEncodingHandler((const char *) encoding);
7371 if (handler != NULL) {
7372 xmlSwitchToEncoding(ctxt, handler);
7373 } else {
7374 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7376 ctxt->sax->error(ctxt->userData,
7377 "Unsupported encoding %s\n", encoding);
7378 return(NULL);
7379 }
7380 }
7381 }
7382 }
7383 return(encoding);
7384}
7385
7386/**
7387 * xmlParseSDDecl:
7388 * @ctxt: an XML parser context
7389 *
7390 * parse the XML standalone declaration
7391 *
7392 * [32] SDDecl ::= S 'standalone' Eq
7393 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7394 *
7395 * [ VC: Standalone Document Declaration ]
7396 * TODO The standalone document declaration must have the value "no"
7397 * if any external markup declarations contain declarations of:
7398 * - attributes with default values, if elements to which these
7399 * attributes apply appear in the document without specifications
7400 * of values for these attributes, or
7401 * - entities (other than amp, lt, gt, apos, quot), if references
7402 * to those entities appear in the document, or
7403 * - attributes with values subject to normalization, where the
7404 * attribute appears in the document with a value which will change
7405 * as a result of normalization, or
7406 * - element types with element content, if white space occurs directly
7407 * within any instance of those types.
7408 *
7409 * Returns 1 if standalone, 0 otherwise
7410 */
7411
7412int
7413xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7414 int standalone = -1;
7415
7416 SKIP_BLANKS;
7417 if ((RAW == 's') && (NXT(1) == 't') &&
7418 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7419 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7420 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7421 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7422 SKIP(10);
7423 SKIP_BLANKS;
7424 if (RAW != '=') {
7425 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7427 ctxt->sax->error(ctxt->userData,
7428 "XML standalone declaration : expected '='\n");
7429 ctxt->wellFormed = 0;
7430 ctxt->disableSAX = 1;
7431 return(standalone);
7432 }
7433 NEXT;
7434 SKIP_BLANKS;
7435 if (RAW == '\''){
7436 NEXT;
7437 if ((RAW == 'n') && (NXT(1) == 'o')) {
7438 standalone = 0;
7439 SKIP(2);
7440 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7441 (NXT(2) == 's')) {
7442 standalone = 1;
7443 SKIP(3);
7444 } else {
7445 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7447 ctxt->sax->error(ctxt->userData,
7448 "standalone accepts only 'yes' or 'no'\n");
7449 ctxt->wellFormed = 0;
7450 ctxt->disableSAX = 1;
7451 }
7452 if (RAW != '\'') {
7453 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7455 ctxt->sax->error(ctxt->userData, "String not closed\n");
7456 ctxt->wellFormed = 0;
7457 ctxt->disableSAX = 1;
7458 } else
7459 NEXT;
7460 } else if (RAW == '"'){
7461 NEXT;
7462 if ((RAW == 'n') && (NXT(1) == 'o')) {
7463 standalone = 0;
7464 SKIP(2);
7465 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7466 (NXT(2) == 's')) {
7467 standalone = 1;
7468 SKIP(3);
7469 } else {
7470 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7472 ctxt->sax->error(ctxt->userData,
7473 "standalone accepts only 'yes' or 'no'\n");
7474 ctxt->wellFormed = 0;
7475 ctxt->disableSAX = 1;
7476 }
7477 if (RAW != '"') {
7478 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7480 ctxt->sax->error(ctxt->userData, "String not closed\n");
7481 ctxt->wellFormed = 0;
7482 ctxt->disableSAX = 1;
7483 } else
7484 NEXT;
7485 } else {
7486 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7488 ctxt->sax->error(ctxt->userData,
7489 "Standalone value not found\n");
7490 ctxt->wellFormed = 0;
7491 ctxt->disableSAX = 1;
7492 }
7493 }
7494 return(standalone);
7495}
7496
7497/**
7498 * xmlParseXMLDecl:
7499 * @ctxt: an XML parser context
7500 *
7501 * parse an XML declaration header
7502 *
7503 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7504 */
7505
7506void
7507xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7508 xmlChar *version;
7509
7510 /*
7511 * We know that '<?xml' is here.
7512 */
7513 SKIP(5);
7514
7515 if (!IS_BLANK(RAW)) {
7516 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7518 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7519 ctxt->wellFormed = 0;
7520 ctxt->disableSAX = 1;
7521 }
7522 SKIP_BLANKS;
7523
7524 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007525 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007526 */
7527 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007528 if (version == NULL) {
7529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7530 ctxt->sax->error(ctxt->userData,
7531 "Malformed declaration expecting version\n");
7532 ctxt->wellFormed = 0;
7533 ctxt->disableSAX = 1;
7534 } else {
7535 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7536 /*
7537 * TODO: Blueberry should be detected here
7538 */
7539 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7540 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7541 version);
7542 }
7543 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007544 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007545 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007546 }
Owen Taylor3473f882001-02-23 17:55:21 +00007547
7548 /*
7549 * We may have the encoding declaration
7550 */
7551 if (!IS_BLANK(RAW)) {
7552 if ((RAW == '?') && (NXT(1) == '>')) {
7553 SKIP(2);
7554 return;
7555 }
7556 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7558 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7559 ctxt->wellFormed = 0;
7560 ctxt->disableSAX = 1;
7561 }
7562 xmlParseEncodingDecl(ctxt);
7563 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7564 /*
7565 * The XML REC instructs us to stop parsing right here
7566 */
7567 return;
7568 }
7569
7570 /*
7571 * We may have the standalone status.
7572 */
7573 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7574 if ((RAW == '?') && (NXT(1) == '>')) {
7575 SKIP(2);
7576 return;
7577 }
7578 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7579 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7580 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7581 ctxt->wellFormed = 0;
7582 ctxt->disableSAX = 1;
7583 }
7584 SKIP_BLANKS;
7585 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7586
7587 SKIP_BLANKS;
7588 if ((RAW == '?') && (NXT(1) == '>')) {
7589 SKIP(2);
7590 } else if (RAW == '>') {
7591 /* Deprecated old WD ... */
7592 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7594 ctxt->sax->error(ctxt->userData,
7595 "XML declaration must end-up with '?>'\n");
7596 ctxt->wellFormed = 0;
7597 ctxt->disableSAX = 1;
7598 NEXT;
7599 } else {
7600 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7602 ctxt->sax->error(ctxt->userData,
7603 "parsing XML declaration: '?>' expected\n");
7604 ctxt->wellFormed = 0;
7605 ctxt->disableSAX = 1;
7606 MOVETO_ENDTAG(CUR_PTR);
7607 NEXT;
7608 }
7609}
7610
7611/**
7612 * xmlParseMisc:
7613 * @ctxt: an XML parser context
7614 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007615 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007616 *
7617 * [27] Misc ::= Comment | PI | S
7618 */
7619
7620void
7621xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007622 while (((RAW == '<') && (NXT(1) == '?')) ||
7623 ((RAW == '<') && (NXT(1) == '!') &&
7624 (NXT(2) == '-') && (NXT(3) == '-')) ||
7625 IS_BLANK(CUR)) {
7626 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007627 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007628 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007629 NEXT;
7630 } else
7631 xmlParseComment(ctxt);
7632 }
7633}
7634
7635/**
7636 * xmlParseDocument:
7637 * @ctxt: an XML parser context
7638 *
7639 * parse an XML document (and build a tree if using the standard SAX
7640 * interface).
7641 *
7642 * [1] document ::= prolog element Misc*
7643 *
7644 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7645 *
7646 * Returns 0, -1 in case of error. the parser context is augmented
7647 * as a result of the parsing.
7648 */
7649
7650int
7651xmlParseDocument(xmlParserCtxtPtr ctxt) {
7652 xmlChar start[4];
7653 xmlCharEncoding enc;
7654
7655 xmlInitParser();
7656
7657 GROW;
7658
7659 /*
7660 * SAX: beginning of the document processing.
7661 */
7662 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7663 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7664
Daniel Veillard50f34372001-08-03 12:06:36 +00007665 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007666 /*
7667 * Get the 4 first bytes and decode the charset
7668 * if enc != XML_CHAR_ENCODING_NONE
7669 * plug some encoding conversion routines.
7670 */
7671 start[0] = RAW;
7672 start[1] = NXT(1);
7673 start[2] = NXT(2);
7674 start[3] = NXT(3);
7675 enc = xmlDetectCharEncoding(start, 4);
7676 if (enc != XML_CHAR_ENCODING_NONE) {
7677 xmlSwitchEncoding(ctxt, enc);
7678 }
Owen Taylor3473f882001-02-23 17:55:21 +00007679 }
7680
7681
7682 if (CUR == 0) {
7683 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7685 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7686 ctxt->wellFormed = 0;
7687 ctxt->disableSAX = 1;
7688 }
7689
7690 /*
7691 * Check for the XMLDecl in the Prolog.
7692 */
7693 GROW;
7694 if ((RAW == '<') && (NXT(1) == '?') &&
7695 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7696 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7697
7698 /*
7699 * Note that we will switch encoding on the fly.
7700 */
7701 xmlParseXMLDecl(ctxt);
7702 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7703 /*
7704 * The XML REC instructs us to stop parsing right here
7705 */
7706 return(-1);
7707 }
7708 ctxt->standalone = ctxt->input->standalone;
7709 SKIP_BLANKS;
7710 } else {
7711 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7712 }
7713 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7714 ctxt->sax->startDocument(ctxt->userData);
7715
7716 /*
7717 * The Misc part of the Prolog
7718 */
7719 GROW;
7720 xmlParseMisc(ctxt);
7721
7722 /*
7723 * Then possibly doc type declaration(s) and more Misc
7724 * (doctypedecl Misc*)?
7725 */
7726 GROW;
7727 if ((RAW == '<') && (NXT(1) == '!') &&
7728 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7729 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7730 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7731 (NXT(8) == 'E')) {
7732
7733 ctxt->inSubset = 1;
7734 xmlParseDocTypeDecl(ctxt);
7735 if (RAW == '[') {
7736 ctxt->instate = XML_PARSER_DTD;
7737 xmlParseInternalSubset(ctxt);
7738 }
7739
7740 /*
7741 * Create and update the external subset.
7742 */
7743 ctxt->inSubset = 2;
7744 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7745 (!ctxt->disableSAX))
7746 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7747 ctxt->extSubSystem, ctxt->extSubURI);
7748 ctxt->inSubset = 0;
7749
7750
7751 ctxt->instate = XML_PARSER_PROLOG;
7752 xmlParseMisc(ctxt);
7753 }
7754
7755 /*
7756 * Time to start parsing the tree itself
7757 */
7758 GROW;
7759 if (RAW != '<') {
7760 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7762 ctxt->sax->error(ctxt->userData,
7763 "Start tag expected, '<' not found\n");
7764 ctxt->wellFormed = 0;
7765 ctxt->disableSAX = 1;
7766 ctxt->instate = XML_PARSER_EOF;
7767 } else {
7768 ctxt->instate = XML_PARSER_CONTENT;
7769 xmlParseElement(ctxt);
7770 ctxt->instate = XML_PARSER_EPILOG;
7771
7772
7773 /*
7774 * The Misc part at the end
7775 */
7776 xmlParseMisc(ctxt);
7777
Daniel Veillard561b7f82002-03-20 21:55:57 +00007778 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007779 ctxt->errNo = XML_ERR_DOCUMENT_END;
7780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7781 ctxt->sax->error(ctxt->userData,
7782 "Extra content at the end of the document\n");
7783 ctxt->wellFormed = 0;
7784 ctxt->disableSAX = 1;
7785 }
7786 ctxt->instate = XML_PARSER_EOF;
7787 }
7788
7789 /*
7790 * SAX: end of the document processing.
7791 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007792 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007793 ctxt->sax->endDocument(ctxt->userData);
7794
Daniel Veillard5997aca2002-03-18 18:36:20 +00007795 /*
7796 * Remove locally kept entity definitions if the tree was not built
7797 */
7798 if ((ctxt->myDoc != NULL) &&
7799 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7800 xmlFreeDoc(ctxt->myDoc);
7801 ctxt->myDoc = NULL;
7802 }
7803
Daniel Veillardc7612992002-02-17 22:47:37 +00007804 if (! ctxt->wellFormed) {
7805 ctxt->valid = 0;
7806 return(-1);
7807 }
Owen Taylor3473f882001-02-23 17:55:21 +00007808 return(0);
7809}
7810
7811/**
7812 * xmlParseExtParsedEnt:
7813 * @ctxt: an XML parser context
7814 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007815 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007816 * An external general parsed entity is well-formed if it matches the
7817 * production labeled extParsedEnt.
7818 *
7819 * [78] extParsedEnt ::= TextDecl? content
7820 *
7821 * Returns 0, -1 in case of error. the parser context is augmented
7822 * as a result of the parsing.
7823 */
7824
7825int
7826xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7827 xmlChar start[4];
7828 xmlCharEncoding enc;
7829
7830 xmlDefaultSAXHandlerInit();
7831
7832 GROW;
7833
7834 /*
7835 * SAX: beginning of the document processing.
7836 */
7837 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7838 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7839
7840 /*
7841 * Get the 4 first bytes and decode the charset
7842 * if enc != XML_CHAR_ENCODING_NONE
7843 * plug some encoding conversion routines.
7844 */
7845 start[0] = RAW;
7846 start[1] = NXT(1);
7847 start[2] = NXT(2);
7848 start[3] = NXT(3);
7849 enc = xmlDetectCharEncoding(start, 4);
7850 if (enc != XML_CHAR_ENCODING_NONE) {
7851 xmlSwitchEncoding(ctxt, enc);
7852 }
7853
7854
7855 if (CUR == 0) {
7856 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7858 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7859 ctxt->wellFormed = 0;
7860 ctxt->disableSAX = 1;
7861 }
7862
7863 /*
7864 * Check for the XMLDecl in the Prolog.
7865 */
7866 GROW;
7867 if ((RAW == '<') && (NXT(1) == '?') &&
7868 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7869 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7870
7871 /*
7872 * Note that we will switch encoding on the fly.
7873 */
7874 xmlParseXMLDecl(ctxt);
7875 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7876 /*
7877 * The XML REC instructs us to stop parsing right here
7878 */
7879 return(-1);
7880 }
7881 SKIP_BLANKS;
7882 } else {
7883 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7884 }
7885 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7886 ctxt->sax->startDocument(ctxt->userData);
7887
7888 /*
7889 * Doing validity checking on chunk doesn't make sense
7890 */
7891 ctxt->instate = XML_PARSER_CONTENT;
7892 ctxt->validate = 0;
7893 ctxt->loadsubset = 0;
7894 ctxt->depth = 0;
7895
7896 xmlParseContent(ctxt);
7897
7898 if ((RAW == '<') && (NXT(1) == '/')) {
7899 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7901 ctxt->sax->error(ctxt->userData,
7902 "chunk is not well balanced\n");
7903 ctxt->wellFormed = 0;
7904 ctxt->disableSAX = 1;
7905 } else if (RAW != 0) {
7906 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7908 ctxt->sax->error(ctxt->userData,
7909 "extra content at the end of well balanced chunk\n");
7910 ctxt->wellFormed = 0;
7911 ctxt->disableSAX = 1;
7912 }
7913
7914 /*
7915 * SAX: end of the document processing.
7916 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007917 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007918 ctxt->sax->endDocument(ctxt->userData);
7919
7920 if (! ctxt->wellFormed) return(-1);
7921 return(0);
7922}
7923
7924/************************************************************************
7925 * *
7926 * Progressive parsing interfaces *
7927 * *
7928 ************************************************************************/
7929
7930/**
7931 * xmlParseLookupSequence:
7932 * @ctxt: an XML parser context
7933 * @first: the first char to lookup
7934 * @next: the next char to lookup or zero
7935 * @third: the next char to lookup or zero
7936 *
7937 * Try to find if a sequence (first, next, third) or just (first next) or
7938 * (first) is available in the input stream.
7939 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7940 * to avoid rescanning sequences of bytes, it DOES change the state of the
7941 * parser, do not use liberally.
7942 *
7943 * Returns the index to the current parsing point if the full sequence
7944 * is available, -1 otherwise.
7945 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007946static int
Owen Taylor3473f882001-02-23 17:55:21 +00007947xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7948 xmlChar next, xmlChar third) {
7949 int base, len;
7950 xmlParserInputPtr in;
7951 const xmlChar *buf;
7952
7953 in = ctxt->input;
7954 if (in == NULL) return(-1);
7955 base = in->cur - in->base;
7956 if (base < 0) return(-1);
7957 if (ctxt->checkIndex > base)
7958 base = ctxt->checkIndex;
7959 if (in->buf == NULL) {
7960 buf = in->base;
7961 len = in->length;
7962 } else {
7963 buf = in->buf->buffer->content;
7964 len = in->buf->buffer->use;
7965 }
7966 /* take into account the sequence length */
7967 if (third) len -= 2;
7968 else if (next) len --;
7969 for (;base < len;base++) {
7970 if (buf[base] == first) {
7971 if (third != 0) {
7972 if ((buf[base + 1] != next) ||
7973 (buf[base + 2] != third)) continue;
7974 } else if (next != 0) {
7975 if (buf[base + 1] != next) continue;
7976 }
7977 ctxt->checkIndex = 0;
7978#ifdef DEBUG_PUSH
7979 if (next == 0)
7980 xmlGenericError(xmlGenericErrorContext,
7981 "PP: lookup '%c' found at %d\n",
7982 first, base);
7983 else if (third == 0)
7984 xmlGenericError(xmlGenericErrorContext,
7985 "PP: lookup '%c%c' found at %d\n",
7986 first, next, base);
7987 else
7988 xmlGenericError(xmlGenericErrorContext,
7989 "PP: lookup '%c%c%c' found at %d\n",
7990 first, next, third, base);
7991#endif
7992 return(base - (in->cur - in->base));
7993 }
7994 }
7995 ctxt->checkIndex = base;
7996#ifdef DEBUG_PUSH
7997 if (next == 0)
7998 xmlGenericError(xmlGenericErrorContext,
7999 "PP: lookup '%c' failed\n", first);
8000 else if (third == 0)
8001 xmlGenericError(xmlGenericErrorContext,
8002 "PP: lookup '%c%c' failed\n", first, next);
8003 else
8004 xmlGenericError(xmlGenericErrorContext,
8005 "PP: lookup '%c%c%c' failed\n", first, next, third);
8006#endif
8007 return(-1);
8008}
8009
8010/**
8011 * xmlParseTryOrFinish:
8012 * @ctxt: an XML parser context
8013 * @terminate: last chunk indicator
8014 *
8015 * Try to progress on parsing
8016 *
8017 * Returns zero if no parsing was possible
8018 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008019static int
Owen Taylor3473f882001-02-23 17:55:21 +00008020xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8021 int ret = 0;
8022 int avail;
8023 xmlChar cur, next;
8024
8025#ifdef DEBUG_PUSH
8026 switch (ctxt->instate) {
8027 case XML_PARSER_EOF:
8028 xmlGenericError(xmlGenericErrorContext,
8029 "PP: try EOF\n"); break;
8030 case XML_PARSER_START:
8031 xmlGenericError(xmlGenericErrorContext,
8032 "PP: try START\n"); break;
8033 case XML_PARSER_MISC:
8034 xmlGenericError(xmlGenericErrorContext,
8035 "PP: try MISC\n");break;
8036 case XML_PARSER_COMMENT:
8037 xmlGenericError(xmlGenericErrorContext,
8038 "PP: try COMMENT\n");break;
8039 case XML_PARSER_PROLOG:
8040 xmlGenericError(xmlGenericErrorContext,
8041 "PP: try PROLOG\n");break;
8042 case XML_PARSER_START_TAG:
8043 xmlGenericError(xmlGenericErrorContext,
8044 "PP: try START_TAG\n");break;
8045 case XML_PARSER_CONTENT:
8046 xmlGenericError(xmlGenericErrorContext,
8047 "PP: try CONTENT\n");break;
8048 case XML_PARSER_CDATA_SECTION:
8049 xmlGenericError(xmlGenericErrorContext,
8050 "PP: try CDATA_SECTION\n");break;
8051 case XML_PARSER_END_TAG:
8052 xmlGenericError(xmlGenericErrorContext,
8053 "PP: try END_TAG\n");break;
8054 case XML_PARSER_ENTITY_DECL:
8055 xmlGenericError(xmlGenericErrorContext,
8056 "PP: try ENTITY_DECL\n");break;
8057 case XML_PARSER_ENTITY_VALUE:
8058 xmlGenericError(xmlGenericErrorContext,
8059 "PP: try ENTITY_VALUE\n");break;
8060 case XML_PARSER_ATTRIBUTE_VALUE:
8061 xmlGenericError(xmlGenericErrorContext,
8062 "PP: try ATTRIBUTE_VALUE\n");break;
8063 case XML_PARSER_DTD:
8064 xmlGenericError(xmlGenericErrorContext,
8065 "PP: try DTD\n");break;
8066 case XML_PARSER_EPILOG:
8067 xmlGenericError(xmlGenericErrorContext,
8068 "PP: try EPILOG\n");break;
8069 case XML_PARSER_PI:
8070 xmlGenericError(xmlGenericErrorContext,
8071 "PP: try PI\n");break;
8072 case XML_PARSER_IGNORE:
8073 xmlGenericError(xmlGenericErrorContext,
8074 "PP: try IGNORE\n");break;
8075 }
8076#endif
8077
8078 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008079 SHRINK;
8080
Owen Taylor3473f882001-02-23 17:55:21 +00008081 /*
8082 * Pop-up of finished entities.
8083 */
8084 while ((RAW == 0) && (ctxt->inputNr > 1))
8085 xmlPopInput(ctxt);
8086
8087 if (ctxt->input ==NULL) break;
8088 if (ctxt->input->buf == NULL)
8089 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008090 else {
8091 /*
8092 * If we are operating on converted input, try to flush
8093 * remainng chars to avoid them stalling in the non-converted
8094 * buffer.
8095 */
8096 if ((ctxt->input->buf->raw != NULL) &&
8097 (ctxt->input->buf->raw->use > 0)) {
8098 int base = ctxt->input->base -
8099 ctxt->input->buf->buffer->content;
8100 int current = ctxt->input->cur - ctxt->input->base;
8101
8102 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8103 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8104 ctxt->input->cur = ctxt->input->base + current;
8105 ctxt->input->end =
8106 &ctxt->input->buf->buffer->content[
8107 ctxt->input->buf->buffer->use];
8108 }
8109 avail = ctxt->input->buf->buffer->use -
8110 (ctxt->input->cur - ctxt->input->base);
8111 }
Owen Taylor3473f882001-02-23 17:55:21 +00008112 if (avail < 1)
8113 goto done;
8114 switch (ctxt->instate) {
8115 case XML_PARSER_EOF:
8116 /*
8117 * Document parsing is done !
8118 */
8119 goto done;
8120 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008121 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8122 xmlChar start[4];
8123 xmlCharEncoding enc;
8124
8125 /*
8126 * Very first chars read from the document flow.
8127 */
8128 if (avail < 4)
8129 goto done;
8130
8131 /*
8132 * Get the 4 first bytes and decode the charset
8133 * if enc != XML_CHAR_ENCODING_NONE
8134 * plug some encoding conversion routines.
8135 */
8136 start[0] = RAW;
8137 start[1] = NXT(1);
8138 start[2] = NXT(2);
8139 start[3] = NXT(3);
8140 enc = xmlDetectCharEncoding(start, 4);
8141 if (enc != XML_CHAR_ENCODING_NONE) {
8142 xmlSwitchEncoding(ctxt, enc);
8143 }
8144 break;
8145 }
Owen Taylor3473f882001-02-23 17:55:21 +00008146
8147 cur = ctxt->input->cur[0];
8148 next = ctxt->input->cur[1];
8149 if (cur == 0) {
8150 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8151 ctxt->sax->setDocumentLocator(ctxt->userData,
8152 &xmlDefaultSAXLocator);
8153 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8155 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8156 ctxt->wellFormed = 0;
8157 ctxt->disableSAX = 1;
8158 ctxt->instate = XML_PARSER_EOF;
8159#ifdef DEBUG_PUSH
8160 xmlGenericError(xmlGenericErrorContext,
8161 "PP: entering EOF\n");
8162#endif
8163 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8164 ctxt->sax->endDocument(ctxt->userData);
8165 goto done;
8166 }
8167 if ((cur == '<') && (next == '?')) {
8168 /* PI or XML decl */
8169 if (avail < 5) return(ret);
8170 if ((!terminate) &&
8171 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8172 return(ret);
8173 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8174 ctxt->sax->setDocumentLocator(ctxt->userData,
8175 &xmlDefaultSAXLocator);
8176 if ((ctxt->input->cur[2] == 'x') &&
8177 (ctxt->input->cur[3] == 'm') &&
8178 (ctxt->input->cur[4] == 'l') &&
8179 (IS_BLANK(ctxt->input->cur[5]))) {
8180 ret += 5;
8181#ifdef DEBUG_PUSH
8182 xmlGenericError(xmlGenericErrorContext,
8183 "PP: Parsing XML Decl\n");
8184#endif
8185 xmlParseXMLDecl(ctxt);
8186 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8187 /*
8188 * The XML REC instructs us to stop parsing right
8189 * here
8190 */
8191 ctxt->instate = XML_PARSER_EOF;
8192 return(0);
8193 }
8194 ctxt->standalone = ctxt->input->standalone;
8195 if ((ctxt->encoding == NULL) &&
8196 (ctxt->input->encoding != NULL))
8197 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8198 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8199 (!ctxt->disableSAX))
8200 ctxt->sax->startDocument(ctxt->userData);
8201 ctxt->instate = XML_PARSER_MISC;
8202#ifdef DEBUG_PUSH
8203 xmlGenericError(xmlGenericErrorContext,
8204 "PP: entering MISC\n");
8205#endif
8206 } else {
8207 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8208 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8209 (!ctxt->disableSAX))
8210 ctxt->sax->startDocument(ctxt->userData);
8211 ctxt->instate = XML_PARSER_MISC;
8212#ifdef DEBUG_PUSH
8213 xmlGenericError(xmlGenericErrorContext,
8214 "PP: entering MISC\n");
8215#endif
8216 }
8217 } else {
8218 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8219 ctxt->sax->setDocumentLocator(ctxt->userData,
8220 &xmlDefaultSAXLocator);
8221 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8222 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8223 (!ctxt->disableSAX))
8224 ctxt->sax->startDocument(ctxt->userData);
8225 ctxt->instate = XML_PARSER_MISC;
8226#ifdef DEBUG_PUSH
8227 xmlGenericError(xmlGenericErrorContext,
8228 "PP: entering MISC\n");
8229#endif
8230 }
8231 break;
8232 case XML_PARSER_MISC:
8233 SKIP_BLANKS;
8234 if (ctxt->input->buf == NULL)
8235 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8236 else
8237 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8238 if (avail < 2)
8239 goto done;
8240 cur = ctxt->input->cur[0];
8241 next = ctxt->input->cur[1];
8242 if ((cur == '<') && (next == '?')) {
8243 if ((!terminate) &&
8244 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8245 goto done;
8246#ifdef DEBUG_PUSH
8247 xmlGenericError(xmlGenericErrorContext,
8248 "PP: Parsing PI\n");
8249#endif
8250 xmlParsePI(ctxt);
8251 } else if ((cur == '<') && (next == '!') &&
8252 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8253 if ((!terminate) &&
8254 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8255 goto done;
8256#ifdef DEBUG_PUSH
8257 xmlGenericError(xmlGenericErrorContext,
8258 "PP: Parsing Comment\n");
8259#endif
8260 xmlParseComment(ctxt);
8261 ctxt->instate = XML_PARSER_MISC;
8262 } else if ((cur == '<') && (next == '!') &&
8263 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8264 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8265 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8266 (ctxt->input->cur[8] == 'E')) {
8267 if ((!terminate) &&
8268 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8269 goto done;
8270#ifdef DEBUG_PUSH
8271 xmlGenericError(xmlGenericErrorContext,
8272 "PP: Parsing internal subset\n");
8273#endif
8274 ctxt->inSubset = 1;
8275 xmlParseDocTypeDecl(ctxt);
8276 if (RAW == '[') {
8277 ctxt->instate = XML_PARSER_DTD;
8278#ifdef DEBUG_PUSH
8279 xmlGenericError(xmlGenericErrorContext,
8280 "PP: entering DTD\n");
8281#endif
8282 } else {
8283 /*
8284 * Create and update the external subset.
8285 */
8286 ctxt->inSubset = 2;
8287 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8288 (ctxt->sax->externalSubset != NULL))
8289 ctxt->sax->externalSubset(ctxt->userData,
8290 ctxt->intSubName, ctxt->extSubSystem,
8291 ctxt->extSubURI);
8292 ctxt->inSubset = 0;
8293 ctxt->instate = XML_PARSER_PROLOG;
8294#ifdef DEBUG_PUSH
8295 xmlGenericError(xmlGenericErrorContext,
8296 "PP: entering PROLOG\n");
8297#endif
8298 }
8299 } else if ((cur == '<') && (next == '!') &&
8300 (avail < 9)) {
8301 goto done;
8302 } else {
8303 ctxt->instate = XML_PARSER_START_TAG;
8304#ifdef DEBUG_PUSH
8305 xmlGenericError(xmlGenericErrorContext,
8306 "PP: entering START_TAG\n");
8307#endif
8308 }
8309 break;
8310 case XML_PARSER_IGNORE:
8311 xmlGenericError(xmlGenericErrorContext,
8312 "PP: internal error, state == IGNORE");
8313 ctxt->instate = XML_PARSER_DTD;
8314#ifdef DEBUG_PUSH
8315 xmlGenericError(xmlGenericErrorContext,
8316 "PP: entering DTD\n");
8317#endif
8318 break;
8319 case XML_PARSER_PROLOG:
8320 SKIP_BLANKS;
8321 if (ctxt->input->buf == NULL)
8322 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8323 else
8324 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8325 if (avail < 2)
8326 goto done;
8327 cur = ctxt->input->cur[0];
8328 next = ctxt->input->cur[1];
8329 if ((cur == '<') && (next == '?')) {
8330 if ((!terminate) &&
8331 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8332 goto done;
8333#ifdef DEBUG_PUSH
8334 xmlGenericError(xmlGenericErrorContext,
8335 "PP: Parsing PI\n");
8336#endif
8337 xmlParsePI(ctxt);
8338 } else if ((cur == '<') && (next == '!') &&
8339 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8340 if ((!terminate) &&
8341 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8342 goto done;
8343#ifdef DEBUG_PUSH
8344 xmlGenericError(xmlGenericErrorContext,
8345 "PP: Parsing Comment\n");
8346#endif
8347 xmlParseComment(ctxt);
8348 ctxt->instate = XML_PARSER_PROLOG;
8349 } else if ((cur == '<') && (next == '!') &&
8350 (avail < 4)) {
8351 goto done;
8352 } else {
8353 ctxt->instate = XML_PARSER_START_TAG;
8354#ifdef DEBUG_PUSH
8355 xmlGenericError(xmlGenericErrorContext,
8356 "PP: entering START_TAG\n");
8357#endif
8358 }
8359 break;
8360 case XML_PARSER_EPILOG:
8361 SKIP_BLANKS;
8362 if (ctxt->input->buf == NULL)
8363 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8364 else
8365 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8366 if (avail < 2)
8367 goto done;
8368 cur = ctxt->input->cur[0];
8369 next = ctxt->input->cur[1];
8370 if ((cur == '<') && (next == '?')) {
8371 if ((!terminate) &&
8372 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8373 goto done;
8374#ifdef DEBUG_PUSH
8375 xmlGenericError(xmlGenericErrorContext,
8376 "PP: Parsing PI\n");
8377#endif
8378 xmlParsePI(ctxt);
8379 ctxt->instate = XML_PARSER_EPILOG;
8380 } else if ((cur == '<') && (next == '!') &&
8381 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8382 if ((!terminate) &&
8383 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8384 goto done;
8385#ifdef DEBUG_PUSH
8386 xmlGenericError(xmlGenericErrorContext,
8387 "PP: Parsing Comment\n");
8388#endif
8389 xmlParseComment(ctxt);
8390 ctxt->instate = XML_PARSER_EPILOG;
8391 } else if ((cur == '<') && (next == '!') &&
8392 (avail < 4)) {
8393 goto done;
8394 } else {
8395 ctxt->errNo = XML_ERR_DOCUMENT_END;
8396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8397 ctxt->sax->error(ctxt->userData,
8398 "Extra content at the end of the document\n");
8399 ctxt->wellFormed = 0;
8400 ctxt->disableSAX = 1;
8401 ctxt->instate = XML_PARSER_EOF;
8402#ifdef DEBUG_PUSH
8403 xmlGenericError(xmlGenericErrorContext,
8404 "PP: entering EOF\n");
8405#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008406 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008407 ctxt->sax->endDocument(ctxt->userData);
8408 goto done;
8409 }
8410 break;
8411 case XML_PARSER_START_TAG: {
8412 xmlChar *name, *oldname;
8413
8414 if ((avail < 2) && (ctxt->inputNr == 1))
8415 goto done;
8416 cur = ctxt->input->cur[0];
8417 if (cur != '<') {
8418 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8419 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8420 ctxt->sax->error(ctxt->userData,
8421 "Start tag expect, '<' not found\n");
8422 ctxt->wellFormed = 0;
8423 ctxt->disableSAX = 1;
8424 ctxt->instate = XML_PARSER_EOF;
8425#ifdef DEBUG_PUSH
8426 xmlGenericError(xmlGenericErrorContext,
8427 "PP: entering EOF\n");
8428#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008429 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008430 ctxt->sax->endDocument(ctxt->userData);
8431 goto done;
8432 }
8433 if ((!terminate) &&
8434 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8435 goto done;
8436 if (ctxt->spaceNr == 0)
8437 spacePush(ctxt, -1);
8438 else
8439 spacePush(ctxt, *ctxt->space);
8440 name = xmlParseStartTag(ctxt);
8441 if (name == NULL) {
8442 spacePop(ctxt);
8443 ctxt->instate = XML_PARSER_EOF;
8444#ifdef DEBUG_PUSH
8445 xmlGenericError(xmlGenericErrorContext,
8446 "PP: entering EOF\n");
8447#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008448 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008449 ctxt->sax->endDocument(ctxt->userData);
8450 goto done;
8451 }
8452 namePush(ctxt, xmlStrdup(name));
8453
8454 /*
8455 * [ VC: Root Element Type ]
8456 * The Name in the document type declaration must match
8457 * the element type of the root element.
8458 */
8459 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8460 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8461 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8462
8463 /*
8464 * Check for an Empty Element.
8465 */
8466 if ((RAW == '/') && (NXT(1) == '>')) {
8467 SKIP(2);
8468 if ((ctxt->sax != NULL) &&
8469 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8470 ctxt->sax->endElement(ctxt->userData, name);
8471 xmlFree(name);
8472 oldname = namePop(ctxt);
8473 spacePop(ctxt);
8474 if (oldname != NULL) {
8475#ifdef DEBUG_STACK
8476 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8477#endif
8478 xmlFree(oldname);
8479 }
8480 if (ctxt->name == NULL) {
8481 ctxt->instate = XML_PARSER_EPILOG;
8482#ifdef DEBUG_PUSH
8483 xmlGenericError(xmlGenericErrorContext,
8484 "PP: entering EPILOG\n");
8485#endif
8486 } else {
8487 ctxt->instate = XML_PARSER_CONTENT;
8488#ifdef DEBUG_PUSH
8489 xmlGenericError(xmlGenericErrorContext,
8490 "PP: entering CONTENT\n");
8491#endif
8492 }
8493 break;
8494 }
8495 if (RAW == '>') {
8496 NEXT;
8497 } else {
8498 ctxt->errNo = XML_ERR_GT_REQUIRED;
8499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8500 ctxt->sax->error(ctxt->userData,
8501 "Couldn't find end of Start Tag %s\n",
8502 name);
8503 ctxt->wellFormed = 0;
8504 ctxt->disableSAX = 1;
8505
8506 /*
8507 * end of parsing of this node.
8508 */
8509 nodePop(ctxt);
8510 oldname = namePop(ctxt);
8511 spacePop(ctxt);
8512 if (oldname != NULL) {
8513#ifdef DEBUG_STACK
8514 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8515#endif
8516 xmlFree(oldname);
8517 }
8518 }
8519 xmlFree(name);
8520 ctxt->instate = XML_PARSER_CONTENT;
8521#ifdef DEBUG_PUSH
8522 xmlGenericError(xmlGenericErrorContext,
8523 "PP: entering CONTENT\n");
8524#endif
8525 break;
8526 }
8527 case XML_PARSER_CONTENT: {
8528 const xmlChar *test;
8529 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008530 if ((avail < 2) && (ctxt->inputNr == 1))
8531 goto done;
8532 cur = ctxt->input->cur[0];
8533 next = ctxt->input->cur[1];
8534
8535 test = CUR_PTR;
8536 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008537 if ((cur == '<') && (next == '?')) {
8538 if ((!terminate) &&
8539 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8540 goto done;
8541#ifdef DEBUG_PUSH
8542 xmlGenericError(xmlGenericErrorContext,
8543 "PP: Parsing PI\n");
8544#endif
8545 xmlParsePI(ctxt);
8546 } else if ((cur == '<') && (next == '!') &&
8547 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8548 if ((!terminate) &&
8549 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8550 goto done;
8551#ifdef DEBUG_PUSH
8552 xmlGenericError(xmlGenericErrorContext,
8553 "PP: Parsing Comment\n");
8554#endif
8555 xmlParseComment(ctxt);
8556 ctxt->instate = XML_PARSER_CONTENT;
8557 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8558 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8559 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8560 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8561 (ctxt->input->cur[8] == '[')) {
8562 SKIP(9);
8563 ctxt->instate = XML_PARSER_CDATA_SECTION;
8564#ifdef DEBUG_PUSH
8565 xmlGenericError(xmlGenericErrorContext,
8566 "PP: entering CDATA_SECTION\n");
8567#endif
8568 break;
8569 } else if ((cur == '<') && (next == '!') &&
8570 (avail < 9)) {
8571 goto done;
8572 } else if ((cur == '<') && (next == '/')) {
8573 ctxt->instate = XML_PARSER_END_TAG;
8574#ifdef DEBUG_PUSH
8575 xmlGenericError(xmlGenericErrorContext,
8576 "PP: entering END_TAG\n");
8577#endif
8578 break;
8579 } else if (cur == '<') {
8580 ctxt->instate = XML_PARSER_START_TAG;
8581#ifdef DEBUG_PUSH
8582 xmlGenericError(xmlGenericErrorContext,
8583 "PP: entering START_TAG\n");
8584#endif
8585 break;
8586 } else if (cur == '&') {
8587 if ((!terminate) &&
8588 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8589 goto done;
8590#ifdef DEBUG_PUSH
8591 xmlGenericError(xmlGenericErrorContext,
8592 "PP: Parsing Reference\n");
8593#endif
8594 xmlParseReference(ctxt);
8595 } else {
8596 /* TODO Avoid the extra copy, handle directly !!! */
8597 /*
8598 * Goal of the following test is:
8599 * - minimize calls to the SAX 'character' callback
8600 * when they are mergeable
8601 * - handle an problem for isBlank when we only parse
8602 * a sequence of blank chars and the next one is
8603 * not available to check against '<' presence.
8604 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008605 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008606 * of the parser.
8607 */
8608 if ((ctxt->inputNr == 1) &&
8609 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8610 if ((!terminate) &&
8611 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8612 goto done;
8613 }
8614 ctxt->checkIndex = 0;
8615#ifdef DEBUG_PUSH
8616 xmlGenericError(xmlGenericErrorContext,
8617 "PP: Parsing char data\n");
8618#endif
8619 xmlParseCharData(ctxt, 0);
8620 }
8621 /*
8622 * Pop-up of finished entities.
8623 */
8624 while ((RAW == 0) && (ctxt->inputNr > 1))
8625 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008626 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008627 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8629 ctxt->sax->error(ctxt->userData,
8630 "detected an error in element content\n");
8631 ctxt->wellFormed = 0;
8632 ctxt->disableSAX = 1;
8633 ctxt->instate = XML_PARSER_EOF;
8634 break;
8635 }
8636 break;
8637 }
8638 case XML_PARSER_CDATA_SECTION: {
8639 /*
8640 * The Push mode need to have the SAX callback for
8641 * cdataBlock merge back contiguous callbacks.
8642 */
8643 int base;
8644
8645 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8646 if (base < 0) {
8647 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8648 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8649 if (ctxt->sax->cdataBlock != NULL)
8650 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8651 XML_PARSER_BIG_BUFFER_SIZE);
8652 }
8653 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8654 ctxt->checkIndex = 0;
8655 }
8656 goto done;
8657 } else {
8658 if ((ctxt->sax != NULL) && (base > 0) &&
8659 (!ctxt->disableSAX)) {
8660 if (ctxt->sax->cdataBlock != NULL)
8661 ctxt->sax->cdataBlock(ctxt->userData,
8662 ctxt->input->cur, base);
8663 }
8664 SKIP(base + 3);
8665 ctxt->checkIndex = 0;
8666 ctxt->instate = XML_PARSER_CONTENT;
8667#ifdef DEBUG_PUSH
8668 xmlGenericError(xmlGenericErrorContext,
8669 "PP: entering CONTENT\n");
8670#endif
8671 }
8672 break;
8673 }
8674 case XML_PARSER_END_TAG:
8675 if (avail < 2)
8676 goto done;
8677 if ((!terminate) &&
8678 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8679 goto done;
8680 xmlParseEndTag(ctxt);
8681 if (ctxt->name == NULL) {
8682 ctxt->instate = XML_PARSER_EPILOG;
8683#ifdef DEBUG_PUSH
8684 xmlGenericError(xmlGenericErrorContext,
8685 "PP: entering EPILOG\n");
8686#endif
8687 } else {
8688 ctxt->instate = XML_PARSER_CONTENT;
8689#ifdef DEBUG_PUSH
8690 xmlGenericError(xmlGenericErrorContext,
8691 "PP: entering CONTENT\n");
8692#endif
8693 }
8694 break;
8695 case XML_PARSER_DTD: {
8696 /*
8697 * Sorry but progressive parsing of the internal subset
8698 * is not expected to be supported. We first check that
8699 * the full content of the internal subset is available and
8700 * the parsing is launched only at that point.
8701 * Internal subset ends up with "']' S? '>'" in an unescaped
8702 * section and not in a ']]>' sequence which are conditional
8703 * sections (whoever argued to keep that crap in XML deserve
8704 * a place in hell !).
8705 */
8706 int base, i;
8707 xmlChar *buf;
8708 xmlChar quote = 0;
8709
8710 base = ctxt->input->cur - ctxt->input->base;
8711 if (base < 0) return(0);
8712 if (ctxt->checkIndex > base)
8713 base = ctxt->checkIndex;
8714 buf = ctxt->input->buf->buffer->content;
8715 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8716 base++) {
8717 if (quote != 0) {
8718 if (buf[base] == quote)
8719 quote = 0;
8720 continue;
8721 }
8722 if (buf[base] == '"') {
8723 quote = '"';
8724 continue;
8725 }
8726 if (buf[base] == '\'') {
8727 quote = '\'';
8728 continue;
8729 }
8730 if (buf[base] == ']') {
8731 if ((unsigned int) base +1 >=
8732 ctxt->input->buf->buffer->use)
8733 break;
8734 if (buf[base + 1] == ']') {
8735 /* conditional crap, skip both ']' ! */
8736 base++;
8737 continue;
8738 }
8739 for (i = 0;
8740 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8741 i++) {
8742 if (buf[base + i] == '>')
8743 goto found_end_int_subset;
8744 }
8745 break;
8746 }
8747 }
8748 /*
8749 * We didn't found the end of the Internal subset
8750 */
8751 if (quote == 0)
8752 ctxt->checkIndex = base;
8753#ifdef DEBUG_PUSH
8754 if (next == 0)
8755 xmlGenericError(xmlGenericErrorContext,
8756 "PP: lookup of int subset end filed\n");
8757#endif
8758 goto done;
8759
8760found_end_int_subset:
8761 xmlParseInternalSubset(ctxt);
8762 ctxt->inSubset = 2;
8763 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8764 (ctxt->sax->externalSubset != NULL))
8765 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8766 ctxt->extSubSystem, ctxt->extSubURI);
8767 ctxt->inSubset = 0;
8768 ctxt->instate = XML_PARSER_PROLOG;
8769 ctxt->checkIndex = 0;
8770#ifdef DEBUG_PUSH
8771 xmlGenericError(xmlGenericErrorContext,
8772 "PP: entering PROLOG\n");
8773#endif
8774 break;
8775 }
8776 case XML_PARSER_COMMENT:
8777 xmlGenericError(xmlGenericErrorContext,
8778 "PP: internal error, state == COMMENT\n");
8779 ctxt->instate = XML_PARSER_CONTENT;
8780#ifdef DEBUG_PUSH
8781 xmlGenericError(xmlGenericErrorContext,
8782 "PP: entering CONTENT\n");
8783#endif
8784 break;
8785 case XML_PARSER_PI:
8786 xmlGenericError(xmlGenericErrorContext,
8787 "PP: internal error, state == PI\n");
8788 ctxt->instate = XML_PARSER_CONTENT;
8789#ifdef DEBUG_PUSH
8790 xmlGenericError(xmlGenericErrorContext,
8791 "PP: entering CONTENT\n");
8792#endif
8793 break;
8794 case XML_PARSER_ENTITY_DECL:
8795 xmlGenericError(xmlGenericErrorContext,
8796 "PP: internal error, state == ENTITY_DECL\n");
8797 ctxt->instate = XML_PARSER_DTD;
8798#ifdef DEBUG_PUSH
8799 xmlGenericError(xmlGenericErrorContext,
8800 "PP: entering DTD\n");
8801#endif
8802 break;
8803 case XML_PARSER_ENTITY_VALUE:
8804 xmlGenericError(xmlGenericErrorContext,
8805 "PP: internal error, state == ENTITY_VALUE\n");
8806 ctxt->instate = XML_PARSER_CONTENT;
8807#ifdef DEBUG_PUSH
8808 xmlGenericError(xmlGenericErrorContext,
8809 "PP: entering DTD\n");
8810#endif
8811 break;
8812 case XML_PARSER_ATTRIBUTE_VALUE:
8813 xmlGenericError(xmlGenericErrorContext,
8814 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8815 ctxt->instate = XML_PARSER_START_TAG;
8816#ifdef DEBUG_PUSH
8817 xmlGenericError(xmlGenericErrorContext,
8818 "PP: entering START_TAG\n");
8819#endif
8820 break;
8821 case XML_PARSER_SYSTEM_LITERAL:
8822 xmlGenericError(xmlGenericErrorContext,
8823 "PP: internal error, state == SYSTEM_LITERAL\n");
8824 ctxt->instate = XML_PARSER_START_TAG;
8825#ifdef DEBUG_PUSH
8826 xmlGenericError(xmlGenericErrorContext,
8827 "PP: entering START_TAG\n");
8828#endif
8829 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008830 case XML_PARSER_PUBLIC_LITERAL:
8831 xmlGenericError(xmlGenericErrorContext,
8832 "PP: internal error, state == PUBLIC_LITERAL\n");
8833 ctxt->instate = XML_PARSER_START_TAG;
8834#ifdef DEBUG_PUSH
8835 xmlGenericError(xmlGenericErrorContext,
8836 "PP: entering START_TAG\n");
8837#endif
8838 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008839 }
8840 }
8841done:
8842#ifdef DEBUG_PUSH
8843 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8844#endif
8845 return(ret);
8846}
8847
8848/**
Owen Taylor3473f882001-02-23 17:55:21 +00008849 * xmlParseChunk:
8850 * @ctxt: an XML parser context
8851 * @chunk: an char array
8852 * @size: the size in byte of the chunk
8853 * @terminate: last chunk indicator
8854 *
8855 * Parse a Chunk of memory
8856 *
8857 * Returns zero if no error, the xmlParserErrors otherwise.
8858 */
8859int
8860xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8861 int terminate) {
8862 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8863 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8864 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8865 int cur = ctxt->input->cur - ctxt->input->base;
8866
8867 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8868 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8869 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008870 ctxt->input->end =
8871 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008872#ifdef DEBUG_PUSH
8873 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8874#endif
8875
8876 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8877 xmlParseTryOrFinish(ctxt, terminate);
8878 } else if (ctxt->instate != XML_PARSER_EOF) {
8879 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8880 xmlParserInputBufferPtr in = ctxt->input->buf;
8881 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8882 (in->raw != NULL)) {
8883 int nbchars;
8884
8885 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8886 if (nbchars < 0) {
8887 xmlGenericError(xmlGenericErrorContext,
8888 "xmlParseChunk: encoder error\n");
8889 return(XML_ERR_INVALID_ENCODING);
8890 }
8891 }
8892 }
8893 }
8894 xmlParseTryOrFinish(ctxt, terminate);
8895 if (terminate) {
8896 /*
8897 * Check for termination
8898 */
8899 if ((ctxt->instate != XML_PARSER_EOF) &&
8900 (ctxt->instate != XML_PARSER_EPILOG)) {
8901 ctxt->errNo = XML_ERR_DOCUMENT_END;
8902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8903 ctxt->sax->error(ctxt->userData,
8904 "Extra content at the end of the document\n");
8905 ctxt->wellFormed = 0;
8906 ctxt->disableSAX = 1;
8907 }
8908 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008909 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008910 ctxt->sax->endDocument(ctxt->userData);
8911 }
8912 ctxt->instate = XML_PARSER_EOF;
8913 }
8914 return((xmlParserErrors) ctxt->errNo);
8915}
8916
8917/************************************************************************
8918 * *
8919 * I/O front end functions to the parser *
8920 * *
8921 ************************************************************************/
8922
8923/**
8924 * xmlStopParser:
8925 * @ctxt: an XML parser context
8926 *
8927 * Blocks further parser processing
8928 */
8929void
8930xmlStopParser(xmlParserCtxtPtr ctxt) {
8931 ctxt->instate = XML_PARSER_EOF;
8932 if (ctxt->input != NULL)
8933 ctxt->input->cur = BAD_CAST"";
8934}
8935
8936/**
8937 * xmlCreatePushParserCtxt:
8938 * @sax: a SAX handler
8939 * @user_data: The user data returned on SAX callbacks
8940 * @chunk: a pointer to an array of chars
8941 * @size: number of chars in the array
8942 * @filename: an optional file name or URI
8943 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00008944 * Create a parser context for using the XML parser in push mode.
 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
8946 * the encoding. The remaining characters will be parsed so they
8947 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00008948 * To allow content encoding detection, @size should be >= 4
8949 * The value of @filename is used for fetching external entities
8950 * and error/warning reports.
8951 *
8952 * Returns the new parser context or NULL
8953 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00008954
Owen Taylor3473f882001-02-23 17:55:21 +00008955xmlParserCtxtPtr
8956xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8957 const char *chunk, int size, const char *filename) {
8958 xmlParserCtxtPtr ctxt;
8959 xmlParserInputPtr inputStream;
8960 xmlParserInputBufferPtr buf;
8961 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8962
8963 /*
8964 * plug some encoding conversion routines
8965 */
8966 if ((chunk != NULL) && (size >= 4))
8967 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8968
8969 buf = xmlAllocParserInputBuffer(enc);
8970 if (buf == NULL) return(NULL);
8971
8972 ctxt = xmlNewParserCtxt();
8973 if (ctxt == NULL) {
8974 xmlFree(buf);
8975 return(NULL);
8976 }
8977 if (sax != NULL) {
8978 if (ctxt->sax != &xmlDefaultSAXHandler)
8979 xmlFree(ctxt->sax);
8980 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8981 if (ctxt->sax == NULL) {
8982 xmlFree(buf);
8983 xmlFree(ctxt);
8984 return(NULL);
8985 }
8986 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8987 if (user_data != NULL)
8988 ctxt->userData = user_data;
8989 }
8990 if (filename == NULL) {
8991 ctxt->directory = NULL;
8992 } else {
8993 ctxt->directory = xmlParserGetDirectory(filename);
8994 }
8995
8996 inputStream = xmlNewInputStream(ctxt);
8997 if (inputStream == NULL) {
8998 xmlFreeParserCtxt(ctxt);
8999 return(NULL);
9000 }
9001
9002 if (filename == NULL)
9003 inputStream->filename = NULL;
9004 else
9005 inputStream->filename = xmlMemStrdup(filename);
9006 inputStream->buf = buf;
9007 inputStream->base = inputStream->buf->buffer->content;
9008 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009009 inputStream->end =
9010 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009011
9012 inputPush(ctxt, inputStream);
9013
9014 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9015 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009016 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9017 int cur = ctxt->input->cur - ctxt->input->base;
9018
Owen Taylor3473f882001-02-23 17:55:21 +00009019 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009020
9021 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9022 ctxt->input->cur = ctxt->input->base + cur;
9023 ctxt->input->end =
9024 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009025#ifdef DEBUG_PUSH
9026 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9027#endif
9028 }
9029
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009030 if (enc != XML_CHAR_ENCODING_NONE) {
9031 xmlSwitchEncoding(ctxt, enc);
9032 }
9033
Owen Taylor3473f882001-02-23 17:55:21 +00009034 return(ctxt);
9035}
9036
9037/**
9038 * xmlCreateIOParserCtxt:
9039 * @sax: a SAX handler
9040 * @user_data: The user data returned on SAX callbacks
9041 * @ioread: an I/O read function
9042 * @ioclose: an I/O close function
9043 * @ioctx: an I/O handler
9044 * @enc: the charset encoding if known
9045 *
9046 * Create a parser context for using the XML parser with an existing
9047 * I/O stream
9048 *
9049 * Returns the new parser context or NULL
9050 */
9051xmlParserCtxtPtr
9052xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9053 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9054 void *ioctx, xmlCharEncoding enc) {
9055 xmlParserCtxtPtr ctxt;
9056 xmlParserInputPtr inputStream;
9057 xmlParserInputBufferPtr buf;
9058
9059 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9060 if (buf == NULL) return(NULL);
9061
9062 ctxt = xmlNewParserCtxt();
9063 if (ctxt == NULL) {
9064 xmlFree(buf);
9065 return(NULL);
9066 }
9067 if (sax != NULL) {
9068 if (ctxt->sax != &xmlDefaultSAXHandler)
9069 xmlFree(ctxt->sax);
9070 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9071 if (ctxt->sax == NULL) {
9072 xmlFree(buf);
9073 xmlFree(ctxt);
9074 return(NULL);
9075 }
9076 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9077 if (user_data != NULL)
9078 ctxt->userData = user_data;
9079 }
9080
9081 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9082 if (inputStream == NULL) {
9083 xmlFreeParserCtxt(ctxt);
9084 return(NULL);
9085 }
9086 inputPush(ctxt, inputStream);
9087
9088 return(ctxt);
9089}
9090
9091/************************************************************************
9092 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009093 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009094 * *
9095 ************************************************************************/
9096
9097/**
9098 * xmlIOParseDTD:
9099 * @sax: the SAX handler block or NULL
9100 * @input: an Input Buffer
9101 * @enc: the charset encoding if known
9102 *
9103 * Load and parse a DTD
9104 *
9105 * Returns the resulting xmlDtdPtr or NULL in case of error.
9106 * @input will be freed at parsing end.
9107 */
9108
9109xmlDtdPtr
9110xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9111 xmlCharEncoding enc) {
9112 xmlDtdPtr ret = NULL;
9113 xmlParserCtxtPtr ctxt;
9114 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009115 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009116
9117 if (input == NULL)
9118 return(NULL);
9119
9120 ctxt = xmlNewParserCtxt();
9121 if (ctxt == NULL) {
9122 return(NULL);
9123 }
9124
9125 /*
9126 * Set-up the SAX context
9127 */
9128 if (sax != NULL) {
9129 if (ctxt->sax != NULL)
9130 xmlFree(ctxt->sax);
9131 ctxt->sax = sax;
9132 ctxt->userData = NULL;
9133 }
9134
9135 /*
9136 * generate a parser input from the I/O handler
9137 */
9138
9139 pinput = xmlNewIOInputStream(ctxt, input, enc);
9140 if (pinput == NULL) {
9141 if (sax != NULL) ctxt->sax = NULL;
9142 xmlFreeParserCtxt(ctxt);
9143 return(NULL);
9144 }
9145
9146 /*
9147 * plug some encoding conversion routines here.
9148 */
9149 xmlPushInput(ctxt, pinput);
9150
9151 pinput->filename = NULL;
9152 pinput->line = 1;
9153 pinput->col = 1;
9154 pinput->base = ctxt->input->cur;
9155 pinput->cur = ctxt->input->cur;
9156 pinput->free = NULL;
9157
9158 /*
9159 * let's parse that entity knowing it's an external subset.
9160 */
9161 ctxt->inSubset = 2;
9162 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9163 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9164 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009165
9166 if (enc == XML_CHAR_ENCODING_NONE) {
9167 /*
9168 * Get the 4 first bytes and decode the charset
9169 * if enc != XML_CHAR_ENCODING_NONE
9170 * plug some encoding conversion routines.
9171 */
9172 start[0] = RAW;
9173 start[1] = NXT(1);
9174 start[2] = NXT(2);
9175 start[3] = NXT(3);
9176 enc = xmlDetectCharEncoding(start, 4);
9177 if (enc != XML_CHAR_ENCODING_NONE) {
9178 xmlSwitchEncoding(ctxt, enc);
9179 }
9180 }
9181
Owen Taylor3473f882001-02-23 17:55:21 +00009182 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9183
9184 if (ctxt->myDoc != NULL) {
9185 if (ctxt->wellFormed) {
9186 ret = ctxt->myDoc->extSubset;
9187 ctxt->myDoc->extSubset = NULL;
9188 } else {
9189 ret = NULL;
9190 }
9191 xmlFreeDoc(ctxt->myDoc);
9192 ctxt->myDoc = NULL;
9193 }
9194 if (sax != NULL) ctxt->sax = NULL;
9195 xmlFreeParserCtxt(ctxt);
9196
9197 return(ret);
9198}
9199
9200/**
9201 * xmlSAXParseDTD:
9202 * @sax: the SAX handler block
9203 * @ExternalID: a NAME* containing the External ID of the DTD
9204 * @SystemID: a NAME* containing the URL to the DTD
9205 *
9206 * Load and parse an external subset.
9207 *
9208 * Returns the resulting xmlDtdPtr or NULL in case of error.
9209 */
9210
9211xmlDtdPtr
9212xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9213 const xmlChar *SystemID) {
9214 xmlDtdPtr ret = NULL;
9215 xmlParserCtxtPtr ctxt;
9216 xmlParserInputPtr input = NULL;
9217 xmlCharEncoding enc;
9218
9219 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9220
9221 ctxt = xmlNewParserCtxt();
9222 if (ctxt == NULL) {
9223 return(NULL);
9224 }
9225
9226 /*
9227 * Set-up the SAX context
9228 */
9229 if (sax != NULL) {
9230 if (ctxt->sax != NULL)
9231 xmlFree(ctxt->sax);
9232 ctxt->sax = sax;
9233 ctxt->userData = NULL;
9234 }
9235
9236 /*
9237 * Ask the Entity resolver to load the damn thing
9238 */
9239
9240 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9241 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9242 if (input == NULL) {
9243 if (sax != NULL) ctxt->sax = NULL;
9244 xmlFreeParserCtxt(ctxt);
9245 return(NULL);
9246 }
9247
9248 /*
9249 * plug some encoding conversion routines here.
9250 */
9251 xmlPushInput(ctxt, input);
9252 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9253 xmlSwitchEncoding(ctxt, enc);
9254
9255 if (input->filename == NULL)
9256 input->filename = (char *) xmlStrdup(SystemID);
9257 input->line = 1;
9258 input->col = 1;
9259 input->base = ctxt->input->cur;
9260 input->cur = ctxt->input->cur;
9261 input->free = NULL;
9262
9263 /*
9264 * let's parse that entity knowing it's an external subset.
9265 */
9266 ctxt->inSubset = 2;
9267 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9268 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9269 ExternalID, SystemID);
9270 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9271
9272 if (ctxt->myDoc != NULL) {
9273 if (ctxt->wellFormed) {
9274 ret = ctxt->myDoc->extSubset;
9275 ctxt->myDoc->extSubset = NULL;
9276 } else {
9277 ret = NULL;
9278 }
9279 xmlFreeDoc(ctxt->myDoc);
9280 ctxt->myDoc = NULL;
9281 }
9282 if (sax != NULL) ctxt->sax = NULL;
9283 xmlFreeParserCtxt(ctxt);
9284
9285 return(ret);
9286}
9287
9288/**
9289 * xmlParseDTD:
9290 * @ExternalID: a NAME* containing the External ID of the DTD
9291 * @SystemID: a NAME* containing the URL to the DTD
9292 *
9293 * Load and parse an external subset.
9294 *
9295 * Returns the resulting xmlDtdPtr or NULL in case of error.
9296 */
9297
9298xmlDtdPtr
9299xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9300 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9301}
9302
9303/************************************************************************
9304 * *
9305 * Front ends when parsing an Entity *
9306 * *
9307 ************************************************************************/
9308
9309/**
Owen Taylor3473f882001-02-23 17:55:21 +00009310 * xmlParseCtxtExternalEntity:
9311 * @ctx: the existing parsing context
9312 * @URL: the URL for the entity to load
9313 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009314 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009315 *
9316 * Parse an external general entity within an existing parsing context
9317 * An external general parsed entity is well-formed if it matches the
9318 * production labeled extParsedEnt.
9319 *
9320 * [78] extParsedEnt ::= TextDecl? content
9321 *
9322 * Returns 0 if the entity is well formed, -1 in case of args problem and
9323 * the parser error code otherwise
9324 */
9325
9326int
9327xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009328 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009329 xmlParserCtxtPtr ctxt;
9330 xmlDocPtr newDoc;
9331 xmlSAXHandlerPtr oldsax = NULL;
9332 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009333 xmlChar start[4];
9334 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009335
9336 if (ctx->depth > 40) {
9337 return(XML_ERR_ENTITY_LOOP);
9338 }
9339
Daniel Veillardcda96922001-08-21 10:56:31 +00009340 if (lst != NULL)
9341 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009342 if ((URL == NULL) && (ID == NULL))
9343 return(-1);
9344 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9345 return(-1);
9346
9347
9348 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9349 if (ctxt == NULL) return(-1);
9350 ctxt->userData = ctxt;
9351 oldsax = ctxt->sax;
9352 ctxt->sax = ctx->sax;
9353 newDoc = xmlNewDoc(BAD_CAST "1.0");
9354 if (newDoc == NULL) {
9355 xmlFreeParserCtxt(ctxt);
9356 return(-1);
9357 }
9358 if (ctx->myDoc != NULL) {
9359 newDoc->intSubset = ctx->myDoc->intSubset;
9360 newDoc->extSubset = ctx->myDoc->extSubset;
9361 }
9362 if (ctx->myDoc->URL != NULL) {
9363 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9364 }
9365 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9366 if (newDoc->children == NULL) {
9367 ctxt->sax = oldsax;
9368 xmlFreeParserCtxt(ctxt);
9369 newDoc->intSubset = NULL;
9370 newDoc->extSubset = NULL;
9371 xmlFreeDoc(newDoc);
9372 return(-1);
9373 }
9374 nodePush(ctxt, newDoc->children);
9375 if (ctx->myDoc == NULL) {
9376 ctxt->myDoc = newDoc;
9377 } else {
9378 ctxt->myDoc = ctx->myDoc;
9379 newDoc->children->doc = ctx->myDoc;
9380 }
9381
Daniel Veillard87a764e2001-06-20 17:41:10 +00009382 /*
9383 * Get the 4 first bytes and decode the charset
9384 * if enc != XML_CHAR_ENCODING_NONE
9385 * plug some encoding conversion routines.
9386 */
9387 GROW
9388 start[0] = RAW;
9389 start[1] = NXT(1);
9390 start[2] = NXT(2);
9391 start[3] = NXT(3);
9392 enc = xmlDetectCharEncoding(start, 4);
9393 if (enc != XML_CHAR_ENCODING_NONE) {
9394 xmlSwitchEncoding(ctxt, enc);
9395 }
9396
Owen Taylor3473f882001-02-23 17:55:21 +00009397 /*
9398 * Parse a possible text declaration first
9399 */
Owen Taylor3473f882001-02-23 17:55:21 +00009400 if ((RAW == '<') && (NXT(1) == '?') &&
9401 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9402 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9403 xmlParseTextDecl(ctxt);
9404 }
9405
9406 /*
9407 * Doing validity checking on chunk doesn't make sense
9408 */
9409 ctxt->instate = XML_PARSER_CONTENT;
9410 ctxt->validate = ctx->validate;
9411 ctxt->loadsubset = ctx->loadsubset;
9412 ctxt->depth = ctx->depth + 1;
9413 ctxt->replaceEntities = ctx->replaceEntities;
9414 if (ctxt->validate) {
9415 ctxt->vctxt.error = ctx->vctxt.error;
9416 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009417 } else {
9418 ctxt->vctxt.error = NULL;
9419 ctxt->vctxt.warning = NULL;
9420 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009421 ctxt->vctxt.nodeTab = NULL;
9422 ctxt->vctxt.nodeNr = 0;
9423 ctxt->vctxt.nodeMax = 0;
9424 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009425
9426 xmlParseContent(ctxt);
9427
9428 if ((RAW == '<') && (NXT(1) == '/')) {
9429 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9431 ctxt->sax->error(ctxt->userData,
9432 "chunk is not well balanced\n");
9433 ctxt->wellFormed = 0;
9434 ctxt->disableSAX = 1;
9435 } else if (RAW != 0) {
9436 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9438 ctxt->sax->error(ctxt->userData,
9439 "extra content at the end of well balanced chunk\n");
9440 ctxt->wellFormed = 0;
9441 ctxt->disableSAX = 1;
9442 }
9443 if (ctxt->node != newDoc->children) {
9444 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9446 ctxt->sax->error(ctxt->userData,
9447 "chunk is not well balanced\n");
9448 ctxt->wellFormed = 0;
9449 ctxt->disableSAX = 1;
9450 }
9451
9452 if (!ctxt->wellFormed) {
9453 if (ctxt->errNo == 0)
9454 ret = 1;
9455 else
9456 ret = ctxt->errNo;
9457 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009458 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009459 xmlNodePtr cur;
9460
9461 /*
9462 * Return the newly created nodeset after unlinking it from
9463 * they pseudo parent.
9464 */
9465 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009466 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009467 while (cur != NULL) {
9468 cur->parent = NULL;
9469 cur = cur->next;
9470 }
9471 newDoc->children->children = NULL;
9472 }
9473 ret = 0;
9474 }
9475 ctxt->sax = oldsax;
9476 xmlFreeParserCtxt(ctxt);
9477 newDoc->intSubset = NULL;
9478 newDoc->extSubset = NULL;
9479 xmlFreeDoc(newDoc);
9480
9481 return(ret);
9482}
9483
9484/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009485 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009486 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009487 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009488 * @sax: the SAX handler bloc (possibly NULL)
9489 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9490 * @depth: Used for loop detection, use 0
9491 * @URL: the URL for the entity to load
9492 * @ID: the System ID for the entity to load
9493 * @list: the return value for the set of parsed nodes
9494 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009495 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009496 *
9497 * Returns 0 if the entity is well formed, -1 in case of args problem and
9498 * the parser error code otherwise
9499 */
9500
Daniel Veillard257d9102001-05-08 10:41:44 +00009501static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009502xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9503 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009504 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009505 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009506 xmlParserCtxtPtr ctxt;
9507 xmlDocPtr newDoc;
9508 xmlSAXHandlerPtr oldsax = NULL;
9509 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009510 xmlChar start[4];
9511 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009512
9513 if (depth > 40) {
9514 return(XML_ERR_ENTITY_LOOP);
9515 }
9516
9517
9518
9519 if (list != NULL)
9520 *list = NULL;
9521 if ((URL == NULL) && (ID == NULL))
9522 return(-1);
9523 if (doc == NULL) /* @@ relax but check for dereferences */
9524 return(-1);
9525
9526
9527 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9528 if (ctxt == NULL) return(-1);
9529 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009530 if (oldctxt != NULL) {
9531 ctxt->_private = oldctxt->_private;
9532 ctxt->loadsubset = oldctxt->loadsubset;
9533 ctxt->validate = oldctxt->validate;
9534 ctxt->external = oldctxt->external;
9535 } else {
9536 /*
9537 * Doing validity checking on chunk without context
9538 * doesn't make sense
9539 */
9540 ctxt->_private = NULL;
9541 ctxt->validate = 0;
9542 ctxt->external = 2;
9543 ctxt->loadsubset = 0;
9544 }
Owen Taylor3473f882001-02-23 17:55:21 +00009545 if (sax != NULL) {
9546 oldsax = ctxt->sax;
9547 ctxt->sax = sax;
9548 if (user_data != NULL)
9549 ctxt->userData = user_data;
9550 }
9551 newDoc = xmlNewDoc(BAD_CAST "1.0");
9552 if (newDoc == NULL) {
9553 xmlFreeParserCtxt(ctxt);
9554 return(-1);
9555 }
9556 if (doc != NULL) {
9557 newDoc->intSubset = doc->intSubset;
9558 newDoc->extSubset = doc->extSubset;
9559 }
9560 if (doc->URL != NULL) {
9561 newDoc->URL = xmlStrdup(doc->URL);
9562 }
9563 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9564 if (newDoc->children == NULL) {
9565 if (sax != NULL)
9566 ctxt->sax = oldsax;
9567 xmlFreeParserCtxt(ctxt);
9568 newDoc->intSubset = NULL;
9569 newDoc->extSubset = NULL;
9570 xmlFreeDoc(newDoc);
9571 return(-1);
9572 }
9573 nodePush(ctxt, newDoc->children);
9574 if (doc == NULL) {
9575 ctxt->myDoc = newDoc;
9576 } else {
9577 ctxt->myDoc = doc;
9578 newDoc->children->doc = doc;
9579 }
9580
Daniel Veillard87a764e2001-06-20 17:41:10 +00009581 /*
9582 * Get the 4 first bytes and decode the charset
9583 * if enc != XML_CHAR_ENCODING_NONE
9584 * plug some encoding conversion routines.
9585 */
9586 GROW;
9587 start[0] = RAW;
9588 start[1] = NXT(1);
9589 start[2] = NXT(2);
9590 start[3] = NXT(3);
9591 enc = xmlDetectCharEncoding(start, 4);
9592 if (enc != XML_CHAR_ENCODING_NONE) {
9593 xmlSwitchEncoding(ctxt, enc);
9594 }
9595
Owen Taylor3473f882001-02-23 17:55:21 +00009596 /*
9597 * Parse a possible text declaration first
9598 */
Owen Taylor3473f882001-02-23 17:55:21 +00009599 if ((RAW == '<') && (NXT(1) == '?') &&
9600 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9601 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9602 xmlParseTextDecl(ctxt);
9603 }
9604
Owen Taylor3473f882001-02-23 17:55:21 +00009605 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009606 ctxt->depth = depth;
9607
9608 xmlParseContent(ctxt);
9609
Daniel Veillard561b7f82002-03-20 21:55:57 +00009610 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009611 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9613 ctxt->sax->error(ctxt->userData,
9614 "chunk is not well balanced\n");
9615 ctxt->wellFormed = 0;
9616 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009617 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009618 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9620 ctxt->sax->error(ctxt->userData,
9621 "extra content at the end of well balanced chunk\n");
9622 ctxt->wellFormed = 0;
9623 ctxt->disableSAX = 1;
9624 }
9625 if (ctxt->node != newDoc->children) {
9626 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9628 ctxt->sax->error(ctxt->userData,
9629 "chunk is not well balanced\n");
9630 ctxt->wellFormed = 0;
9631 ctxt->disableSAX = 1;
9632 }
9633
9634 if (!ctxt->wellFormed) {
9635 if (ctxt->errNo == 0)
9636 ret = 1;
9637 else
9638 ret = ctxt->errNo;
9639 } else {
9640 if (list != NULL) {
9641 xmlNodePtr cur;
9642
9643 /*
9644 * Return the newly created nodeset after unlinking it from
9645 * they pseudo parent.
9646 */
9647 cur = newDoc->children->children;
9648 *list = cur;
9649 while (cur != NULL) {
9650 cur->parent = NULL;
9651 cur = cur->next;
9652 }
9653 newDoc->children->children = NULL;
9654 }
9655 ret = 0;
9656 }
9657 if (sax != NULL)
9658 ctxt->sax = oldsax;
9659 xmlFreeParserCtxt(ctxt);
9660 newDoc->intSubset = NULL;
9661 newDoc->extSubset = NULL;
9662 xmlFreeDoc(newDoc);
9663
9664 return(ret);
9665}
9666
9667/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009668 * xmlParseExternalEntity:
9669 * @doc: the document the chunk pertains to
9670 * @sax: the SAX handler bloc (possibly NULL)
9671 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9672 * @depth: Used for loop detection, use 0
9673 * @URL: the URL for the entity to load
9674 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009675 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009676 *
9677 * Parse an external general entity
9678 * An external general parsed entity is well-formed if it matches the
9679 * production labeled extParsedEnt.
9680 *
9681 * [78] extParsedEnt ::= TextDecl? content
9682 *
9683 * Returns 0 if the entity is well formed, -1 in case of args problem and
9684 * the parser error code otherwise
9685 */
9686
9687int
9688xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009689 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009690 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009691 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009692}
9693
9694/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009695 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009696 * @doc: the document the chunk pertains to
9697 * @sax: the SAX handler bloc (possibly NULL)
9698 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9699 * @depth: Used for loop detection, use 0
9700 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009701 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009702 *
9703 * Parse a well-balanced chunk of an XML document
9704 * called by the parser
9705 * The allowed sequence for the Well Balanced Chunk is the one defined by
9706 * the content production in the XML grammar:
9707 *
9708 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9709 *
9710 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9711 * the parser error code otherwise
9712 */
9713
9714int
9715xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009716 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009717 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9718 depth, string, lst, 0 );
9719}
9720
9721/**
9722 * xmlParseBalancedChunkMemoryRecover:
9723 * @doc: the document the chunk pertains to
9724 * @sax: the SAX handler bloc (possibly NULL)
9725 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9726 * @depth: Used for loop detection, use 0
9727 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9728 * @lst: the return value for the set of parsed nodes
9729 * @recover: return nodes even if the data is broken (use 0)
9730 *
9731 *
9732 * Parse a well-balanced chunk of an XML document
9733 * called by the parser
9734 * The allowed sequence for the Well Balanced Chunk is the one defined by
9735 * the content production in the XML grammar:
9736 *
9737 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9738 *
9739 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9740 * the parser error code otherwise
9741 *
9742 * In case recover is set to 1, the nodelist will not be empty even if
9743 * the parsed chunk is not well balanced.
9744 */
9745int
9746xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9747 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
9748 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +00009749 xmlParserCtxtPtr ctxt;
9750 xmlDocPtr newDoc;
9751 xmlSAXHandlerPtr oldsax = NULL;
9752 int size;
9753 int ret = 0;
9754
9755 if (depth > 40) {
9756 return(XML_ERR_ENTITY_LOOP);
9757 }
9758
9759
Daniel Veillardcda96922001-08-21 10:56:31 +00009760 if (lst != NULL)
9761 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009762 if (string == NULL)
9763 return(-1);
9764
9765 size = xmlStrlen(string);
9766
9767 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9768 if (ctxt == NULL) return(-1);
9769 ctxt->userData = ctxt;
9770 if (sax != NULL) {
9771 oldsax = ctxt->sax;
9772 ctxt->sax = sax;
9773 if (user_data != NULL)
9774 ctxt->userData = user_data;
9775 }
9776 newDoc = xmlNewDoc(BAD_CAST "1.0");
9777 if (newDoc == NULL) {
9778 xmlFreeParserCtxt(ctxt);
9779 return(-1);
9780 }
9781 if (doc != NULL) {
9782 newDoc->intSubset = doc->intSubset;
9783 newDoc->extSubset = doc->extSubset;
9784 }
9785 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9786 if (newDoc->children == NULL) {
9787 if (sax != NULL)
9788 ctxt->sax = oldsax;
9789 xmlFreeParserCtxt(ctxt);
9790 newDoc->intSubset = NULL;
9791 newDoc->extSubset = NULL;
9792 xmlFreeDoc(newDoc);
9793 return(-1);
9794 }
9795 nodePush(ctxt, newDoc->children);
9796 if (doc == NULL) {
9797 ctxt->myDoc = newDoc;
9798 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +00009799 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +00009800 newDoc->children->doc = doc;
9801 }
9802 ctxt->instate = XML_PARSER_CONTENT;
9803 ctxt->depth = depth;
9804
9805 /*
9806 * Doing validity checking on chunk doesn't make sense
9807 */
9808 ctxt->validate = 0;
9809 ctxt->loadsubset = 0;
9810
9811 xmlParseContent(ctxt);
9812
9813 if ((RAW == '<') && (NXT(1) == '/')) {
9814 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9816 ctxt->sax->error(ctxt->userData,
9817 "chunk is not well balanced\n");
9818 ctxt->wellFormed = 0;
9819 ctxt->disableSAX = 1;
9820 } else if (RAW != 0) {
9821 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9823 ctxt->sax->error(ctxt->userData,
9824 "extra content at the end of well balanced chunk\n");
9825 ctxt->wellFormed = 0;
9826 ctxt->disableSAX = 1;
9827 }
9828 if (ctxt->node != newDoc->children) {
9829 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9831 ctxt->sax->error(ctxt->userData,
9832 "chunk is not well balanced\n");
9833 ctxt->wellFormed = 0;
9834 ctxt->disableSAX = 1;
9835 }
9836
9837 if (!ctxt->wellFormed) {
9838 if (ctxt->errNo == 0)
9839 ret = 1;
9840 else
9841 ret = ctxt->errNo;
9842 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009843 ret = 0;
9844 }
9845
9846 if (lst != NULL && (ret == 0 || recover == 1)) {
9847 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009848
9849 /*
9850 * Return the newly created nodeset after unlinking it from
9851 * they pseudo parent.
9852 */
9853 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009854 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009855 while (cur != NULL) {
9856 cur->parent = NULL;
9857 cur = cur->next;
9858 }
9859 newDoc->children->children = NULL;
9860 }
Daniel Veillard58e44c92002-08-02 22:19:49 +00009861
Owen Taylor3473f882001-02-23 17:55:21 +00009862 if (sax != NULL)
9863 ctxt->sax = oldsax;
9864 xmlFreeParserCtxt(ctxt);
9865 newDoc->intSubset = NULL;
9866 newDoc->extSubset = NULL;
9867 xmlFreeDoc(newDoc);
9868
9869 return(ret);
9870}
9871
9872/**
9873 * xmlSAXParseEntity:
9874 * @sax: the SAX handler block
9875 * @filename: the filename
9876 *
9877 * parse an XML external entity out of context and build a tree.
9878 * It use the given SAX function block to handle the parsing callback.
9879 * If sax is NULL, fallback to the default DOM tree building routines.
9880 *
9881 * [78] extParsedEnt ::= TextDecl? content
9882 *
9883 * This correspond to a "Well Balanced" chunk
9884 *
9885 * Returns the resulting document tree
9886 */
9887
9888xmlDocPtr
9889xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9890 xmlDocPtr ret;
9891 xmlParserCtxtPtr ctxt;
9892 char *directory = NULL;
9893
9894 ctxt = xmlCreateFileParserCtxt(filename);
9895 if (ctxt == NULL) {
9896 return(NULL);
9897 }
9898 if (sax != NULL) {
9899 if (ctxt->sax != NULL)
9900 xmlFree(ctxt->sax);
9901 ctxt->sax = sax;
9902 ctxt->userData = NULL;
9903 }
9904
9905 if ((ctxt->directory == NULL) && (directory == NULL))
9906 directory = xmlParserGetDirectory(filename);
9907
9908 xmlParseExtParsedEnt(ctxt);
9909
9910 if (ctxt->wellFormed)
9911 ret = ctxt->myDoc;
9912 else {
9913 ret = NULL;
9914 xmlFreeDoc(ctxt->myDoc);
9915 ctxt->myDoc = NULL;
9916 }
9917 if (sax != NULL)
9918 ctxt->sax = NULL;
9919 xmlFreeParserCtxt(ctxt);
9920
9921 return(ret);
9922}
9923
9924/**
9925 * xmlParseEntity:
9926 * @filename: the filename
9927 *
9928 * parse an XML external entity out of context and build a tree.
9929 *
9930 * [78] extParsedEnt ::= TextDecl? content
9931 *
9932 * This correspond to a "Well Balanced" chunk
9933 *
9934 * Returns the resulting document tree
9935 */
9936
9937xmlDocPtr
9938xmlParseEntity(const char *filename) {
9939 return(xmlSAXParseEntity(NULL, filename));
9940}
9941
9942/**
9943 * xmlCreateEntityParserCtxt:
9944 * @URL: the entity URL
9945 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009946 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009947 *
9948 * Create a parser context for an external entity
9949 * Automatic support for ZLIB/Compress compressed document is provided
9950 * by default if found at compile-time.
9951 *
9952 * Returns the new parser context or NULL
9953 */
9954xmlParserCtxtPtr
9955xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9956 const xmlChar *base) {
9957 xmlParserCtxtPtr ctxt;
9958 xmlParserInputPtr inputStream;
9959 char *directory = NULL;
9960 xmlChar *uri;
9961
9962 ctxt = xmlNewParserCtxt();
9963 if (ctxt == NULL) {
9964 return(NULL);
9965 }
9966
9967 uri = xmlBuildURI(URL, base);
9968
9969 if (uri == NULL) {
9970 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9971 if (inputStream == NULL) {
9972 xmlFreeParserCtxt(ctxt);
9973 return(NULL);
9974 }
9975
9976 inputPush(ctxt, inputStream);
9977
9978 if ((ctxt->directory == NULL) && (directory == NULL))
9979 directory = xmlParserGetDirectory((char *)URL);
9980 if ((ctxt->directory == NULL) && (directory != NULL))
9981 ctxt->directory = directory;
9982 } else {
9983 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9984 if (inputStream == NULL) {
9985 xmlFree(uri);
9986 xmlFreeParserCtxt(ctxt);
9987 return(NULL);
9988 }
9989
9990 inputPush(ctxt, inputStream);
9991
9992 if ((ctxt->directory == NULL) && (directory == NULL))
9993 directory = xmlParserGetDirectory((char *)uri);
9994 if ((ctxt->directory == NULL) && (directory != NULL))
9995 ctxt->directory = directory;
9996 xmlFree(uri);
9997 }
9998
9999 return(ctxt);
10000}
10001
10002/************************************************************************
10003 * *
10004 * Front ends when parsing from a file *
10005 * *
10006 ************************************************************************/
10007
10008/**
10009 * xmlCreateFileParserCtxt:
10010 * @filename: the filename
10011 *
10012 * Create a parser context for a file content.
10013 * Automatic support for ZLIB/Compress compressed document is provided
10014 * by default if found at compile-time.
10015 *
10016 * Returns the new parser context or NULL
10017 */
10018xmlParserCtxtPtr
10019xmlCreateFileParserCtxt(const char *filename)
10020{
10021 xmlParserCtxtPtr ctxt;
10022 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010023 char *directory = NULL;
10024
Owen Taylor3473f882001-02-23 17:55:21 +000010025 ctxt = xmlNewParserCtxt();
10026 if (ctxt == NULL) {
10027 if (xmlDefaultSAXHandler.error != NULL) {
10028 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10029 }
10030 return(NULL);
10031 }
10032
Daniel Veillard9f7b84b2001-08-23 15:31:19 +000010033 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010034 if (inputStream == NULL) {
10035 xmlFreeParserCtxt(ctxt);
10036 return(NULL);
10037 }
10038
Owen Taylor3473f882001-02-23 17:55:21 +000010039 inputPush(ctxt, inputStream);
10040 if ((ctxt->directory == NULL) && (directory == NULL))
10041 directory = xmlParserGetDirectory(filename);
10042 if ((ctxt->directory == NULL) && (directory != NULL))
10043 ctxt->directory = directory;
10044
10045 return(ctxt);
10046}
10047
10048/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010049 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010050 * @sax: the SAX handler block
10051 * @filename: the filename
10052 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10053 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010054 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010055 *
10056 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10057 * compressed document is provided by default if found at compile-time.
10058 * It use the given SAX function block to handle the parsing callback.
10059 * If sax is NULL, fallback to the default DOM tree building routines.
10060 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010061 * User data (void *) is stored within the parser context in the
10062 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010063 *
Owen Taylor3473f882001-02-23 17:55:21 +000010064 * Returns the resulting document tree
10065 */
10066
10067xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010068xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10069 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010070 xmlDocPtr ret;
10071 xmlParserCtxtPtr ctxt;
10072 char *directory = NULL;
10073
Daniel Veillard635ef722001-10-29 11:48:19 +000010074 xmlInitParser();
10075
Owen Taylor3473f882001-02-23 17:55:21 +000010076 ctxt = xmlCreateFileParserCtxt(filename);
10077 if (ctxt == NULL) {
10078 return(NULL);
10079 }
10080 if (sax != NULL) {
10081 if (ctxt->sax != NULL)
10082 xmlFree(ctxt->sax);
10083 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010084 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010085 if (data!=NULL) {
10086 ctxt->_private=data;
10087 }
Owen Taylor3473f882001-02-23 17:55:21 +000010088
10089 if ((ctxt->directory == NULL) && (directory == NULL))
10090 directory = xmlParserGetDirectory(filename);
10091 if ((ctxt->directory == NULL) && (directory != NULL))
10092 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10093
10094 xmlParseDocument(ctxt);
10095
10096 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10097 else {
10098 ret = NULL;
10099 xmlFreeDoc(ctxt->myDoc);
10100 ctxt->myDoc = NULL;
10101 }
10102 if (sax != NULL)
10103 ctxt->sax = NULL;
10104 xmlFreeParserCtxt(ctxt);
10105
10106 return(ret);
10107}
10108
10109/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010110 * xmlSAXParseFile:
10111 * @sax: the SAX handler block
10112 * @filename: the filename
10113 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10114 * documents
10115 *
10116 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10117 * compressed document is provided by default if found at compile-time.
10118 * It use the given SAX function block to handle the parsing callback.
10119 * If sax is NULL, fallback to the default DOM tree building routines.
10120 *
10121 * Returns the resulting document tree
10122 */
10123
10124xmlDocPtr
10125xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10126 int recovery) {
10127 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10128}
10129
10130/**
Owen Taylor3473f882001-02-23 17:55:21 +000010131 * xmlRecoverDoc:
10132 * @cur: a pointer to an array of xmlChar
10133 *
10134 * parse an XML in-memory document and build a tree.
10135 * In the case the document is not Well Formed, a tree is built anyway
10136 *
10137 * Returns the resulting document tree
10138 */
10139
10140xmlDocPtr
10141xmlRecoverDoc(xmlChar *cur) {
10142 return(xmlSAXParseDoc(NULL, cur, 1));
10143}
10144
10145/**
10146 * xmlParseFile:
10147 * @filename: the filename
10148 *
10149 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10150 * compressed document is provided by default if found at compile-time.
10151 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010152 * Returns the resulting document tree if the file was wellformed,
10153 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010154 */
10155
10156xmlDocPtr
10157xmlParseFile(const char *filename) {
10158 return(xmlSAXParseFile(NULL, filename, 0));
10159}
10160
10161/**
10162 * xmlRecoverFile:
10163 * @filename: the filename
10164 *
10165 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10166 * compressed document is provided by default if found at compile-time.
10167 * In the case the document is not Well Formed, a tree is built anyway
10168 *
10169 * Returns the resulting document tree
10170 */
10171
10172xmlDocPtr
10173xmlRecoverFile(const char *filename) {
10174 return(xmlSAXParseFile(NULL, filename, 1));
10175}
10176
10177
10178/**
10179 * xmlSetupParserForBuffer:
10180 * @ctxt: an XML parser context
10181 * @buffer: a xmlChar * buffer
10182 * @filename: a file name
10183 *
10184 * Setup the parser context to parse a new buffer; Clears any prior
10185 * contents from the parser context. The buffer parameter must not be
10186 * NULL, but the filename parameter can be
10187 */
10188void
10189xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10190 const char* filename)
10191{
10192 xmlParserInputPtr input;
10193
10194 input = xmlNewInputStream(ctxt);
10195 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010196 xmlGenericError(xmlGenericErrorContext,
10197 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010198 xmlFree(ctxt);
10199 return;
10200 }
10201
10202 xmlClearParserCtxt(ctxt);
10203 if (filename != NULL)
10204 input->filename = xmlMemStrdup(filename);
10205 input->base = buffer;
10206 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010207 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010208 inputPush(ctxt, input);
10209}
10210
10211/**
10212 * xmlSAXUserParseFile:
10213 * @sax: a SAX handler
10214 * @user_data: The user data returned on SAX callbacks
10215 * @filename: a file name
10216 *
10217 * parse an XML file and call the given SAX handler routines.
10218 * Automatic support for ZLIB/Compress compressed document is provided
10219 *
10220 * Returns 0 in case of success or a error number otherwise
10221 */
10222int
10223xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10224 const char *filename) {
10225 int ret = 0;
10226 xmlParserCtxtPtr ctxt;
10227
10228 ctxt = xmlCreateFileParserCtxt(filename);
10229 if (ctxt == NULL) return -1;
10230 if (ctxt->sax != &xmlDefaultSAXHandler)
10231 xmlFree(ctxt->sax);
10232 ctxt->sax = sax;
10233 if (user_data != NULL)
10234 ctxt->userData = user_data;
10235
10236 xmlParseDocument(ctxt);
10237
10238 if (ctxt->wellFormed)
10239 ret = 0;
10240 else {
10241 if (ctxt->errNo != 0)
10242 ret = ctxt->errNo;
10243 else
10244 ret = -1;
10245 }
10246 if (sax != NULL)
10247 ctxt->sax = NULL;
10248 xmlFreeParserCtxt(ctxt);
10249
10250 return ret;
10251}
10252
10253/************************************************************************
10254 * *
10255 * Front ends when parsing from memory *
10256 * *
10257 ************************************************************************/
10258
10259/**
10260 * xmlCreateMemoryParserCtxt:
10261 * @buffer: a pointer to a char array
10262 * @size: the size of the array
10263 *
10264 * Create a parser context for an XML in-memory document.
10265 *
10266 * Returns the new parser context or NULL
10267 */
10268xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010269xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010270 xmlParserCtxtPtr ctxt;
10271 xmlParserInputPtr input;
10272 xmlParserInputBufferPtr buf;
10273
10274 if (buffer == NULL)
10275 return(NULL);
10276 if (size <= 0)
10277 return(NULL);
10278
10279 ctxt = xmlNewParserCtxt();
10280 if (ctxt == NULL)
10281 return(NULL);
10282
10283 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10284 if (buf == NULL) return(NULL);
10285
10286 input = xmlNewInputStream(ctxt);
10287 if (input == NULL) {
10288 xmlFreeParserCtxt(ctxt);
10289 return(NULL);
10290 }
10291
10292 input->filename = NULL;
10293 input->buf = buf;
10294 input->base = input->buf->buffer->content;
10295 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010296 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010297
10298 inputPush(ctxt, input);
10299 return(ctxt);
10300}
10301
10302/**
10303 * xmlSAXParseMemory:
10304 * @sax: the SAX handler block
10305 * @buffer: an pointer to a char array
10306 * @size: the size of the array
10307 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10308 * documents
10309 *
10310 * parse an XML in-memory block and use the given SAX function block
10311 * to handle the parsing callback. If sax is NULL, fallback to the default
10312 * DOM tree building routines.
10313 *
10314 * Returns the resulting document tree
10315 */
10316xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010317xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10318 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010319 xmlDocPtr ret;
10320 xmlParserCtxtPtr ctxt;
10321
10322 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10323 if (ctxt == NULL) return(NULL);
10324 if (sax != NULL) {
10325 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010326 }
10327
10328 xmlParseDocument(ctxt);
10329
10330 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10331 else {
10332 ret = NULL;
10333 xmlFreeDoc(ctxt->myDoc);
10334 ctxt->myDoc = NULL;
10335 }
10336 if (sax != NULL)
10337 ctxt->sax = NULL;
10338 xmlFreeParserCtxt(ctxt);
10339
10340 return(ret);
10341}
10342
10343/**
10344 * xmlParseMemory:
10345 * @buffer: an pointer to a char array
10346 * @size: the size of the array
10347 *
10348 * parse an XML in-memory block and build a tree.
10349 *
10350 * Returns the resulting document tree
10351 */
10352
Daniel Veillard50822cb2001-07-26 20:05:51 +000010353xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010354 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10355}
10356
10357/**
10358 * xmlRecoverMemory:
10359 * @buffer: an pointer to a char array
10360 * @size: the size of the array
10361 *
10362 * parse an XML in-memory block and build a tree.
10363 * In the case the document is not Well Formed, a tree is built anyway
10364 *
10365 * Returns the resulting document tree
10366 */
10367
Daniel Veillard50822cb2001-07-26 20:05:51 +000010368xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010369 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10370}
10371
10372/**
10373 * xmlSAXUserParseMemory:
10374 * @sax: a SAX handler
10375 * @user_data: The user data returned on SAX callbacks
10376 * @buffer: an in-memory XML document input
10377 * @size: the length of the XML document in bytes
10378 *
10379 * A better SAX parsing routine.
10380 * parse an XML in-memory buffer and call the given SAX handler routines.
10381 *
10382 * Returns 0 in case of success or a error number otherwise
10383 */
10384int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010385 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010386 int ret = 0;
10387 xmlParserCtxtPtr ctxt;
10388 xmlSAXHandlerPtr oldsax = NULL;
10389
Daniel Veillard9e923512002-08-14 08:48:52 +000010390 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010391 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10392 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010393 oldsax = ctxt->sax;
10394 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010395 if (user_data != NULL)
10396 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010397
10398 xmlParseDocument(ctxt);
10399
10400 if (ctxt->wellFormed)
10401 ret = 0;
10402 else {
10403 if (ctxt->errNo != 0)
10404 ret = ctxt->errNo;
10405 else
10406 ret = -1;
10407 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010408 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010409 xmlFreeParserCtxt(ctxt);
10410
10411 return ret;
10412}
10413
10414/**
10415 * xmlCreateDocParserCtxt:
10416 * @cur: a pointer to an array of xmlChar
10417 *
10418 * Creates a parser context for an XML in-memory document.
10419 *
10420 * Returns the new parser context or NULL
10421 */
10422xmlParserCtxtPtr
10423xmlCreateDocParserCtxt(xmlChar *cur) {
10424 int len;
10425
10426 if (cur == NULL)
10427 return(NULL);
10428 len = xmlStrlen(cur);
10429 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10430}
10431
10432/**
10433 * xmlSAXParseDoc:
10434 * @sax: the SAX handler block
10435 * @cur: a pointer to an array of xmlChar
10436 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10437 * documents
10438 *
10439 * parse an XML in-memory document and build a tree.
10440 * It use the given SAX function block to handle the parsing callback.
10441 * If sax is NULL, fallback to the default DOM tree building routines.
10442 *
10443 * Returns the resulting document tree
10444 */
10445
10446xmlDocPtr
10447xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10448 xmlDocPtr ret;
10449 xmlParserCtxtPtr ctxt;
10450
10451 if (cur == NULL) return(NULL);
10452
10453
10454 ctxt = xmlCreateDocParserCtxt(cur);
10455 if (ctxt == NULL) return(NULL);
10456 if (sax != NULL) {
10457 ctxt->sax = sax;
10458 ctxt->userData = NULL;
10459 }
10460
10461 xmlParseDocument(ctxt);
10462 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10463 else {
10464 ret = NULL;
10465 xmlFreeDoc(ctxt->myDoc);
10466 ctxt->myDoc = NULL;
10467 }
10468 if (sax != NULL)
10469 ctxt->sax = NULL;
10470 xmlFreeParserCtxt(ctxt);
10471
10472 return(ret);
10473}
10474
10475/**
10476 * xmlParseDoc:
10477 * @cur: a pointer to an array of xmlChar
10478 *
10479 * parse an XML in-memory document and build a tree.
10480 *
10481 * Returns the resulting document tree
10482 */
10483
10484xmlDocPtr
10485xmlParseDoc(xmlChar *cur) {
10486 return(xmlSAXParseDoc(NULL, cur, 0));
10487}
10488
Daniel Veillard8107a222002-01-13 14:10:10 +000010489/************************************************************************
10490 * *
10491 * Specific function to keep track of entities references *
10492 * and used by the XSLT debugger *
10493 * *
10494 ************************************************************************/
10495
10496static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10497
10498/**
10499 * xmlAddEntityReference:
10500 * @ent : A valid entity
10501 * @firstNode : A valid first node for children of entity
10502 * @lastNode : A valid last node of children entity
10503 *
10504 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10505 */
10506static void
10507xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10508 xmlNodePtr lastNode)
10509{
10510 if (xmlEntityRefFunc != NULL) {
10511 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10512 }
10513}
10514
10515
10516/**
10517 * xmlSetEntityReferenceFunc:
10518 * @func : A valid function
10519 *
10520 * Set the function to call call back when a xml reference has been made
10521 */
10522void
10523xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10524{
10525 xmlEntityRefFunc = func;
10526}
Owen Taylor3473f882001-02-23 17:55:21 +000010527
10528/************************************************************************
10529 * *
10530 * Miscellaneous *
10531 * *
10532 ************************************************************************/
10533
10534#ifdef LIBXML_XPATH_ENABLED
10535#include <libxml/xpath.h>
10536#endif
10537
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010538extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010539static int xmlParserInitialized = 0;
10540
10541/**
10542 * xmlInitParser:
10543 *
10544 * Initialization function for the XML parser.
10545 * This is not reentrant. Call once before processing in case of
10546 * use in multithreaded programs.
10547 */
10548
10549void
10550xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010551 if (xmlParserInitialized != 0)
10552 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010553
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010554 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10555 (xmlGenericError == NULL))
10556 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010557 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010558 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010559 xmlInitCharEncodingHandlers();
10560 xmlInitializePredefinedEntities();
10561 xmlDefaultSAXHandlerInit();
10562 xmlRegisterDefaultInputCallbacks();
10563 xmlRegisterDefaultOutputCallbacks();
10564#ifdef LIBXML_HTML_ENABLED
10565 htmlInitAutoClose();
10566 htmlDefaultSAXHandlerInit();
10567#endif
10568#ifdef LIBXML_XPATH_ENABLED
10569 xmlXPathInit();
10570#endif
10571 xmlParserInitialized = 1;
10572}
10573
10574/**
10575 * xmlCleanupParser:
10576 *
10577 * Cleanup function for the XML parser. It tries to reclaim all
10578 * parsing related global memory allocated for the parser processing.
10579 * It doesn't deallocate any document related memory. Calling this
10580 * function should not prevent reusing the parser.
10581 */
10582
10583void
10584xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010585 xmlCleanupCharEncodingHandlers();
10586 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010587#ifdef LIBXML_CATALOG_ENABLED
10588 xmlCatalogCleanup();
10589#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010590 xmlCleanupThreads();
10591 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010592}