blob: a3f77869569e080cd86c02f4676dfca2d2e5ac76 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * Names of the "xml"-prefixed processing instructions allowed by the
 * W3C specifications. The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Owen Taylor3473f882001-02-23 17:55:21 +0000108/************************************************************************
109 * *
110 * Parser stacks related functions and macros *
111 * *
112 ************************************************************************/
113
114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
115 const xmlChar ** str);
116
117/*
118 * Generic function for accessing stacks in the Parser Context
119 */
120
121#define PUSH_AND_POP(scope, type, name) \
122scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
123 if (ctxt->name##Nr >= ctxt->name##Max) { \
124 ctxt->name##Max *= 2; \
125 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
126 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
127 if (ctxt->name##Tab == NULL) { \
128 xmlGenericError(xmlGenericErrorContext, \
129 "realloc failed !\n"); \
130 return(0); \
131 } \
132 } \
133 ctxt->name##Tab[ctxt->name##Nr] = value; \
134 ctxt->name = value; \
135 return(ctxt->name##Nr++); \
136} \
137scope type name##Pop(xmlParserCtxtPtr ctxt) { \
138 type ret; \
139 if (ctxt->name##Nr <= 0) return(0); \
140 ctxt->name##Nr--; \
141 if (ctxt->name##Nr > 0) \
142 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
143 else \
144 ctxt->name = NULL; \
145 ret = ctxt->name##Tab[ctxt->name##Nr]; \
146 ctxt->name##Tab[ctxt->name##Nr] = 0; \
147 return(ret); \
148} \
149
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000150/**
151 * inputPop:
152 * @ctxt: an XML parser context
153 *
154 * Pops the top parser input from the input stack
155 *
156 * Returns the input just removed
157 */
158/**
159 * inputPush:
160 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 *
163 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 *
165 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166 */
167/**
168 * namePop:
169 * @ctxt: an XML parser context
170 *
171 * Pops the top element name from the name stack
172 *
173 * Returns the name just removed
174 */
175/**
176 * namePush:
177 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 *
180 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 *
182 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 */
184/**
185 * nodePop:
186 * @ctxt: an XML parser context
187 *
188 * Pops the top element node from the node stack
189 *
190 * Returns the node just removed
191 */
192/**
193 * nodePush:
194 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000196 *
197 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 *
199 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000200 */
Owen Taylor3473f882001-02-23 17:55:21 +0000201/*
202 * Those macros actually generate the functions
203 */
204PUSH_AND_POP(extern, xmlParserInputPtr, input)
205PUSH_AND_POP(extern, xmlNodePtr, node)
206PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
237/*
238 * Macros for accessing the content. Those should be used only by the parser,
239 * and not exported.
240 *
241 * Dirty macros, i.e. one often need to make assumption on the context to
242 * use them
243 *
244 * CUR_PTR return the current pointer to the xmlChar to be parsed.
245 * To be used with extreme caution since operations consuming
246 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
255 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
256 * strings within the parser.
257 *
258 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
259 *
260 * NEXT Skip to the next character, this does the proper decoding
261 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000262 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000263 * CUR_CHAR(l) returns the current unicode character (int), set l
264 * to the number of xmlChars used for the encoding [0-5].
265 * CUR_SCHAR same but operate on a string instead of the context
266 * COPY_BUF copy the current unicode char to the target buffer, increment
267 * the index
268 * GROW, SHRINK handling of input buffers
269 */
270
/*
 * Raw accessors on the current input; all of them expect a variable named
 * "ctxt" to be in scope at the point of use.
 *
 * RAW      current byte of the input buffer, or -1 while a token is
 *          pending in ctxt->token (so it never compares equal to a
 *          character in that case).
 * CUR      the pending ctxt->token if there is one, else the current byte.
 * NXT(val) the byte at offset val from the current position.
 * CUR_PTR  the current position pointer itself.
 *
 * NXT and CUR_PTR are fully parenthesized so that they expand safely
 * inside larger expressions.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
275
/*
 * SKIP(val): advance the current input by val xmlChars (counted in
 * ctxt->nbChars as well). If the new position holds a '%' the parameter
 * entity handler is invoked; if it holds 0 and the input cannot be grown
 * the current input is popped. Note that val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%') {					\
	xmlParserHandlePEReference(ctxt);				\
    }									\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
	xmlPopInput(ctxt);						\
    }									\
  } while (0)
283
Daniel Veillard46de64e2002-05-29 08:21:33 +0000284#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
285 xmlSHRINK (ctxt);
286
287static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
288 xmlParserInputShrink(ctxt->input);
289 if ((*ctxt->input->cur == 0) &&
290 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
291 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000292 }
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard46de64e2002-05-29 08:21:33 +0000294#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
295 xmlGROW (ctxt);
296
297static void xmlGROW (xmlParserCtxtPtr ctxt) {
298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
299 if ((*ctxt->input->cur == 0) &&
300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
301 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303
/*
 * Character-level helpers; all of them expect a variable named "ctxt" to
 * be in scope at the point of use. Macro arguments are parenthesized so
 * that expressions can be passed safely.
 */

/* Skip blanks at the current position (see xmlSkipBlankChars). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar and ask for more input if the new
 * current byte is 0. Expands to a brace block, not a single expression.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): update line/column from the byte being left, clear any pending
 * token, advance by l xmlChars and hand over to the parameter entity
 * handler if a '%' is now current.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the input; l receives its length in xmlChars. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i; a length l of 1 is stored directly, anything else goes through
 * xmlCopyCharMultiByte. Expands to an unbraced if/else, so it must be used
 * as a complete statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000329
330/**
331 * xmlSkipBlankChars:
332 * @ctxt: the XML parser context
333 *
334 * skip all blanks character found at that point in the input streams.
335 * It pops up finished entities in the process if allowable at that point.
336 *
337 * Returns the number of space chars skipped
338 */
339
340int
341xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000343
Daniel Veillard02141ea2001-04-30 11:46:40 +0000344 if (ctxt->token != 0) {
345 if (!IS_BLANK(ctxt->token))
346 return(0);
347 ctxt->token = 0;
348 res++;
349 }
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
351 * It's Okay to use CUR/NEXT here since all the blanks are on
352 * the ASCII range.
353 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000354 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
355 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000356 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000357 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000358 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000359 cur = ctxt->input->cur;
360 while (IS_BLANK(*cur)) {
361 if (*cur == '\n') {
362 ctxt->input->line++; ctxt->input->col = 1;
363 }
364 cur++;
365 res++;
366 if (*cur == 0) {
367 ctxt->input->cur = cur;
368 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
369 cur = ctxt->input->cur;
370 }
371 }
372 ctxt->input->cur = cur;
373 } else {
374 int cur;
375 do {
376 cur = CUR;
377 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
378 NEXT;
379 cur = CUR;
380 res++;
381 }
382 while ((cur == 0) && (ctxt->inputNr > 1) &&
383 (ctxt->instate != XML_PARSER_COMMENT)) {
384 xmlPopInput(ctxt);
385 cur = CUR;
386 }
387 /*
388 * Need to handle support of entities branching here
389 */
390 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
391 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
392 }
Owen Taylor3473f882001-02-23 17:55:21 +0000393 return(res);
394}
395
396/************************************************************************
397 * *
398 * Commodity functions to handle entities *
399 * *
400 ************************************************************************/
401
402/**
403 * xmlPopInput:
404 * @ctxt: an XML parser context
405 *
406 * xmlPopInput: the current input pointed by ctxt->input came to an end
407 * pop it and return the next char.
408 *
409 * Returns the current xmlChar in the parser context
410 */
411xmlChar
412xmlPopInput(xmlParserCtxtPtr ctxt) {
413 if (ctxt->inputNr == 1) return(0); /* End of main Input */
414 if (xmlParserDebugEntities)
415 xmlGenericError(xmlGenericErrorContext,
416 "Popping input %d\n", ctxt->inputNr);
417 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000418 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000419 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
420 return(xmlPopInput(ctxt));
421 return(CUR);
422}
423
424/**
425 * xmlPushInput:
426 * @ctxt: an XML parser context
427 * @input: an XML parser input fragment (entity, XML fragment ...).
428 *
429 * xmlPushInput: switch to a new input stream which is stacked on top
430 * of the previous one(s).
431 */
432void
433xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
434 if (input == NULL) return;
435
436 if (xmlParserDebugEntities) {
437 if ((ctxt->input != NULL) && (ctxt->input->filename))
438 xmlGenericError(xmlGenericErrorContext,
439 "%s(%d): ", ctxt->input->filename,
440 ctxt->input->line);
441 xmlGenericError(xmlGenericErrorContext,
442 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
443 }
444 inputPush(ctxt, input);
445 GROW;
446}
447
448/**
449 * xmlParseCharRef:
450 * @ctxt: an XML parser context
451 *
452 * parse Reference declarations
453 *
454 * [66] CharRef ::= '&#' [0-9]+ ';' |
455 * '&#x' [0-9a-fA-F]+ ';'
456 *
457 * [ WFC: Legal Character ]
458 * Characters referred to using character references must match the
459 * production for Char.
460 *
461 * Returns the value parsed (as an int), 0 in case of error
462 */
463int
464xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000465 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000466 int count = 0;
467
468 if (ctxt->token != 0) {
469 val = ctxt->token;
470 ctxt->token = 0;
471 return(val);
472 }
473 /*
474 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
475 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000476 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000477 (NXT(2) == 'x')) {
478 SKIP(3);
479 GROW;
480 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000481 if (count++ > 20) {
482 count = 0;
483 GROW;
484 }
485 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000486 val = val * 16 + (CUR - '0');
487 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
488 val = val * 16 + (CUR - 'a') + 10;
489 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
490 val = val * 16 + (CUR - 'A') + 10;
491 else {
492 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
494 ctxt->sax->error(ctxt->userData,
495 "xmlParseCharRef: invalid hexadecimal value\n");
496 ctxt->wellFormed = 0;
497 ctxt->disableSAX = 1;
498 val = 0;
499 break;
500 }
501 NEXT;
502 count++;
503 }
504 if (RAW == ';') {
505 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
506 ctxt->nbChars ++;
507 ctxt->input->cur++;
508 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000509 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000510 SKIP(2);
511 GROW;
512 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000513 if (count++ > 20) {
514 count = 0;
515 GROW;
516 }
517 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000518 val = val * 10 + (CUR - '0');
519 else {
520 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
522 ctxt->sax->error(ctxt->userData,
523 "xmlParseCharRef: invalid decimal value\n");
524 ctxt->wellFormed = 0;
525 ctxt->disableSAX = 1;
526 val = 0;
527 break;
528 }
529 NEXT;
530 count++;
531 }
532 if (RAW == ';') {
533 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
534 ctxt->nbChars ++;
535 ctxt->input->cur++;
536 }
537 } else {
538 ctxt->errNo = XML_ERR_INVALID_CHARREF;
539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
540 ctxt->sax->error(ctxt->userData,
541 "xmlParseCharRef: invalid value\n");
542 ctxt->wellFormed = 0;
543 ctxt->disableSAX = 1;
544 }
545
546 /*
547 * [ WFC: Legal Character ]
548 * Characters referred to using character references must match the
549 * production for Char.
550 */
551 if (IS_CHAR(val)) {
552 return(val);
553 } else {
554 ctxt->errNo = XML_ERR_INVALID_CHAR;
555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000556 ctxt->sax->error(ctxt->userData,
557 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000558 val);
559 ctxt->wellFormed = 0;
560 ctxt->disableSAX = 1;
561 }
562 return(0);
563}
564
565/**
566 * xmlParseStringCharRef:
567 * @ctxt: an XML parser context
568 * @str: a pointer to an index in the string
569 *
570 * parse Reference declarations, variant parsing from a string rather
571 * than an an input flow.
572 *
573 * [66] CharRef ::= '&#' [0-9]+ ';' |
574 * '&#x' [0-9a-fA-F]+ ';'
575 *
576 * [ WFC: Legal Character ]
577 * Characters referred to using character references must match the
578 * production for Char.
579 *
580 * Returns the value parsed (as an int), 0 in case of error, str will be
581 * updated to the current value of the index
582 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000583static int
Owen Taylor3473f882001-02-23 17:55:21 +0000584xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
585 const xmlChar *ptr;
586 xmlChar cur;
587 int val = 0;
588
589 if ((str == NULL) || (*str == NULL)) return(0);
590 ptr = *str;
591 cur = *ptr;
592 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
593 ptr += 3;
594 cur = *ptr;
595 while (cur != ';') { /* Non input consuming loop */
596 if ((cur >= '0') && (cur <= '9'))
597 val = val * 16 + (cur - '0');
598 else if ((cur >= 'a') && (cur <= 'f'))
599 val = val * 16 + (cur - 'a') + 10;
600 else if ((cur >= 'A') && (cur <= 'F'))
601 val = val * 16 + (cur - 'A') + 10;
602 else {
603 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
605 ctxt->sax->error(ctxt->userData,
606 "xmlParseStringCharRef: invalid hexadecimal value\n");
607 ctxt->wellFormed = 0;
608 ctxt->disableSAX = 1;
609 val = 0;
610 break;
611 }
612 ptr++;
613 cur = *ptr;
614 }
615 if (cur == ';')
616 ptr++;
617 } else if ((cur == '&') && (ptr[1] == '#')){
618 ptr += 2;
619 cur = *ptr;
620 while (cur != ';') { /* Non input consuming loops */
621 if ((cur >= '0') && (cur <= '9'))
622 val = val * 10 + (cur - '0');
623 else {
624 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
626 ctxt->sax->error(ctxt->userData,
627 "xmlParseStringCharRef: invalid decimal value\n");
628 ctxt->wellFormed = 0;
629 ctxt->disableSAX = 1;
630 val = 0;
631 break;
632 }
633 ptr++;
634 cur = *ptr;
635 }
636 if (cur == ';')
637 ptr++;
638 } else {
639 ctxt->errNo = XML_ERR_INVALID_CHARREF;
640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
641 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000642 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000643 ctxt->wellFormed = 0;
644 ctxt->disableSAX = 1;
645 return(0);
646 }
647 *str = ptr;
648
649 /*
650 * [ WFC: Legal Character ]
651 * Characters referred to using character references must match the
652 * production for Char.
653 */
654 if (IS_CHAR(val)) {
655 return(val);
656 } else {
657 ctxt->errNo = XML_ERR_INVALID_CHAR;
658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
659 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000660 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000661 ctxt->wellFormed = 0;
662 ctxt->disableSAX = 1;
663 }
664 return(0);
665}
666
667/**
668 * xmlParserHandlePEReference:
669 * @ctxt: the parser context
670 *
671 * [69] PEReference ::= '%' Name ';'
672 *
673 * [ WFC: No Recursion ]
674 * A parsed entity must not contain a recursive
675 * reference to itself, either directly or indirectly.
676 *
677 * [ WFC: Entity Declared ]
678 * In a document without any DTD, a document with only an internal DTD
679 * subset which contains no parameter entity references, or a document
680 * with "standalone='yes'", ... ... The declaration of a parameter
681 * entity must precede any reference to it...
682 *
683 * [ VC: Entity Declared ]
684 * In a document with an external subset or external parameter entities
685 * with "standalone='no'", ... ... The declaration of a parameter entity
686 * must precede any reference to it...
687 *
688 * [ WFC: In DTD ]
689 * Parameter-entity references may only appear in the DTD.
690 * NOTE: misleading but this is handled.
691 *
692 * A PEReference may have been detected in the current input stream
693 * the handling is done accordingly to
694 * http://www.w3.org/TR/REC-xml#entproc
695 * i.e.
696 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000698 */
699void
700xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
701 xmlChar *name;
702 xmlEntityPtr entity = NULL;
703 xmlParserInputPtr input;
704
705 if (ctxt->token != 0) {
706 return;
707 }
708 if (RAW != '%') return;
709 switch(ctxt->instate) {
710 case XML_PARSER_CDATA_SECTION:
711 return;
712 case XML_PARSER_COMMENT:
713 return;
714 case XML_PARSER_START_TAG:
715 return;
716 case XML_PARSER_END_TAG:
717 return;
718 case XML_PARSER_EOF:
719 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
721 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
722 ctxt->wellFormed = 0;
723 ctxt->disableSAX = 1;
724 return;
725 case XML_PARSER_PROLOG:
726 case XML_PARSER_START:
727 case XML_PARSER_MISC:
728 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
730 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
731 ctxt->wellFormed = 0;
732 ctxt->disableSAX = 1;
733 return;
734 case XML_PARSER_ENTITY_DECL:
735 case XML_PARSER_CONTENT:
736 case XML_PARSER_ATTRIBUTE_VALUE:
737 case XML_PARSER_PI:
738 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000739 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000740 /* we just ignore it there */
741 return;
742 case XML_PARSER_EPILOG:
743 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
745 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
746 ctxt->wellFormed = 0;
747 ctxt->disableSAX = 1;
748 return;
749 case XML_PARSER_ENTITY_VALUE:
750 /*
751 * NOTE: in the case of entity values, we don't do the
752 * substitution here since we need the literal
753 * entity value to be able to save the internal
754 * subset of the document.
755 * This will be handled by xmlStringDecodeEntities
756 */
757 return;
758 case XML_PARSER_DTD:
759 /*
760 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
761 * In the internal DTD subset, parameter-entity references
762 * can occur only where markup declarations can occur, not
763 * within markup declarations.
764 * In that case this is handled in xmlParseMarkupDecl
765 */
766 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
767 return;
768 break;
769 case XML_PARSER_IGNORE:
770 return;
771 }
772
773 NEXT;
774 name = xmlParseName(ctxt);
775 if (xmlParserDebugEntities)
776 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000777 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000778 if (name == NULL) {
779 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000781 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000782 ctxt->wellFormed = 0;
783 ctxt->disableSAX = 1;
784 } else {
785 if (RAW == ';') {
786 NEXT;
787 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
788 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
789 if (entity == NULL) {
790
791 /*
792 * [ WFC: Entity Declared ]
793 * In a document without any DTD, a document with only an
794 * internal DTD subset which contains no parameter entity
795 * references, or a document with "standalone='yes'", ...
796 * ... The declaration of a parameter entity must precede
797 * any reference to it...
798 */
799 if ((ctxt->standalone == 1) ||
800 ((ctxt->hasExternalSubset == 0) &&
801 (ctxt->hasPErefs == 0))) {
802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
803 ctxt->sax->error(ctxt->userData,
804 "PEReference: %%%s; not found\n", name);
805 ctxt->wellFormed = 0;
806 ctxt->disableSAX = 1;
807 } else {
808 /*
809 * [ VC: Entity Declared ]
810 * In a document with an external subset or external
811 * parameter entities with "standalone='no'", ...
812 * ... The declaration of a parameter entity must precede
813 * any reference to it...
814 */
815 if ((!ctxt->disableSAX) &&
816 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
817 ctxt->vctxt.error(ctxt->vctxt.userData,
818 "PEReference: %%%s; not found\n", name);
819 } else if ((!ctxt->disableSAX) &&
820 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
821 ctxt->sax->warning(ctxt->userData,
822 "PEReference: %%%s; not found\n", name);
823 ctxt->valid = 0;
824 }
825 } else {
826 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
827 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000828 xmlChar start[4];
829 xmlCharEncoding enc;
830
Owen Taylor3473f882001-02-23 17:55:21 +0000831 /*
832 * handle the extra spaces added before and after
833 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000834 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000835 */
836 input = xmlNewEntityInputStream(ctxt, entity);
837 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000838
839 /*
840 * Get the 4 first bytes and decode the charset
841 * if enc != XML_CHAR_ENCODING_NONE
842 * plug some encoding conversion routines.
843 */
844 GROW
Daniel Veillard561b7f82002-03-20 21:55:57 +0000845 start[0] = RAW;
846 start[1] = NXT(1);
847 start[2] = NXT(2);
848 start[3] = NXT(3);
849 enc = xmlDetectCharEncoding(start, 4);
850 if (enc != XML_CHAR_ENCODING_NONE) {
851 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000852 }
853
Owen Taylor3473f882001-02-23 17:55:21 +0000854 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
855 (RAW == '<') && (NXT(1) == '?') &&
856 (NXT(2) == 'x') && (NXT(3) == 'm') &&
857 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
858 xmlParseTextDecl(ctxt);
859 }
860 if (ctxt->token == 0)
861 ctxt->token = ' ';
862 } else {
863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
864 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000865 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000866 name);
867 ctxt->wellFormed = 0;
868 ctxt->disableSAX = 1;
869 }
870 }
871 } else {
872 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
874 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000875 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000876 ctxt->wellFormed = 0;
877 ctxt->disableSAX = 1;
878 }
879 xmlFree(name);
880 }
881}
882
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the heap block "buffer" whose size in
 * xmlChars is held in the companion variable "buffer_size". It expands to
 * a brace block and may only be used inside a function returning a
 * pointer: if the reallocation fails it reports through perror, releases
 * the old block (which xmlRealloc left allocated) and makes the enclosing
 * function return NULL. The size is only updated once the reallocation
 * has succeeded.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown_##buffer = (xmlChar *)				\
    	xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));	\
    if (grown_##buffer == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_##buffer;						\
    buffer##_size *= 2;							\
}
895
896/**
897 * xmlStringDecodeEntities:
898 * @ctxt: the parser context
899 * @str: the input string
900 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
901 * @end: an end marker xmlChar, 0 if none
902 * @end2: an end marker xmlChar, 0 if none
903 * @end3: an end marker xmlChar, 0 if none
904 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000905 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000906 *
907 * [67] Reference ::= EntityRef | CharRef
908 *
909 * [69] PEReference ::= '%' Name ';'
910 *
911 * Returns A newly allocated string with the substitution done. The caller
912 * must deallocate it !
913 */
914xmlChar *
915xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
916 xmlChar end, xmlChar end2, xmlChar end3) {
917 xmlChar *buffer = NULL;
918 int buffer_size = 0;
919
920 xmlChar *current = NULL;
921 xmlEntityPtr ent;
922 int c,l;
923 int nbchars = 0;
924
925 if (str == NULL)
926 return(NULL);
927
928 if (ctxt->depth > 40) {
929 ctxt->errNo = XML_ERR_ENTITY_LOOP;
930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
931 ctxt->sax->error(ctxt->userData,
932 "Detected entity reference loop\n");
933 ctxt->wellFormed = 0;
934 ctxt->disableSAX = 1;
935 return(NULL);
936 }
937
938 /*
939 * allocate a translation buffer.
940 */
941 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
942 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
943 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000944 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000945 return(NULL);
946 }
947
948 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000949 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000950 * we are operating on already parsed values.
951 */
952 c = CUR_SCHAR(str, l);
953 while ((c != 0) && (c != end) && /* non input consuming loop */
954 (c != end2) && (c != end3)) {
955
956 if (c == 0) break;
957 if ((c == '&') && (str[1] == '#')) {
958 int val = xmlParseStringCharRef(ctxt, &str);
959 if (val != 0) {
960 COPY_BUF(0,buffer,nbchars,val);
961 }
962 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
963 if (xmlParserDebugEntities)
964 xmlGenericError(xmlGenericErrorContext,
965 "String decoding Entity Reference: %.30s\n",
966 str);
967 ent = xmlParseStringEntityRef(ctxt, &str);
968 if ((ent != NULL) &&
969 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
970 if (ent->content != NULL) {
971 COPY_BUF(0,buffer,nbchars,ent->content[0]);
972 } else {
973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
974 ctxt->sax->error(ctxt->userData,
975 "internal error entity has no content\n");
976 }
977 } else if ((ent != NULL) && (ent->content != NULL)) {
978 xmlChar *rep;
979
980 ctxt->depth++;
981 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
982 0, 0, 0);
983 ctxt->depth--;
984 if (rep != NULL) {
985 current = rep;
986 while (*current != 0) { /* non input consuming loop */
987 buffer[nbchars++] = *current++;
988 if (nbchars >
989 buffer_size - XML_PARSER_BUFFER_SIZE) {
990 growBuffer(buffer);
991 }
992 }
993 xmlFree(rep);
994 }
995 } else if (ent != NULL) {
996 int i = xmlStrlen(ent->name);
997 const xmlChar *cur = ent->name;
998
999 buffer[nbchars++] = '&';
1000 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1001 growBuffer(buffer);
1002 }
1003 for (;i > 0;i--)
1004 buffer[nbchars++] = *cur++;
1005 buffer[nbchars++] = ';';
1006 }
1007 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1008 if (xmlParserDebugEntities)
1009 xmlGenericError(xmlGenericErrorContext,
1010 "String decoding PE Reference: %.30s\n", str);
1011 ent = xmlParseStringPEReference(ctxt, &str);
1012 if (ent != NULL) {
1013 xmlChar *rep;
1014
1015 ctxt->depth++;
1016 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1017 0, 0, 0);
1018 ctxt->depth--;
1019 if (rep != NULL) {
1020 current = rep;
1021 while (*current != 0) { /* non input consuming loop */
1022 buffer[nbchars++] = *current++;
1023 if (nbchars >
1024 buffer_size - XML_PARSER_BUFFER_SIZE) {
1025 growBuffer(buffer);
1026 }
1027 }
1028 xmlFree(rep);
1029 }
1030 }
1031 } else {
1032 COPY_BUF(l,buffer,nbchars,c);
1033 str += l;
1034 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1035 growBuffer(buffer);
1036 }
1037 }
1038 c = CUR_SCHAR(str, l);
1039 }
1040 buffer[nbchars++] = 0;
1041 return(buffer);
1042}
1043
1044
1045/************************************************************************
1046 * *
1047 * Commodity functions to handle xmlChars *
1048 * *
1049 ************************************************************************/
1050
1051/**
1052 * xmlStrndup:
1053 * @cur: the input xmlChar *
1054 * @len: the len of @cur
1055 *
1056 * a strndup for array of xmlChar's
1057 *
1058 * Returns a new xmlChar * or NULL
1059 */
1060xmlChar *
1061xmlStrndup(const xmlChar *cur, int len) {
1062 xmlChar *ret;
1063
1064 if ((cur == NULL) || (len < 0)) return(NULL);
1065 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1066 if (ret == NULL) {
1067 xmlGenericError(xmlGenericErrorContext,
1068 "malloc of %ld byte failed\n",
1069 (len + 1) * (long)sizeof(xmlChar));
1070 return(NULL);
1071 }
1072 memcpy(ret, cur, len * sizeof(xmlChar));
1073 ret[len] = 0;
1074 return(ret);
1075}
1076
1077/**
1078 * xmlStrdup:
1079 * @cur: the input xmlChar *
1080 *
1081 * a strdup for array of xmlChar's. Since they are supposed to be
1082 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1083 * a termination mark of '0'.
1084 *
1085 * Returns a new xmlChar * or NULL
1086 */
1087xmlChar *
1088xmlStrdup(const xmlChar *cur) {
1089 const xmlChar *p = cur;
1090
1091 if (cur == NULL) return(NULL);
1092 while (*p != 0) p++; /* non input consuming */
1093 return(xmlStrndup(cur, p - cur));
1094}
1095
1096/**
1097 * xmlCharStrndup:
1098 * @cur: the input char *
1099 * @len: the len of @cur
1100 *
1101 * a strndup for char's to xmlChar's
1102 *
1103 * Returns a new xmlChar * or NULL
1104 */
1105
1106xmlChar *
1107xmlCharStrndup(const char *cur, int len) {
1108 int i;
1109 xmlChar *ret;
1110
1111 if ((cur == NULL) || (len < 0)) return(NULL);
1112 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1113 if (ret == NULL) {
1114 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1115 (len + 1) * (long)sizeof(xmlChar));
1116 return(NULL);
1117 }
1118 for (i = 0;i < len;i++)
1119 ret[i] = (xmlChar) cur[i];
1120 ret[len] = 0;
1121 return(ret);
1122}
1123
1124/**
1125 * xmlCharStrdup:
1126 * @cur: the input char *
1127 * @len: the len of @cur
1128 *
1129 * a strdup for char's to xmlChar's
1130 *
1131 * Returns a new xmlChar * or NULL
1132 */
1133
1134xmlChar *
1135xmlCharStrdup(const char *cur) {
1136 const char *p = cur;
1137
1138 if (cur == NULL) return(NULL);
1139 while (*p != '\0') p++; /* non input consuming */
1140 return(xmlCharStrndup(cur, p - cur));
1141}
1142
1143/**
1144 * xmlStrcmp:
1145 * @str1: the first xmlChar *
1146 * @str2: the second xmlChar *
1147 *
1148 * a strcmp for xmlChar's
1149 *
1150 * Returns the integer result of the comparison
1151 */
1152
1153int
1154xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1155 register int tmp;
1156
1157 if (str1 == str2) return(0);
1158 if (str1 == NULL) return(-1);
1159 if (str2 == NULL) return(1);
1160 do {
1161 tmp = *str1++ - *str2;
1162 if (tmp != 0) return(tmp);
1163 } while (*str2++ != 0);
1164 return 0;
1165}
1166
1167/**
1168 * xmlStrEqual:
1169 * @str1: the first xmlChar *
1170 * @str2: the second xmlChar *
1171 *
1172 * Check if both string are equal of have same content
1173 * Should be a bit more readable and faster than xmlStrEqual()
1174 *
1175 * Returns 1 if they are equal, 0 if they are different
1176 */
1177
1178int
1179xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1180 if (str1 == str2) return(1);
1181 if (str1 == NULL) return(0);
1182 if (str2 == NULL) return(0);
1183 do {
1184 if (*str1++ != *str2) return(0);
1185 } while (*str2++);
1186 return(1);
1187}
1188
1189/**
1190 * xmlStrncmp:
1191 * @str1: the first xmlChar *
1192 * @str2: the second xmlChar *
1193 * @len: the max comparison length
1194 *
1195 * a strncmp for xmlChar's
1196 *
1197 * Returns the integer result of the comparison
1198 */
1199
1200int
1201xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1202 register int tmp;
1203
1204 if (len <= 0) return(0);
1205 if (str1 == str2) return(0);
1206 if (str1 == NULL) return(-1);
1207 if (str2 == NULL) return(1);
1208 do {
1209 tmp = *str1++ - *str2;
1210 if (tmp != 0 || --len == 0) return(tmp);
1211 } while (*str2++ != 0);
1212 return 0;
1213}
1214
Daniel Veillardb44025c2001-10-11 22:55:55 +00001215static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001216 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1217 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1218 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1219 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1220 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1221 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1222 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1223 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1224 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1225 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1226 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1227 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1228 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1229 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1230 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1231 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1232 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1233 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1234 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1235 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1236 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1237 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1238 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1239 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1240 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1241 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1242 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1243 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1244 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1245 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1246 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1247 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1248};
1249
1250/**
1251 * xmlStrcasecmp:
1252 * @str1: the first xmlChar *
1253 * @str2: the second xmlChar *
1254 *
1255 * a strcasecmp for xmlChar's
1256 *
1257 * Returns the integer result of the comparison
1258 */
1259
1260int
1261xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1262 register int tmp;
1263
1264 if (str1 == str2) return(0);
1265 if (str1 == NULL) return(-1);
1266 if (str2 == NULL) return(1);
1267 do {
1268 tmp = casemap[*str1++] - casemap[*str2];
1269 if (tmp != 0) return(tmp);
1270 } while (*str2++ != 0);
1271 return 0;
1272}
1273
1274/**
1275 * xmlStrncasecmp:
1276 * @str1: the first xmlChar *
1277 * @str2: the second xmlChar *
1278 * @len: the max comparison length
1279 *
1280 * a strncasecmp for xmlChar's
1281 *
1282 * Returns the integer result of the comparison
1283 */
1284
1285int
1286xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1287 register int tmp;
1288
1289 if (len <= 0) return(0);
1290 if (str1 == str2) return(0);
1291 if (str1 == NULL) return(-1);
1292 if (str2 == NULL) return(1);
1293 do {
1294 tmp = casemap[*str1++] - casemap[*str2];
1295 if (tmp != 0 || --len == 0) return(tmp);
1296 } while (*str2++ != 0);
1297 return 0;
1298}
1299
1300/**
1301 * xmlStrchr:
1302 * @str: the xmlChar * array
1303 * @val: the xmlChar to search
1304 *
1305 * a strchr for xmlChar's
1306 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001307 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001308 */
1309
1310const xmlChar *
1311xmlStrchr(const xmlChar *str, xmlChar val) {
1312 if (str == NULL) return(NULL);
1313 while (*str != 0) { /* non input consuming */
1314 if (*str == val) return((xmlChar *) str);
1315 str++;
1316 }
1317 return(NULL);
1318}
1319
1320/**
1321 * xmlStrstr:
1322 * @str: the xmlChar * array (haystack)
1323 * @val: the xmlChar to search (needle)
1324 *
1325 * a strstr for xmlChar's
1326 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001327 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001328 */
1329
1330const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001331xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001332 int n;
1333
1334 if (str == NULL) return(NULL);
1335 if (val == NULL) return(NULL);
1336 n = xmlStrlen(val);
1337
1338 if (n == 0) return(str);
1339 while (*str != 0) { /* non input consuming */
1340 if (*str == *val) {
1341 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1342 }
1343 str++;
1344 }
1345 return(NULL);
1346}
1347
1348/**
1349 * xmlStrcasestr:
1350 * @str: the xmlChar * array (haystack)
1351 * @val: the xmlChar to search (needle)
1352 *
1353 * a case-ignoring strstr for xmlChar's
1354 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001355 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001356 */
1357
1358const xmlChar *
1359xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1360 int n;
1361
1362 if (str == NULL) return(NULL);
1363 if (val == NULL) return(NULL);
1364 n = xmlStrlen(val);
1365
1366 if (n == 0) return(str);
1367 while (*str != 0) { /* non input consuming */
1368 if (casemap[*str] == casemap[*val])
1369 if (!xmlStrncasecmp(str, val, n)) return(str);
1370 str++;
1371 }
1372 return(NULL);
1373}
1374
1375/**
1376 * xmlStrsub:
1377 * @str: the xmlChar * array (haystack)
1378 * @start: the index of the first char (zero based)
1379 * @len: the length of the substring
1380 *
1381 * Extract a substring of a given string
1382 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001383 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001384 */
1385
1386xmlChar *
1387xmlStrsub(const xmlChar *str, int start, int len) {
1388 int i;
1389
1390 if (str == NULL) return(NULL);
1391 if (start < 0) return(NULL);
1392 if (len < 0) return(NULL);
1393
1394 for (i = 0;i < start;i++) {
1395 if (*str == 0) return(NULL);
1396 str++;
1397 }
1398 if (*str == 0) return(NULL);
1399 return(xmlStrndup(str, len));
1400}
1401
1402/**
1403 * xmlStrlen:
1404 * @str: the xmlChar * array
1405 *
1406 * length of a xmlChar's string
1407 *
1408 * Returns the number of xmlChar contained in the ARRAY.
1409 */
1410
1411int
1412xmlStrlen(const xmlChar *str) {
1413 int len = 0;
1414
1415 if (str == NULL) return(0);
1416 while (*str != 0) { /* non input consuming */
1417 str++;
1418 len++;
1419 }
1420 return(len);
1421}
1422
1423/**
1424 * xmlStrncat:
1425 * @cur: the original xmlChar * array
1426 * @add: the xmlChar * array added
1427 * @len: the length of @add
1428 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001429 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001430 * first bytes of @add.
1431 *
1432 * Returns a new xmlChar *, the original @cur is reallocated if needed
1433 * and should not be freed
1434 */
1435
1436xmlChar *
1437xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1438 int size;
1439 xmlChar *ret;
1440
1441 if ((add == NULL) || (len == 0))
1442 return(cur);
1443 if (cur == NULL)
1444 return(xmlStrndup(add, len));
1445
1446 size = xmlStrlen(cur);
1447 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1448 if (ret == NULL) {
1449 xmlGenericError(xmlGenericErrorContext,
1450 "xmlStrncat: realloc of %ld byte failed\n",
1451 (size + len + 1) * (long)sizeof(xmlChar));
1452 return(cur);
1453 }
1454 memcpy(&ret[size], add, len * sizeof(xmlChar));
1455 ret[size + len] = 0;
1456 return(ret);
1457}
1458
1459/**
1460 * xmlStrcat:
1461 * @cur: the original xmlChar * array
1462 * @add: the xmlChar * array added
1463 *
1464 * a strcat for array of xmlChar's. Since they are supposed to be
1465 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1466 * a termination mark of '0'.
1467 *
1468 * Returns a new xmlChar * containing the concatenated string.
1469 */
1470xmlChar *
1471xmlStrcat(xmlChar *cur, const xmlChar *add) {
1472 const xmlChar *p = add;
1473
1474 if (add == NULL) return(cur);
1475 if (cur == NULL)
1476 return(xmlStrdup(add));
1477
1478 while (*p != 0) p++; /* non input consuming */
1479 return(xmlStrncat(cur, add, p - add));
1480}
1481
1482/************************************************************************
1483 * *
1484 * Commodity functions, cleanup needed ? *
1485 * *
1486 ************************************************************************/
1487
1488/**
1489 * areBlanks:
1490 * @ctxt: an XML parser context
1491 * @str: a xmlChar *
1492 * @len: the size of @str
1493 *
1494 * Is this a sequence of blank chars that one can ignore ?
1495 *
1496 * Returns 1 if ignorable 0 otherwise.
1497 */
1498
1499static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1500 int i, ret;
1501 xmlNodePtr lastChild;
1502
Daniel Veillard05c13a22001-09-09 08:38:09 +00001503 /*
1504 * Don't spend time trying to differentiate them, the same callback is
1505 * used !
1506 */
1507 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001508 return(0);
1509
Owen Taylor3473f882001-02-23 17:55:21 +00001510 /*
1511 * Check for xml:space value.
1512 */
1513 if (*(ctxt->space) == 1)
1514 return(0);
1515
1516 /*
1517 * Check that the string is made of blanks
1518 */
1519 for (i = 0;i < len;i++)
1520 if (!(IS_BLANK(str[i]))) return(0);
1521
1522 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001523 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001524 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001525 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001526 if (ctxt->myDoc != NULL) {
1527 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1528 if (ret == 0) return(1);
1529 if (ret == 1) return(0);
1530 }
1531
1532 /*
1533 * Otherwise, heuristic :-\
1534 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001535 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001536 if ((ctxt->node->children == NULL) &&
1537 (RAW == '<') && (NXT(1) == '/')) return(0);
1538
1539 lastChild = xmlGetLastChild(ctxt->node);
1540 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001541 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1542 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001543 } else if (xmlNodeIsText(lastChild))
1544 return(0);
1545 else if ((ctxt->node->children != NULL) &&
1546 (xmlNodeIsText(ctxt->node->children)))
1547 return(0);
1548 return(1);
1549}
1550
Owen Taylor3473f882001-02-23 17:55:21 +00001551/************************************************************************
1552 * *
1553 * Extra stuff for namespace support *
1554 * Relates to http://www.w3.org/TR/WD-xml-names *
1555 * *
1556 ************************************************************************/
1557
1558/**
1559 * xmlSplitQName:
1560 * @ctxt: an XML parser context
1561 * @name: an XML parser context
1562 * @prefix: a xmlChar **
1563 *
1564 * parse an UTF8 encoded XML qualified name string
1565 *
1566 * [NS 5] QName ::= (Prefix ':')? LocalPart
1567 *
1568 * [NS 6] Prefix ::= NCName
1569 *
1570 * [NS 7] LocalPart ::= NCName
1571 *
1572 * Returns the local part, and prefix is updated
1573 * to get the Prefix if any.
1574 */
1575
1576xmlChar *
1577xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1578 xmlChar buf[XML_MAX_NAMELEN + 5];
1579 xmlChar *buffer = NULL;
1580 int len = 0;
1581 int max = XML_MAX_NAMELEN;
1582 xmlChar *ret = NULL;
1583 const xmlChar *cur = name;
1584 int c;
1585
1586 *prefix = NULL;
1587
1588#ifndef XML_XML_NAMESPACE
1589 /* xml: prefix is not really a namespace */
1590 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1591 (cur[2] == 'l') && (cur[3] == ':'))
1592 return(xmlStrdup(name));
1593#endif
1594
1595 /* nasty but valid */
1596 if (cur[0] == ':')
1597 return(xmlStrdup(name));
1598
1599 c = *cur++;
1600 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1601 buf[len++] = c;
1602 c = *cur++;
1603 }
1604 if (len >= max) {
1605 /*
1606 * Okay someone managed to make a huge name, so he's ready to pay
1607 * for the processing speed.
1608 */
1609 max = len * 2;
1610
1611 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1612 if (buffer == NULL) {
1613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1614 ctxt->sax->error(ctxt->userData,
1615 "xmlSplitQName: out of memory\n");
1616 return(NULL);
1617 }
1618 memcpy(buffer, buf, len);
1619 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1620 if (len + 10 > max) {
1621 max *= 2;
1622 buffer = (xmlChar *) xmlRealloc(buffer,
1623 max * sizeof(xmlChar));
1624 if (buffer == NULL) {
1625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1626 ctxt->sax->error(ctxt->userData,
1627 "xmlSplitQName: out of memory\n");
1628 return(NULL);
1629 }
1630 }
1631 buffer[len++] = c;
1632 c = *cur++;
1633 }
1634 buffer[len] = 0;
1635 }
1636
1637 if (buffer == NULL)
1638 ret = xmlStrndup(buf, len);
1639 else {
1640 ret = buffer;
1641 buffer = NULL;
1642 max = XML_MAX_NAMELEN;
1643 }
1644
1645
1646 if (c == ':') {
1647 c = *cur++;
1648 if (c == 0) return(ret);
1649 *prefix = ret;
1650 len = 0;
1651
1652 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1653 buf[len++] = c;
1654 c = *cur++;
1655 }
1656 if (len >= max) {
1657 /*
1658 * Okay someone managed to make a huge name, so he's ready to pay
1659 * for the processing speed.
1660 */
1661 max = len * 2;
1662
1663 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1664 if (buffer == NULL) {
1665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1666 ctxt->sax->error(ctxt->userData,
1667 "xmlSplitQName: out of memory\n");
1668 return(NULL);
1669 }
1670 memcpy(buffer, buf, len);
1671 while (c != 0) { /* tested bigname2.xml */
1672 if (len + 10 > max) {
1673 max *= 2;
1674 buffer = (xmlChar *) xmlRealloc(buffer,
1675 max * sizeof(xmlChar));
1676 if (buffer == NULL) {
1677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1678 ctxt->sax->error(ctxt->userData,
1679 "xmlSplitQName: out of memory\n");
1680 return(NULL);
1681 }
1682 }
1683 buffer[len++] = c;
1684 c = *cur++;
1685 }
1686 buffer[len] = 0;
1687 }
1688
1689 if (buffer == NULL)
1690 ret = xmlStrndup(buf, len);
1691 else {
1692 ret = buffer;
1693 }
1694 }
1695
1696 return(ret);
1697}
1698
1699/************************************************************************
1700 * *
1701 * The parser itself *
1702 * Relates to http://www.w3.org/TR/REC-xml *
1703 * *
1704 ************************************************************************/
1705
Daniel Veillard76d66f42001-05-16 21:05:17 +00001706static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001707/**
1708 * xmlParseName:
1709 * @ctxt: an XML parser context
1710 *
1711 * parse an XML name.
1712 *
1713 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1714 * CombiningChar | Extender
1715 *
1716 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1717 *
1718 * [6] Names ::= Name (S Name)*
1719 *
1720 * Returns the Name parsed or NULL
1721 */
1722
1723xmlChar *
1724xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001725 const xmlChar *in;
1726 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001727 int count = 0;
1728
1729 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001730
1731 /*
1732 * Accelerator for simple ASCII names
1733 */
1734 in = ctxt->input->cur;
1735 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1736 ((*in >= 0x41) && (*in <= 0x5A)) ||
1737 (*in == '_') || (*in == ':')) {
1738 in++;
1739 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1740 ((*in >= 0x41) && (*in <= 0x5A)) ||
1741 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001742 (*in == '_') || (*in == '-') ||
1743 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001744 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001745 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001746 count = in - ctxt->input->cur;
1747 ret = xmlStrndup(ctxt->input->cur, count);
1748 ctxt->input->cur = in;
1749 return(ret);
1750 }
1751 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001752 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001753}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001754
Daniel Veillard46de64e2002-05-29 08:21:33 +00001755/**
1756 * xmlParseNameAndCompare:
1757 * @ctxt: an XML parser context
1758 *
1759 * parse an XML name and compares for match
1760 * (specialized for endtag parsing)
1761 *
1762 *
1763 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1764 * and the name for mismatch
1765 */
1766
1767xmlChar *
1768xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1769 const xmlChar *cmp = other;
1770 const xmlChar *in;
1771 xmlChar *ret;
1772 int count = 0;
1773
1774 GROW;
1775
1776 in = ctxt->input->cur;
1777 while (*in != 0 && *in == *cmp) {
1778 ++in;
1779 ++cmp;
1780 }
1781 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1782 /* success */
1783 ctxt->input->cur = in;
1784 return (xmlChar*) 1;
1785 }
1786 /* failure (or end of input buffer), check with full function */
1787 ret = xmlParseName (ctxt);
1788 if (ret != 0 && xmlStrEqual (ret, other)) {
1789 xmlFree (ret);
1790 return (xmlChar*) 1;
1791 }
1792 return ret;
1793}
1794
Daniel Veillard76d66f42001-05-16 21:05:17 +00001795static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001796xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1797 xmlChar buf[XML_MAX_NAMELEN + 5];
1798 int len = 0, l;
1799 int c;
1800 int count = 0;
1801
1802 /*
1803 * Handler for more complex cases
1804 */
1805 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001806 c = CUR_CHAR(l);
1807 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1808 (!IS_LETTER(c) && (c != '_') &&
1809 (c != ':'))) {
1810 return(NULL);
1811 }
1812
1813 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1814 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1815 (c == '.') || (c == '-') ||
1816 (c == '_') || (c == ':') ||
1817 (IS_COMBINING(c)) ||
1818 (IS_EXTENDER(c)))) {
1819 if (count++ > 100) {
1820 count = 0;
1821 GROW;
1822 }
1823 COPY_BUF(l,buf,len,c);
1824 NEXTL(l);
1825 c = CUR_CHAR(l);
1826 if (len >= XML_MAX_NAMELEN) {
1827 /*
1828 * Okay someone managed to make a huge name, so he's ready to pay
1829 * for the processing speed.
1830 */
1831 xmlChar *buffer;
1832 int max = len * 2;
1833
1834 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1835 if (buffer == NULL) {
1836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1837 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001838 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001839 return(NULL);
1840 }
1841 memcpy(buffer, buf, len);
1842 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1843 (c == '.') || (c == '-') ||
1844 (c == '_') || (c == ':') ||
1845 (IS_COMBINING(c)) ||
1846 (IS_EXTENDER(c))) {
1847 if (count++ > 100) {
1848 count = 0;
1849 GROW;
1850 }
1851 if (len + 10 > max) {
1852 max *= 2;
1853 buffer = (xmlChar *) xmlRealloc(buffer,
1854 max * sizeof(xmlChar));
1855 if (buffer == NULL) {
1856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1857 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001858 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001859 return(NULL);
1860 }
1861 }
1862 COPY_BUF(l,buffer,len,c);
1863 NEXTL(l);
1864 c = CUR_CHAR(l);
1865 }
1866 buffer[len] = 0;
1867 return(buffer);
1868 }
1869 }
1870 return(xmlStrndup(buf, len));
1871}
1872
1873/**
1874 * xmlParseStringName:
1875 * @ctxt: an XML parser context
1876 * @str: a pointer to the string pointer (IN/OUT)
1877 *
1878 * parse an XML name.
1879 *
1880 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1881 * CombiningChar | Extender
1882 *
1883 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1884 *
1885 * [6] Names ::= Name (S Name)*
1886 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001887 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001888 * is updated to the current location in the string.
1889 */
1890
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001891static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001892xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1893 xmlChar buf[XML_MAX_NAMELEN + 5];
1894 const xmlChar *cur = *str;
1895 int len = 0, l;
1896 int c;
1897
1898 c = CUR_SCHAR(cur, l);
1899 if (!IS_LETTER(c) && (c != '_') &&
1900 (c != ':')) {
1901 return(NULL);
1902 }
1903
1904 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1905 (c == '.') || (c == '-') ||
1906 (c == '_') || (c == ':') ||
1907 (IS_COMBINING(c)) ||
1908 (IS_EXTENDER(c))) {
1909 COPY_BUF(l,buf,len,c);
1910 cur += l;
1911 c = CUR_SCHAR(cur, l);
1912 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1913 /*
1914 * Okay someone managed to make a huge name, so he's ready to pay
1915 * for the processing speed.
1916 */
1917 xmlChar *buffer;
1918 int max = len * 2;
1919
1920 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1921 if (buffer == NULL) {
1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1923 ctxt->sax->error(ctxt->userData,
1924 "xmlParseStringName: out of memory\n");
1925 return(NULL);
1926 }
1927 memcpy(buffer, buf, len);
1928 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1929 (c == '.') || (c == '-') ||
1930 (c == '_') || (c == ':') ||
1931 (IS_COMBINING(c)) ||
1932 (IS_EXTENDER(c))) {
1933 if (len + 10 > max) {
1934 max *= 2;
1935 buffer = (xmlChar *) xmlRealloc(buffer,
1936 max * sizeof(xmlChar));
1937 if (buffer == NULL) {
1938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1939 ctxt->sax->error(ctxt->userData,
1940 "xmlParseStringName: out of memory\n");
1941 return(NULL);
1942 }
1943 }
1944 COPY_BUF(l,buffer,len,c);
1945 cur += l;
1946 c = CUR_SCHAR(cur, l);
1947 }
1948 buffer[len] = 0;
1949 *str = cur;
1950 return(buffer);
1951 }
1952 }
1953 *str = cur;
1954 return(xmlStrndup(buf, len));
1955}
1956
1957/**
1958 * xmlParseNmtoken:
1959 * @ctxt: an XML parser context
1960 *
1961 * parse an XML Nmtoken.
1962 *
1963 * [7] Nmtoken ::= (NameChar)+
1964 *
1965 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1966 *
1967 * Returns the Nmtoken parsed or NULL
1968 */
1969
1970xmlChar *
1971xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1972 xmlChar buf[XML_MAX_NAMELEN + 5];
1973 int len = 0, l;
1974 int c;
1975 int count = 0;
1976
1977 GROW;
1978 c = CUR_CHAR(l);
1979
1980 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1981 (c == '.') || (c == '-') ||
1982 (c == '_') || (c == ':') ||
1983 (IS_COMBINING(c)) ||
1984 (IS_EXTENDER(c))) {
1985 if (count++ > 100) {
1986 count = 0;
1987 GROW;
1988 }
1989 COPY_BUF(l,buf,len,c);
1990 NEXTL(l);
1991 c = CUR_CHAR(l);
1992 if (len >= XML_MAX_NAMELEN) {
1993 /*
1994 * Okay someone managed to make a huge token, so he's ready to pay
1995 * for the processing speed.
1996 */
1997 xmlChar *buffer;
1998 int max = len * 2;
1999
2000 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2001 if (buffer == NULL) {
2002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2003 ctxt->sax->error(ctxt->userData,
2004 "xmlParseNmtoken: out of memory\n");
2005 return(NULL);
2006 }
2007 memcpy(buffer, buf, len);
2008 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2009 (c == '.') || (c == '-') ||
2010 (c == '_') || (c == ':') ||
2011 (IS_COMBINING(c)) ||
2012 (IS_EXTENDER(c))) {
2013 if (count++ > 100) {
2014 count = 0;
2015 GROW;
2016 }
2017 if (len + 10 > max) {
2018 max *= 2;
2019 buffer = (xmlChar *) xmlRealloc(buffer,
2020 max * sizeof(xmlChar));
2021 if (buffer == NULL) {
2022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2023 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002024 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002025 return(NULL);
2026 }
2027 }
2028 COPY_BUF(l,buffer,len,c);
2029 NEXTL(l);
2030 c = CUR_CHAR(l);
2031 }
2032 buffer[len] = 0;
2033 return(buffer);
2034 }
2035 }
2036 if (len == 0)
2037 return(NULL);
2038 return(xmlStrndup(buf, len));
2039}
2040
2041/**
2042 * xmlParseEntityValue:
2043 * @ctxt: an XML parser context
2044 * @orig: if non-NULL store a copy of the original entity value
2045 *
2046 * parse a value for ENTITY declarations
2047 *
2048 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2049 * "'" ([^%&'] | PEReference | Reference)* "'"
2050 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002051 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002052 */
2053
2054xmlChar *
2055xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2056 xmlChar *buf = NULL;
2057 int len = 0;
2058 int size = XML_PARSER_BUFFER_SIZE;
2059 int c, l;
2060 xmlChar stop;
2061 xmlChar *ret = NULL;
2062 const xmlChar *cur = NULL;
2063 xmlParserInputPtr input;
2064
2065 if (RAW == '"') stop = '"';
2066 else if (RAW == '\'') stop = '\'';
2067 else {
2068 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2070 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2071 ctxt->wellFormed = 0;
2072 ctxt->disableSAX = 1;
2073 return(NULL);
2074 }
2075 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2076 if (buf == NULL) {
2077 xmlGenericError(xmlGenericErrorContext,
2078 "malloc of %d byte failed\n", size);
2079 return(NULL);
2080 }
2081
2082 /*
2083 * The content of the entity definition is copied in a buffer.
2084 */
2085
2086 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2087 input = ctxt->input;
2088 GROW;
2089 NEXT;
2090 c = CUR_CHAR(l);
2091 /*
2092 * NOTE: 4.4.5 Included in Literal
2093 * When a parameter entity reference appears in a literal entity
2094 * value, ... a single or double quote character in the replacement
2095 * text is always treated as a normal data character and will not
2096 * terminate the literal.
2097 * In practice it means we stop the loop only when back at parsing
2098 * the initial entity and the quote is found
2099 */
2100 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2101 (ctxt->input != input))) {
2102 if (len + 5 >= size) {
2103 size *= 2;
2104 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2105 if (buf == NULL) {
2106 xmlGenericError(xmlGenericErrorContext,
2107 "realloc of %d byte failed\n", size);
2108 return(NULL);
2109 }
2110 }
2111 COPY_BUF(l,buf,len,c);
2112 NEXTL(l);
2113 /*
2114 * Pop-up of finished entities.
2115 */
2116 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2117 xmlPopInput(ctxt);
2118
2119 GROW;
2120 c = CUR_CHAR(l);
2121 if (c == 0) {
2122 GROW;
2123 c = CUR_CHAR(l);
2124 }
2125 }
2126 buf[len] = 0;
2127
2128 /*
2129 * Raise problem w.r.t. '&' and '%' being used in non-entities
2130 * reference constructs. Note Charref will be handled in
2131 * xmlStringDecodeEntities()
2132 */
2133 cur = buf;
2134 while (*cur != 0) { /* non input consuming */
2135 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2136 xmlChar *name;
2137 xmlChar tmp = *cur;
2138
2139 cur++;
2140 name = xmlParseStringName(ctxt, &cur);
2141 if ((name == NULL) || (*cur != ';')) {
2142 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2144 ctxt->sax->error(ctxt->userData,
2145 "EntityValue: '%c' forbidden except for entities references\n",
2146 tmp);
2147 ctxt->wellFormed = 0;
2148 ctxt->disableSAX = 1;
2149 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002150 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2151 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002152 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2154 ctxt->sax->error(ctxt->userData,
2155 "EntityValue: PEReferences forbidden in internal subset\n",
2156 tmp);
2157 ctxt->wellFormed = 0;
2158 ctxt->disableSAX = 1;
2159 }
2160 if (name != NULL)
2161 xmlFree(name);
2162 }
2163 cur++;
2164 }
2165
2166 /*
2167 * Then PEReference entities are substituted.
2168 */
2169 if (c != stop) {
2170 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2172 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2173 ctxt->wellFormed = 0;
2174 ctxt->disableSAX = 1;
2175 xmlFree(buf);
2176 } else {
2177 NEXT;
2178 /*
2179 * NOTE: 4.4.7 Bypassed
2180 * When a general entity reference appears in the EntityValue in
2181 * an entity declaration, it is bypassed and left as is.
2182 * so XML_SUBSTITUTE_REF is not set here.
2183 */
2184 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2185 0, 0, 0);
2186 if (orig != NULL)
2187 *orig = buf;
2188 else
2189 xmlFree(buf);
2190 }
2191
2192 return(ret);
2193}
2194
2195/**
2196 * xmlParseAttValue:
2197 * @ctxt: an XML parser context
2198 *
2199 * parse a value for an attribute
2200 * Note: the parser won't do substitution of entities here, this
2201 * will be handled later in xmlStringGetNodeList
2202 *
2203 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2204 * "'" ([^<&'] | Reference)* "'"
2205 *
2206 * 3.3.3 Attribute-Value Normalization:
2207 * Before the value of an attribute is passed to the application or
2208 * checked for validity, the XML processor must normalize it as follows:
2209 * - a character reference is processed by appending the referenced
2210 * character to the attribute value
2211 * - an entity reference is processed by recursively processing the
2212 * replacement text of the entity
2213 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2214 * appending #x20 to the normalized value, except that only a single
2215 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2216 * parsed entity or the literal entity value of an internal parsed entity
2217 * - other characters are processed by appending them to the normalized value
2218 * If the declared value is not CDATA, then the XML processor must further
2219 * process the normalized attribute value by discarding any leading and
2220 * trailing space (#x20) characters, and by replacing sequences of space
2221 * (#x20) characters by a single space (#x20) character.
2222 * All attributes for which no declaration has been read should be treated
2223 * by a non-validating parser as if declared CDATA.
2224 *
2225 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2226 */
2227
2228xmlChar *
2229xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2230 xmlChar limit = 0;
2231 xmlChar *buf = NULL;
2232 int len = 0;
2233 int buf_size = 0;
2234 int c, l;
2235 xmlChar *current = NULL;
2236 xmlEntityPtr ent;
2237
2238
2239 SHRINK;
2240 if (NXT(0) == '"') {
2241 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2242 limit = '"';
2243 NEXT;
2244 } else if (NXT(0) == '\'') {
2245 limit = '\'';
2246 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2247 NEXT;
2248 } else {
2249 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2251 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2252 ctxt->wellFormed = 0;
2253 ctxt->disableSAX = 1;
2254 return(NULL);
2255 }
2256
2257 /*
2258 * allocate a translation buffer.
2259 */
2260 buf_size = XML_PARSER_BUFFER_SIZE;
2261 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2262 if (buf == NULL) {
2263 perror("xmlParseAttValue: malloc failed");
2264 return(NULL);
2265 }
2266
2267 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002268 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002269 */
2270 c = CUR_CHAR(l);
2271 while (((NXT(0) != limit) && /* checked */
2272 (c != '<')) || (ctxt->token != 0)) {
2273 if (c == 0) break;
2274 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002275 if (ctxt->replaceEntities) {
2276 if (len > buf_size - 10) {
2277 growBuffer(buf);
2278 }
2279 buf[len++] = '&';
2280 } else {
2281 /*
2282 * The reparsing will be done in xmlStringGetNodeList()
2283 * called by the attribute() function in SAX.c
2284 */
2285 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002286
Daniel Veillard319a7422001-09-11 09:27:09 +00002287 if (len > buf_size - 10) {
2288 growBuffer(buf);
2289 }
2290 current = &buffer[0];
2291 while (*current != 0) { /* non input consuming */
2292 buf[len++] = *current++;
2293 }
2294 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002295 }
Owen Taylor3473f882001-02-23 17:55:21 +00002296 } else if (c == '&') {
2297 if (NXT(1) == '#') {
2298 int val = xmlParseCharRef(ctxt);
2299 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002300 if (ctxt->replaceEntities) {
2301 if (len > buf_size - 10) {
2302 growBuffer(buf);
2303 }
2304 buf[len++] = '&';
2305 } else {
2306 /*
2307 * The reparsing will be done in xmlStringGetNodeList()
2308 * called by the attribute() function in SAX.c
2309 */
2310 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002311
Daniel Veillard319a7422001-09-11 09:27:09 +00002312 if (len > buf_size - 10) {
2313 growBuffer(buf);
2314 }
2315 current = &buffer[0];
2316 while (*current != 0) { /* non input consuming */
2317 buf[len++] = *current++;
2318 }
Owen Taylor3473f882001-02-23 17:55:21 +00002319 }
2320 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002321 if (len > buf_size - 10) {
2322 growBuffer(buf);
2323 }
Owen Taylor3473f882001-02-23 17:55:21 +00002324 len += xmlCopyChar(0, &buf[len], val);
2325 }
2326 } else {
2327 ent = xmlParseEntityRef(ctxt);
2328 if ((ent != NULL) &&
2329 (ctxt->replaceEntities != 0)) {
2330 xmlChar *rep;
2331
2332 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2333 rep = xmlStringDecodeEntities(ctxt, ent->content,
2334 XML_SUBSTITUTE_REF, 0, 0, 0);
2335 if (rep != NULL) {
2336 current = rep;
2337 while (*current != 0) { /* non input consuming */
2338 buf[len++] = *current++;
2339 if (len > buf_size - 10) {
2340 growBuffer(buf);
2341 }
2342 }
2343 xmlFree(rep);
2344 }
2345 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002346 if (len > buf_size - 10) {
2347 growBuffer(buf);
2348 }
Owen Taylor3473f882001-02-23 17:55:21 +00002349 if (ent->content != NULL)
2350 buf[len++] = ent->content[0];
2351 }
2352 } else if (ent != NULL) {
2353 int i = xmlStrlen(ent->name);
2354 const xmlChar *cur = ent->name;
2355
2356 /*
2357 * This may look absurd but is needed to detect
2358 * entities problems
2359 */
2360 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2361 (ent->content != NULL)) {
2362 xmlChar *rep;
2363 rep = xmlStringDecodeEntities(ctxt, ent->content,
2364 XML_SUBSTITUTE_REF, 0, 0, 0);
2365 if (rep != NULL)
2366 xmlFree(rep);
2367 }
2368
2369 /*
2370 * Just output the reference
2371 */
2372 buf[len++] = '&';
2373 if (len > buf_size - i - 10) {
2374 growBuffer(buf);
2375 }
2376 for (;i > 0;i--)
2377 buf[len++] = *cur++;
2378 buf[len++] = ';';
2379 }
2380 }
2381 } else {
2382 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2383 COPY_BUF(l,buf,len,0x20);
2384 if (len > buf_size - 10) {
2385 growBuffer(buf);
2386 }
2387 } else {
2388 COPY_BUF(l,buf,len,c);
2389 if (len > buf_size - 10) {
2390 growBuffer(buf);
2391 }
2392 }
2393 NEXTL(l);
2394 }
2395 GROW;
2396 c = CUR_CHAR(l);
2397 }
2398 buf[len++] = 0;
2399 if (RAW == '<') {
2400 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2402 ctxt->sax->error(ctxt->userData,
2403 "Unescaped '<' not allowed in attributes values\n");
2404 ctxt->wellFormed = 0;
2405 ctxt->disableSAX = 1;
2406 } else if (RAW != limit) {
2407 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2409 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2410 ctxt->wellFormed = 0;
2411 ctxt->disableSAX = 1;
2412 } else
2413 NEXT;
2414 return(buf);
2415}
2416
2417/**
2418 * xmlParseSystemLiteral:
2419 * @ctxt: an XML parser context
2420 *
2421 * parse an XML Literal
2422 *
2423 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2424 *
2425 * Returns the SystemLiteral parsed or NULL
2426 */
2427
2428xmlChar *
2429xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2430 xmlChar *buf = NULL;
2431 int len = 0;
2432 int size = XML_PARSER_BUFFER_SIZE;
2433 int cur, l;
2434 xmlChar stop;
2435 int state = ctxt->instate;
2436 int count = 0;
2437
2438 SHRINK;
2439 if (RAW == '"') {
2440 NEXT;
2441 stop = '"';
2442 } else if (RAW == '\'') {
2443 NEXT;
2444 stop = '\'';
2445 } else {
2446 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448 ctxt->sax->error(ctxt->userData,
2449 "SystemLiteral \" or ' expected\n");
2450 ctxt->wellFormed = 0;
2451 ctxt->disableSAX = 1;
2452 return(NULL);
2453 }
2454
2455 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2456 if (buf == NULL) {
2457 xmlGenericError(xmlGenericErrorContext,
2458 "malloc of %d byte failed\n", size);
2459 return(NULL);
2460 }
2461 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2462 cur = CUR_CHAR(l);
2463 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2464 if (len + 5 >= size) {
2465 size *= 2;
2466 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2467 if (buf == NULL) {
2468 xmlGenericError(xmlGenericErrorContext,
2469 "realloc of %d byte failed\n", size);
2470 ctxt->instate = (xmlParserInputState) state;
2471 return(NULL);
2472 }
2473 }
2474 count++;
2475 if (count > 50) {
2476 GROW;
2477 count = 0;
2478 }
2479 COPY_BUF(l,buf,len,cur);
2480 NEXTL(l);
2481 cur = CUR_CHAR(l);
2482 if (cur == 0) {
2483 GROW;
2484 SHRINK;
2485 cur = CUR_CHAR(l);
2486 }
2487 }
2488 buf[len] = 0;
2489 ctxt->instate = (xmlParserInputState) state;
2490 if (!IS_CHAR(cur)) {
2491 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2493 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2494 ctxt->wellFormed = 0;
2495 ctxt->disableSAX = 1;
2496 } else {
2497 NEXT;
2498 }
2499 return(buf);
2500}
2501
2502/**
2503 * xmlParsePubidLiteral:
2504 * @ctxt: an XML parser context
2505 *
2506 * parse an XML public literal
2507 *
2508 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2509 *
2510 * Returns the PubidLiteral parsed or NULL.
2511 */
2512
2513xmlChar *
2514xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2515 xmlChar *buf = NULL;
2516 int len = 0;
2517 int size = XML_PARSER_BUFFER_SIZE;
2518 xmlChar cur;
2519 xmlChar stop;
2520 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002521 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002522
2523 SHRINK;
2524 if (RAW == '"') {
2525 NEXT;
2526 stop = '"';
2527 } else if (RAW == '\'') {
2528 NEXT;
2529 stop = '\'';
2530 } else {
2531 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2533 ctxt->sax->error(ctxt->userData,
2534 "SystemLiteral \" or ' expected\n");
2535 ctxt->wellFormed = 0;
2536 ctxt->disableSAX = 1;
2537 return(NULL);
2538 }
2539 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2540 if (buf == NULL) {
2541 xmlGenericError(xmlGenericErrorContext,
2542 "malloc of %d byte failed\n", size);
2543 return(NULL);
2544 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002545 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002546 cur = CUR;
2547 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2548 if (len + 1 >= size) {
2549 size *= 2;
2550 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2551 if (buf == NULL) {
2552 xmlGenericError(xmlGenericErrorContext,
2553 "realloc of %d byte failed\n", size);
2554 return(NULL);
2555 }
2556 }
2557 buf[len++] = cur;
2558 count++;
2559 if (count > 50) {
2560 GROW;
2561 count = 0;
2562 }
2563 NEXT;
2564 cur = CUR;
2565 if (cur == 0) {
2566 GROW;
2567 SHRINK;
2568 cur = CUR;
2569 }
2570 }
2571 buf[len] = 0;
2572 if (cur != stop) {
2573 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2575 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2576 ctxt->wellFormed = 0;
2577 ctxt->disableSAX = 1;
2578 } else {
2579 NEXT;
2580 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002581 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002582 return(buf);
2583}
2584
Daniel Veillard48b2f892001-02-25 16:11:03 +00002585void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002586/**
2587 * xmlParseCharData:
2588 * @ctxt: an XML parser context
2589 * @cdata: int indicating whether we are within a CDATA section
2590 *
2591 * parse a CharData section.
2592 * if we are within a CDATA section ']]>' marks an end of section.
2593 *
2594 * The right angle bracket (>) may be represented using the string "&gt;",
2595 * and must, for compatibility, be escaped using "&gt;" or a character
2596 * reference when it appears in the string "]]>" in content, when that
2597 * string is not marking the end of a CDATA section.
2598 *
2599 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2600 */
2601
2602void
2603xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002604 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002605 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002606 int line = ctxt->input->line;
2607 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002608
2609 SHRINK;
2610 GROW;
2611 /*
2612 * Accelerated common case where input don't need to be
2613 * modified before passing it to the handler.
2614 */
2615 if ((ctxt->token == 0) && (!cdata)) {
2616 in = ctxt->input->cur;
2617 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002618get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002619 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2620 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002621 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002622 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002623 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002624 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002625 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002626 ctxt->input->line++;
2627 in++;
2628 }
2629 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002630 }
2631 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002632 if ((in[1] == ']') && (in[2] == '>')) {
2633 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2635 ctxt->sax->error(ctxt->userData,
2636 "Sequence ']]>' not allowed in content\n");
2637 ctxt->input->cur = in;
2638 ctxt->wellFormed = 0;
2639 ctxt->disableSAX = 1;
2640 return;
2641 }
2642 in++;
2643 goto get_more;
2644 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002645 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002646 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002647 if (IS_BLANK(*ctxt->input->cur)) {
2648 const xmlChar *tmp = ctxt->input->cur;
2649 ctxt->input->cur = in;
2650 if (areBlanks(ctxt, tmp, nbchar)) {
2651 if (ctxt->sax->ignorableWhitespace != NULL)
2652 ctxt->sax->ignorableWhitespace(ctxt->userData,
2653 tmp, nbchar);
2654 } else {
2655 if (ctxt->sax->characters != NULL)
2656 ctxt->sax->characters(ctxt->userData,
2657 tmp, nbchar);
2658 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002659 line = ctxt->input->line;
2660 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002661 } else {
2662 if (ctxt->sax->characters != NULL)
2663 ctxt->sax->characters(ctxt->userData,
2664 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002665 line = ctxt->input->line;
2666 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002667 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002668 }
2669 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002670 if (*in == 0xD) {
2671 in++;
2672 if (*in == 0xA) {
2673 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002674 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002675 ctxt->input->line++;
2676 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002677 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002678 in--;
2679 }
2680 if (*in == '<') {
2681 return;
2682 }
2683 if (*in == '&') {
2684 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002685 }
2686 SHRINK;
2687 GROW;
2688 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002689 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002690 nbchar = 0;
2691 }
Daniel Veillard50582112001-03-26 22:52:16 +00002692 ctxt->input->line = line;
2693 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002694 xmlParseCharDataComplex(ctxt, cdata);
2695}
2696
2697void
2698xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002699 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2700 int nbchar = 0;
2701 int cur, l;
2702 int count = 0;
2703
2704 SHRINK;
2705 GROW;
2706 cur = CUR_CHAR(l);
2707 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2708 ((cur != '&') || (ctxt->token == '&')) &&
2709 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2710 if ((cur == ']') && (NXT(1) == ']') &&
2711 (NXT(2) == '>')) {
2712 if (cdata) break;
2713 else {
2714 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2716 ctxt->sax->error(ctxt->userData,
2717 "Sequence ']]>' not allowed in content\n");
2718 /* Should this be relaxed ??? I see a "must here */
2719 ctxt->wellFormed = 0;
2720 ctxt->disableSAX = 1;
2721 }
2722 }
2723 COPY_BUF(l,buf,nbchar,cur);
2724 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2725 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002726 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002727 */
2728 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2729 if (areBlanks(ctxt, buf, nbchar)) {
2730 if (ctxt->sax->ignorableWhitespace != NULL)
2731 ctxt->sax->ignorableWhitespace(ctxt->userData,
2732 buf, nbchar);
2733 } else {
2734 if (ctxt->sax->characters != NULL)
2735 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2736 }
2737 }
2738 nbchar = 0;
2739 }
2740 count++;
2741 if (count > 50) {
2742 GROW;
2743 count = 0;
2744 }
2745 NEXTL(l);
2746 cur = CUR_CHAR(l);
2747 }
2748 if (nbchar != 0) {
2749 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002750 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002751 */
2752 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2753 if (areBlanks(ctxt, buf, nbchar)) {
2754 if (ctxt->sax->ignorableWhitespace != NULL)
2755 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2756 } else {
2757 if (ctxt->sax->characters != NULL)
2758 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2759 }
2760 }
2761 }
2762}
2763
2764/**
2765 * xmlParseExternalID:
2766 * @ctxt: an XML parser context
2767 * @publicID: a xmlChar** receiving PubidLiteral
2768 * @strict: indicate whether we should restrict parsing to only
2769 * production [75], see NOTE below
2770 *
2771 * Parse an External ID or a Public ID
2772 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002773 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002774 * 'PUBLIC' S PubidLiteral S SystemLiteral
2775 *
2776 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2777 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2778 *
2779 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2780 *
2781 * Returns the function returns SystemLiteral and in the second
2782 * case publicID receives PubidLiteral, is strict is off
2783 * it is possible to return NULL and have publicID set.
2784 */
2785
2786xmlChar *
2787xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2788 xmlChar *URI = NULL;
2789
2790 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002791
2792 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002793 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2794 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2795 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2796 SKIP(6);
2797 if (!IS_BLANK(CUR)) {
2798 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2800 ctxt->sax->error(ctxt->userData,
2801 "Space required after 'SYSTEM'\n");
2802 ctxt->wellFormed = 0;
2803 ctxt->disableSAX = 1;
2804 }
2805 SKIP_BLANKS;
2806 URI = xmlParseSystemLiteral(ctxt);
2807 if (URI == NULL) {
2808 ctxt->errNo = XML_ERR_URI_REQUIRED;
2809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2810 ctxt->sax->error(ctxt->userData,
2811 "xmlParseExternalID: SYSTEM, no URI\n");
2812 ctxt->wellFormed = 0;
2813 ctxt->disableSAX = 1;
2814 }
2815 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2816 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2817 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2818 SKIP(6);
2819 if (!IS_BLANK(CUR)) {
2820 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2822 ctxt->sax->error(ctxt->userData,
2823 "Space required after 'PUBLIC'\n");
2824 ctxt->wellFormed = 0;
2825 ctxt->disableSAX = 1;
2826 }
2827 SKIP_BLANKS;
2828 *publicID = xmlParsePubidLiteral(ctxt);
2829 if (*publicID == NULL) {
2830 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2832 ctxt->sax->error(ctxt->userData,
2833 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2834 ctxt->wellFormed = 0;
2835 ctxt->disableSAX = 1;
2836 }
2837 if (strict) {
2838 /*
2839 * We don't handle [83] so "S SystemLiteral" is required.
2840 */
2841 if (!IS_BLANK(CUR)) {
2842 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2844 ctxt->sax->error(ctxt->userData,
2845 "Space required after the Public Identifier\n");
2846 ctxt->wellFormed = 0;
2847 ctxt->disableSAX = 1;
2848 }
2849 } else {
2850 /*
2851 * We handle [83] so we return immediately, if
2852 * "S SystemLiteral" is not detected. From a purely parsing
2853 * point of view that's a nice mess.
2854 */
2855 const xmlChar *ptr;
2856 GROW;
2857
2858 ptr = CUR_PTR;
2859 if (!IS_BLANK(*ptr)) return(NULL);
2860
2861 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2862 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2863 }
2864 SKIP_BLANKS;
2865 URI = xmlParseSystemLiteral(ctxt);
2866 if (URI == NULL) {
2867 ctxt->errNo = XML_ERR_URI_REQUIRED;
2868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2869 ctxt->sax->error(ctxt->userData,
2870 "xmlParseExternalID: PUBLIC, no URI\n");
2871 ctxt->wellFormed = 0;
2872 ctxt->disableSAX = 1;
2873 }
2874 }
2875 return(URI);
2876}
2877
2878/**
2879 * xmlParseComment:
2880 * @ctxt: an XML parser context
2881 *
2882 * Skip an XML (SGML) comment <!-- .... -->
2883 * The spec says that "For compatibility, the string "--" (double-hyphen)
2884 * must not occur within comments. "
2885 *
2886 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2887 */
2888void
2889xmlParseComment(xmlParserCtxtPtr ctxt) {
2890 xmlChar *buf = NULL;
2891 int len;
2892 int size = XML_PARSER_BUFFER_SIZE;
2893 int q, ql;
2894 int r, rl;
2895 int cur, l;
2896 xmlParserInputState state;
2897 xmlParserInputPtr input = ctxt->input;
2898 int count = 0;
2899
2900 /*
2901 * Check that there is a comment right here.
2902 */
2903 if ((RAW != '<') || (NXT(1) != '!') ||
2904 (NXT(2) != '-') || (NXT(3) != '-')) return;
2905
2906 state = ctxt->instate;
2907 ctxt->instate = XML_PARSER_COMMENT;
2908 SHRINK;
2909 SKIP(4);
2910 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2911 if (buf == NULL) {
2912 xmlGenericError(xmlGenericErrorContext,
2913 "malloc of %d byte failed\n", size);
2914 ctxt->instate = state;
2915 return;
2916 }
2917 q = CUR_CHAR(ql);
2918 NEXTL(ql);
2919 r = CUR_CHAR(rl);
2920 NEXTL(rl);
2921 cur = CUR_CHAR(l);
2922 len = 0;
2923 while (IS_CHAR(cur) && /* checked */
2924 ((cur != '>') ||
2925 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002926 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002927 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2929 ctxt->sax->error(ctxt->userData,
2930 "Comment must not contain '--' (double-hyphen)`\n");
2931 ctxt->wellFormed = 0;
2932 ctxt->disableSAX = 1;
2933 }
2934 if (len + 5 >= size) {
2935 size *= 2;
2936 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2937 if (buf == NULL) {
2938 xmlGenericError(xmlGenericErrorContext,
2939 "realloc of %d byte failed\n", size);
2940 ctxt->instate = state;
2941 return;
2942 }
2943 }
2944 COPY_BUF(ql,buf,len,q);
2945 q = r;
2946 ql = rl;
2947 r = cur;
2948 rl = l;
2949
2950 count++;
2951 if (count > 50) {
2952 GROW;
2953 count = 0;
2954 }
2955 NEXTL(l);
2956 cur = CUR_CHAR(l);
2957 if (cur == 0) {
2958 SHRINK;
2959 GROW;
2960 cur = CUR_CHAR(l);
2961 }
2962 }
2963 buf[len] = 0;
2964 if (!IS_CHAR(cur)) {
2965 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2967 ctxt->sax->error(ctxt->userData,
2968 "Comment not terminated \n<!--%.50s\n", buf);
2969 ctxt->wellFormed = 0;
2970 ctxt->disableSAX = 1;
2971 xmlFree(buf);
2972 } else {
2973 if (input != ctxt->input) {
2974 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2976 ctxt->sax->error(ctxt->userData,
2977"Comment doesn't start and stop in the same entity\n");
2978 ctxt->wellFormed = 0;
2979 ctxt->disableSAX = 1;
2980 }
2981 NEXT;
2982 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2983 (!ctxt->disableSAX))
2984 ctxt->sax->comment(ctxt->userData, buf);
2985 xmlFree(buf);
2986 }
2987 ctxt->instate = state;
2988}
2989
2990/**
2991 * xmlParsePITarget:
2992 * @ctxt: an XML parser context
2993 *
2994 * parse the name of a PI
2995 *
2996 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2997 *
2998 * Returns the PITarget name or NULL
2999 */
3000
3001xmlChar *
3002xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3003 xmlChar *name;
3004
3005 name = xmlParseName(ctxt);
3006 if ((name != NULL) &&
3007 ((name[0] == 'x') || (name[0] == 'X')) &&
3008 ((name[1] == 'm') || (name[1] == 'M')) &&
3009 ((name[2] == 'l') || (name[2] == 'L'))) {
3010 int i;
3011 if ((name[0] == 'x') && (name[1] == 'm') &&
3012 (name[2] == 'l') && (name[3] == 0)) {
3013 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3015 ctxt->sax->error(ctxt->userData,
3016 "XML declaration allowed only at the start of the document\n");
3017 ctxt->wellFormed = 0;
3018 ctxt->disableSAX = 1;
3019 return(name);
3020 } else if (name[3] == 0) {
3021 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3023 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3024 ctxt->wellFormed = 0;
3025 ctxt->disableSAX = 1;
3026 return(name);
3027 }
3028 for (i = 0;;i++) {
3029 if (xmlW3CPIs[i] == NULL) break;
3030 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3031 return(name);
3032 }
3033 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3034 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3035 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003036 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003037 }
3038 }
3039 return(name);
3040}
3041
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be of the form  catalog = "URL"  or  catalog = 'URL'
 * with optional blanks around the tokens and nothing but blanks after
 * the closing quote; otherwise a warning is raised through SAX.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cur;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* Leading blanks, then the literal pseudo-attribute name. */
    for (cur = catalog; IS_BLANK(*cur); cur++)
        ;
    if (xmlStrncmp(cur, BAD_CAST "catalog", 7) != 0)
        goto syntax_error;

    for (cur += 7; IS_BLANK(*cur); cur++)
        ;
    /*
     * NOTE(review): a missing '=' is ignored silently, no warning is
     * raised on this path unlike the other syntax problems -- confirm
     * this is intended.
     */
    if (*cur != '=')
        return;

    for (cur++; IS_BLANK(*cur); cur++)
        ;
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;

    /* Collect everything up to the matching quote. */
    start = ++cur;
    while ((*cur != 0) && (*cur != quote))
        cur++;
    if (*cur == 0)
        goto syntax_error;
    URL = xmlStrndup(start, cur - start);

    /* Only blanks may follow the closing quote. */
    for (cur++; IS_BLANK(*cur); cur++)
        ;
    if (*cur != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
                "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3104
Owen Taylor3473f882001-02-23 17:55:21 +00003105/**
3106 * xmlParsePI:
3107 * @ctxt: an XML parser context
3108 *
3109 * parse an XML Processing Instruction.
3110 *
3111 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3112 *
3113 * The processing is transfered to SAX once parsed.
3114 */
3115
3116void
3117xmlParsePI(xmlParserCtxtPtr ctxt) {
3118 xmlChar *buf = NULL;
3119 int len = 0;
3120 int size = XML_PARSER_BUFFER_SIZE;
3121 int cur, l;
3122 xmlChar *target;
3123 xmlParserInputState state;
3124 int count = 0;
3125
3126 if ((RAW == '<') && (NXT(1) == '?')) {
3127 xmlParserInputPtr input = ctxt->input;
3128 state = ctxt->instate;
3129 ctxt->instate = XML_PARSER_PI;
3130 /*
3131 * this is a Processing Instruction.
3132 */
3133 SKIP(2);
3134 SHRINK;
3135
3136 /*
3137 * Parse the target name and check for special support like
3138 * namespace.
3139 */
3140 target = xmlParsePITarget(ctxt);
3141 if (target != NULL) {
3142 if ((RAW == '?') && (NXT(1) == '>')) {
3143 if (input != ctxt->input) {
3144 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3145 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3146 ctxt->sax->error(ctxt->userData,
3147 "PI declaration doesn't start and stop in the same entity\n");
3148 ctxt->wellFormed = 0;
3149 ctxt->disableSAX = 1;
3150 }
3151 SKIP(2);
3152
3153 /*
3154 * SAX: PI detected.
3155 */
3156 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3157 (ctxt->sax->processingInstruction != NULL))
3158 ctxt->sax->processingInstruction(ctxt->userData,
3159 target, NULL);
3160 ctxt->instate = state;
3161 xmlFree(target);
3162 return;
3163 }
3164 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3165 if (buf == NULL) {
3166 xmlGenericError(xmlGenericErrorContext,
3167 "malloc of %d byte failed\n", size);
3168 ctxt->instate = state;
3169 return;
3170 }
3171 cur = CUR;
3172 if (!IS_BLANK(cur)) {
3173 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175 ctxt->sax->error(ctxt->userData,
3176 "xmlParsePI: PI %s space expected\n", target);
3177 ctxt->wellFormed = 0;
3178 ctxt->disableSAX = 1;
3179 }
3180 SKIP_BLANKS;
3181 cur = CUR_CHAR(l);
3182 while (IS_CHAR(cur) && /* checked */
3183 ((cur != '?') || (NXT(1) != '>'))) {
3184 if (len + 5 >= size) {
3185 size *= 2;
3186 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3187 if (buf == NULL) {
3188 xmlGenericError(xmlGenericErrorContext,
3189 "realloc of %d byte failed\n", size);
3190 ctxt->instate = state;
3191 return;
3192 }
3193 }
3194 count++;
3195 if (count > 50) {
3196 GROW;
3197 count = 0;
3198 }
3199 COPY_BUF(l,buf,len,cur);
3200 NEXTL(l);
3201 cur = CUR_CHAR(l);
3202 if (cur == 0) {
3203 SHRINK;
3204 GROW;
3205 cur = CUR_CHAR(l);
3206 }
3207 }
3208 buf[len] = 0;
3209 if (cur != '?') {
3210 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3212 ctxt->sax->error(ctxt->userData,
3213 "xmlParsePI: PI %s never end ...\n", target);
3214 ctxt->wellFormed = 0;
3215 ctxt->disableSAX = 1;
3216 } else {
3217 if (input != ctxt->input) {
3218 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3220 ctxt->sax->error(ctxt->userData,
3221 "PI declaration doesn't start and stop in the same entity\n");
3222 ctxt->wellFormed = 0;
3223 ctxt->disableSAX = 1;
3224 }
3225 SKIP(2);
3226
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003227#ifdef LIBXML_CATALOG_ENABLED
3228 if (((state == XML_PARSER_MISC) ||
3229 (state == XML_PARSER_START)) &&
3230 (xmlStrEqual(target, XML_CATALOG_PI))) {
3231 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3232 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3233 (allow == XML_CATA_ALLOW_ALL))
3234 xmlParseCatalogPI(ctxt, buf);
3235 }
3236#endif
3237
3238
Owen Taylor3473f882001-02-23 17:55:21 +00003239 /*
3240 * SAX: PI detected.
3241 */
3242 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3243 (ctxt->sax->processingInstruction != NULL))
3244 ctxt->sax->processingInstruction(ctxt->userData,
3245 target, buf);
3246 }
3247 xmlFree(buf);
3248 xmlFree(target);
3249 } else {
3250 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3252 ctxt->sax->error(ctxt->userData,
3253 "xmlParsePI : no target name\n");
3254 ctxt->wellFormed = 0;
3255 ctxt->disableSAX = 1;
3256 }
3257 ctxt->instate = state;
3258 }
3259}
3260
3261/**
3262 * xmlParseNotationDecl:
3263 * @ctxt: an XML parser context
3264 *
3265 * parse a notation declaration
3266 *
3267 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3268 *
3269 * Hence there is actually 3 choices:
3270 * 'PUBLIC' S PubidLiteral
3271 * 'PUBLIC' S PubidLiteral S SystemLiteral
3272 * and 'SYSTEM' S SystemLiteral
3273 *
3274 * See the NOTE on xmlParseExternalID().
3275 */
3276
3277void
3278xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3279 xmlChar *name;
3280 xmlChar *Pubid;
3281 xmlChar *Systemid;
3282
3283 if ((RAW == '<') && (NXT(1) == '!') &&
3284 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3285 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3286 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3287 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3288 xmlParserInputPtr input = ctxt->input;
3289 SHRINK;
3290 SKIP(10);
3291 if (!IS_BLANK(CUR)) {
3292 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3294 ctxt->sax->error(ctxt->userData,
3295 "Space required after '<!NOTATION'\n");
3296 ctxt->wellFormed = 0;
3297 ctxt->disableSAX = 1;
3298 return;
3299 }
3300 SKIP_BLANKS;
3301
Daniel Veillard76d66f42001-05-16 21:05:17 +00003302 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003303 if (name == NULL) {
3304 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3306 ctxt->sax->error(ctxt->userData,
3307 "NOTATION: Name expected here\n");
3308 ctxt->wellFormed = 0;
3309 ctxt->disableSAX = 1;
3310 return;
3311 }
3312 if (!IS_BLANK(CUR)) {
3313 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3315 ctxt->sax->error(ctxt->userData,
3316 "Space required after the NOTATION name'\n");
3317 ctxt->wellFormed = 0;
3318 ctxt->disableSAX = 1;
3319 return;
3320 }
3321 SKIP_BLANKS;
3322
3323 /*
3324 * Parse the IDs.
3325 */
3326 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3327 SKIP_BLANKS;
3328
3329 if (RAW == '>') {
3330 if (input != ctxt->input) {
3331 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3333 ctxt->sax->error(ctxt->userData,
3334"Notation declaration doesn't start and stop in the same entity\n");
3335 ctxt->wellFormed = 0;
3336 ctxt->disableSAX = 1;
3337 }
3338 NEXT;
3339 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3340 (ctxt->sax->notationDecl != NULL))
3341 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3342 } else {
3343 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3345 ctxt->sax->error(ctxt->userData,
3346 "'>' required to close NOTATION declaration\n");
3347 ctxt->wellFormed = 0;
3348 ctxt->disableSAX = 1;
3349 }
3350 xmlFree(name);
3351 if (Systemid != NULL) xmlFree(Systemid);
3352 if (Pubid != NULL) xmlFree(Pubid);
3353 }
3354}
3355
3356/**
3357 * xmlParseEntityDecl:
3358 * @ctxt: an XML parser context
3359 *
3360 * parse <!ENTITY declarations
3361 *
3362 * [70] EntityDecl ::= GEDecl | PEDecl
3363 *
3364 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3365 *
3366 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3367 *
3368 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3369 *
3370 * [74] PEDef ::= EntityValue | ExternalID
3371 *
3372 * [76] NDataDecl ::= S 'NDATA' S Name
3373 *
3374 * [ VC: Notation Declared ]
3375 * The Name must match the declared name of a notation.
3376 */
3377
3378void
3379xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3380 xmlChar *name = NULL;
3381 xmlChar *value = NULL;
3382 xmlChar *URI = NULL, *literal = NULL;
3383 xmlChar *ndata = NULL;
3384 int isParameter = 0;
3385 xmlChar *orig = NULL;
3386
3387 GROW;
3388 if ((RAW == '<') && (NXT(1) == '!') &&
3389 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3390 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3391 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3392 xmlParserInputPtr input = ctxt->input;
3393 ctxt->instate = XML_PARSER_ENTITY_DECL;
3394 SHRINK;
3395 SKIP(8);
3396 if (!IS_BLANK(CUR)) {
3397 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3399 ctxt->sax->error(ctxt->userData,
3400 "Space required after '<!ENTITY'\n");
3401 ctxt->wellFormed = 0;
3402 ctxt->disableSAX = 1;
3403 }
3404 SKIP_BLANKS;
3405
3406 if (RAW == '%') {
3407 NEXT;
3408 if (!IS_BLANK(CUR)) {
3409 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412 "Space required after '%'\n");
3413 ctxt->wellFormed = 0;
3414 ctxt->disableSAX = 1;
3415 }
3416 SKIP_BLANKS;
3417 isParameter = 1;
3418 }
3419
Daniel Veillard76d66f42001-05-16 21:05:17 +00003420 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003421 if (name == NULL) {
3422 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3424 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3425 ctxt->wellFormed = 0;
3426 ctxt->disableSAX = 1;
3427 return;
3428 }
3429 if (!IS_BLANK(CUR)) {
3430 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3432 ctxt->sax->error(ctxt->userData,
3433 "Space required after the entity name\n");
3434 ctxt->wellFormed = 0;
3435 ctxt->disableSAX = 1;
3436 }
3437 SKIP_BLANKS;
3438
3439 /*
3440 * handle the various case of definitions...
3441 */
3442 if (isParameter) {
3443 if ((RAW == '"') || (RAW == '\'')) {
3444 value = xmlParseEntityValue(ctxt, &orig);
3445 if (value) {
3446 if ((ctxt->sax != NULL) &&
3447 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3448 ctxt->sax->entityDecl(ctxt->userData, name,
3449 XML_INTERNAL_PARAMETER_ENTITY,
3450 NULL, NULL, value);
3451 }
3452 } else {
3453 URI = xmlParseExternalID(ctxt, &literal, 1);
3454 if ((URI == NULL) && (literal == NULL)) {
3455 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3457 ctxt->sax->error(ctxt->userData,
3458 "Entity value required\n");
3459 ctxt->wellFormed = 0;
3460 ctxt->disableSAX = 1;
3461 }
3462 if (URI) {
3463 xmlURIPtr uri;
3464
3465 uri = xmlParseURI((const char *) URI);
3466 if (uri == NULL) {
3467 ctxt->errNo = XML_ERR_INVALID_URI;
3468 if ((ctxt->sax != NULL) &&
3469 (!ctxt->disableSAX) &&
3470 (ctxt->sax->error != NULL))
3471 ctxt->sax->error(ctxt->userData,
3472 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003473 /*
3474 * This really ought to be a well formedness error
3475 * but the XML Core WG decided otherwise c.f. issue
3476 * E26 of the XML erratas.
3477 */
Owen Taylor3473f882001-02-23 17:55:21 +00003478 } else {
3479 if (uri->fragment != NULL) {
3480 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3481 if ((ctxt->sax != NULL) &&
3482 (!ctxt->disableSAX) &&
3483 (ctxt->sax->error != NULL))
3484 ctxt->sax->error(ctxt->userData,
3485 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003486 /*
3487 * Okay this is foolish to block those but not
3488 * invalid URIs.
3489 */
Owen Taylor3473f882001-02-23 17:55:21 +00003490 ctxt->wellFormed = 0;
3491 } else {
3492 if ((ctxt->sax != NULL) &&
3493 (!ctxt->disableSAX) &&
3494 (ctxt->sax->entityDecl != NULL))
3495 ctxt->sax->entityDecl(ctxt->userData, name,
3496 XML_EXTERNAL_PARAMETER_ENTITY,
3497 literal, URI, NULL);
3498 }
3499 xmlFreeURI(uri);
3500 }
3501 }
3502 }
3503 } else {
3504 if ((RAW == '"') || (RAW == '\'')) {
3505 value = xmlParseEntityValue(ctxt, &orig);
3506 if ((ctxt->sax != NULL) &&
3507 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3508 ctxt->sax->entityDecl(ctxt->userData, name,
3509 XML_INTERNAL_GENERAL_ENTITY,
3510 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003511 /*
3512 * For expat compatibility in SAX mode.
3513 */
3514 if ((ctxt->myDoc == NULL) ||
3515 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3516 if (ctxt->myDoc == NULL) {
3517 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3518 }
3519 if (ctxt->myDoc->intSubset == NULL)
3520 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3521 BAD_CAST "fake", NULL, NULL);
3522
3523 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3524 NULL, NULL, value);
3525 }
Owen Taylor3473f882001-02-23 17:55:21 +00003526 } else {
3527 URI = xmlParseExternalID(ctxt, &literal, 1);
3528 if ((URI == NULL) && (literal == NULL)) {
3529 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3531 ctxt->sax->error(ctxt->userData,
3532 "Entity value required\n");
3533 ctxt->wellFormed = 0;
3534 ctxt->disableSAX = 1;
3535 }
3536 if (URI) {
3537 xmlURIPtr uri;
3538
3539 uri = xmlParseURI((const char *)URI);
3540 if (uri == NULL) {
3541 ctxt->errNo = XML_ERR_INVALID_URI;
3542 if ((ctxt->sax != NULL) &&
3543 (!ctxt->disableSAX) &&
3544 (ctxt->sax->error != NULL))
3545 ctxt->sax->error(ctxt->userData,
3546 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003547 /*
3548 * This really ought to be a well formedness error
3549 * but the XML Core WG decided otherwise c.f. issue
3550 * E26 of the XML erratas.
3551 */
Owen Taylor3473f882001-02-23 17:55:21 +00003552 } else {
3553 if (uri->fragment != NULL) {
3554 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3555 if ((ctxt->sax != NULL) &&
3556 (!ctxt->disableSAX) &&
3557 (ctxt->sax->error != NULL))
3558 ctxt->sax->error(ctxt->userData,
3559 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003560 /*
3561 * Okay this is foolish to block those but not
3562 * invalid URIs.
3563 */
Owen Taylor3473f882001-02-23 17:55:21 +00003564 ctxt->wellFormed = 0;
3565 }
3566 xmlFreeURI(uri);
3567 }
3568 }
3569 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3570 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3572 ctxt->sax->error(ctxt->userData,
3573 "Space required before 'NDATA'\n");
3574 ctxt->wellFormed = 0;
3575 ctxt->disableSAX = 1;
3576 }
3577 SKIP_BLANKS;
3578 if ((RAW == 'N') && (NXT(1) == 'D') &&
3579 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3580 (NXT(4) == 'A')) {
3581 SKIP(5);
3582 if (!IS_BLANK(CUR)) {
3583 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3585 ctxt->sax->error(ctxt->userData,
3586 "Space required after 'NDATA'\n");
3587 ctxt->wellFormed = 0;
3588 ctxt->disableSAX = 1;
3589 }
3590 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003591 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003592 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3593 (ctxt->sax->unparsedEntityDecl != NULL))
3594 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3595 literal, URI, ndata);
3596 } else {
3597 if ((ctxt->sax != NULL) &&
3598 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3599 ctxt->sax->entityDecl(ctxt->userData, name,
3600 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3601 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003602 /*
3603 * For expat compatibility in SAX mode.
3604 * assuming the entity repalcement was asked for
3605 */
3606 if ((ctxt->replaceEntities != 0) &&
3607 ((ctxt->myDoc == NULL) ||
3608 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3609 if (ctxt->myDoc == NULL) {
3610 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3611 }
3612
3613 if (ctxt->myDoc->intSubset == NULL)
3614 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3615 BAD_CAST "fake", NULL, NULL);
3616 entityDecl(ctxt, name,
3617 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3618 literal, URI, NULL);
3619 }
Owen Taylor3473f882001-02-23 17:55:21 +00003620 }
3621 }
3622 }
3623 SKIP_BLANKS;
3624 if (RAW != '>') {
3625 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3627 ctxt->sax->error(ctxt->userData,
3628 "xmlParseEntityDecl: entity %s not terminated\n", name);
3629 ctxt->wellFormed = 0;
3630 ctxt->disableSAX = 1;
3631 } else {
3632 if (input != ctxt->input) {
3633 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3635 ctxt->sax->error(ctxt->userData,
3636"Entity declaration doesn't start and stop in the same entity\n");
3637 ctxt->wellFormed = 0;
3638 ctxt->disableSAX = 1;
3639 }
3640 NEXT;
3641 }
3642 if (orig != NULL) {
3643 /*
3644 * Ugly mechanism to save the raw entity value.
3645 */
3646 xmlEntityPtr cur = NULL;
3647
3648 if (isParameter) {
3649 if ((ctxt->sax != NULL) &&
3650 (ctxt->sax->getParameterEntity != NULL))
3651 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3652 } else {
3653 if ((ctxt->sax != NULL) &&
3654 (ctxt->sax->getEntity != NULL))
3655 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003656 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3657 cur = getEntity(ctxt, name);
3658 }
Owen Taylor3473f882001-02-23 17:55:21 +00003659 }
3660 if (cur != NULL) {
3661 if (cur->orig != NULL)
3662 xmlFree(orig);
3663 else
3664 cur->orig = orig;
3665 } else
3666 xmlFree(orig);
3667 }
3668 if (name != NULL) xmlFree(name);
3669 if (value != NULL) xmlFree(value);
3670 if (URI != NULL) xmlFree(URI);
3671 if (literal != NULL) xmlFree(literal);
3672 if (ndata != NULL) xmlFree(ndata);
3673 }
3674}
3675
3676/**
3677 * xmlParseDefaultDecl:
3678 * @ctxt: an XML parser context
3679 * @value: Receive a possible fixed default value for the attribute
3680 *
3681 * Parse an attribute default declaration
3682 *
3683 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3684 *
3685 * [ VC: Required Attribute ]
3686 * if the default declaration is the keyword #REQUIRED, then the
3687 * attribute must be specified for all elements of the type in the
3688 * attribute-list declaration.
3689 *
3690 * [ VC: Attribute Default Legal ]
3691 * The declared default value must meet the lexical constraints of
3692 * the declared attribute type c.f. xmlValidateAttributeDecl()
3693 *
3694 * [ VC: Fixed Attribute Default ]
3695 * if an attribute has a default value declared with the #FIXED
3696 * keyword, instances of that attribute must match the default value.
3697 *
3698 * [ WFC: No < in Attribute Values ]
3699 * handled in xmlParseAttValue()
3700 *
3701 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3702 * or XML_ATTRIBUTE_FIXED.
3703 */
3704
3705int
3706xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3707 int val;
3708 xmlChar *ret;
3709
3710 *value = NULL;
3711 if ((RAW == '#') && (NXT(1) == 'R') &&
3712 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3713 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3714 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3715 (NXT(8) == 'D')) {
3716 SKIP(9);
3717 return(XML_ATTRIBUTE_REQUIRED);
3718 }
3719 if ((RAW == '#') && (NXT(1) == 'I') &&
3720 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3721 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3722 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3723 SKIP(8);
3724 return(XML_ATTRIBUTE_IMPLIED);
3725 }
3726 val = XML_ATTRIBUTE_NONE;
3727 if ((RAW == '#') && (NXT(1) == 'F') &&
3728 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3729 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3730 SKIP(6);
3731 val = XML_ATTRIBUTE_FIXED;
3732 if (!IS_BLANK(CUR)) {
3733 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3735 ctxt->sax->error(ctxt->userData,
3736 "Space required after '#FIXED'\n");
3737 ctxt->wellFormed = 0;
3738 ctxt->disableSAX = 1;
3739 }
3740 SKIP_BLANKS;
3741 }
3742 ret = xmlParseAttValue(ctxt);
3743 ctxt->instate = XML_PARSER_DTD;
3744 if (ret == NULL) {
3745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3746 ctxt->sax->error(ctxt->userData,
3747 "Attribute default value declaration error\n");
3748 ctxt->wellFormed = 0;
3749 ctxt->disableSAX = 1;
3750 } else
3751 *value = ret;
3752 return(val);
3753}
3754
3755/**
3756 * xmlParseNotationType:
3757 * @ctxt: an XML parser context
3758 *
3759 * parse an Notation attribute type.
3760 *
3761 * Note: the leading 'NOTATION' S part has already being parsed...
3762 *
3763 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3764 *
3765 * [ VC: Notation Attributes ]
3766 * Values of this type must match one of the notation names included
3767 * in the declaration; all notation names in the declaration must be declared.
3768 *
3769 * Returns: the notation attribute tree built while parsing
3770 */
3771
3772xmlEnumerationPtr
3773xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3774 xmlChar *name;
3775 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3776
3777 if (RAW != '(') {
3778 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3780 ctxt->sax->error(ctxt->userData,
3781 "'(' required to start 'NOTATION'\n");
3782 ctxt->wellFormed = 0;
3783 ctxt->disableSAX = 1;
3784 return(NULL);
3785 }
3786 SHRINK;
3787 do {
3788 NEXT;
3789 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003790 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003791 if (name == NULL) {
3792 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3794 ctxt->sax->error(ctxt->userData,
3795 "Name expected in NOTATION declaration\n");
3796 ctxt->wellFormed = 0;
3797 ctxt->disableSAX = 1;
3798 return(ret);
3799 }
3800 cur = xmlCreateEnumeration(name);
3801 xmlFree(name);
3802 if (cur == NULL) return(ret);
3803 if (last == NULL) ret = last = cur;
3804 else {
3805 last->next = cur;
3806 last = cur;
3807 }
3808 SKIP_BLANKS;
3809 } while (RAW == '|');
3810 if (RAW != ')') {
3811 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3813 ctxt->sax->error(ctxt->userData,
3814 "')' required to finish NOTATION declaration\n");
3815 ctxt->wellFormed = 0;
3816 ctxt->disableSAX = 1;
3817 if ((last != NULL) && (last != ret))
3818 xmlFreeEnumeration(last);
3819 return(ret);
3820 }
3821 NEXT;
3822 return(ret);
3823}
3824
3825/**
3826 * xmlParseEnumerationType:
3827 * @ctxt: an XML parser context
3828 *
3829 * parse an Enumeration attribute type.
3830 *
3831 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3832 *
3833 * [ VC: Enumeration ]
3834 * Values of this type must match one of the Nmtoken tokens in
3835 * the declaration
3836 *
3837 * Returns: the enumeration attribute tree built while parsing
3838 */
3839
3840xmlEnumerationPtr
3841xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3842 xmlChar *name;
3843 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3844
3845 if (RAW != '(') {
3846 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3848 ctxt->sax->error(ctxt->userData,
3849 "'(' required to start ATTLIST enumeration\n");
3850 ctxt->wellFormed = 0;
3851 ctxt->disableSAX = 1;
3852 return(NULL);
3853 }
3854 SHRINK;
3855 do {
3856 NEXT;
3857 SKIP_BLANKS;
3858 name = xmlParseNmtoken(ctxt);
3859 if (name == NULL) {
3860 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3862 ctxt->sax->error(ctxt->userData,
3863 "NmToken expected in ATTLIST enumeration\n");
3864 ctxt->wellFormed = 0;
3865 ctxt->disableSAX = 1;
3866 return(ret);
3867 }
3868 cur = xmlCreateEnumeration(name);
3869 xmlFree(name);
3870 if (cur == NULL) return(ret);
3871 if (last == NULL) ret = last = cur;
3872 else {
3873 last->next = cur;
3874 last = cur;
3875 }
3876 SKIP_BLANKS;
3877 } while (RAW == '|');
3878 if (RAW != ')') {
3879 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3881 ctxt->sax->error(ctxt->userData,
3882 "')' required to finish ATTLIST enumeration\n");
3883 ctxt->wellFormed = 0;
3884 ctxt->disableSAX = 1;
3885 return(ret);
3886 }
3887 NEXT;
3888 return(ret);
3889}
3890
3891/**
3892 * xmlParseEnumeratedType:
3893 * @ctxt: an XML parser context
3894 * @tree: the enumeration tree built while parsing
3895 *
3896 * parse an Enumerated attribute type.
3897 *
3898 * [57] EnumeratedType ::= NotationType | Enumeration
3899 *
3900 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3901 *
3902 *
3903 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3904 */
3905
3906int
3907xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3908 if ((RAW == 'N') && (NXT(1) == 'O') &&
3909 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3910 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3911 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3912 SKIP(8);
3913 if (!IS_BLANK(CUR)) {
3914 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3916 ctxt->sax->error(ctxt->userData,
3917 "Space required after 'NOTATION'\n");
3918 ctxt->wellFormed = 0;
3919 ctxt->disableSAX = 1;
3920 return(0);
3921 }
3922 SKIP_BLANKS;
3923 *tree = xmlParseNotationType(ctxt);
3924 if (*tree == NULL) return(0);
3925 return(XML_ATTRIBUTE_NOTATION);
3926 }
3927 *tree = xmlParseEnumerationType(ctxt);
3928 if (*tree == NULL) return(0);
3929 return(XML_ATTRIBUTE_ENUMERATION);
3930}
3931
3932/**
3933 * xmlParseAttributeType:
3934 * @ctxt: an XML parser context
3935 * @tree: the enumeration tree built while parsing
3936 *
3937 * parse the Attribute list def for an element
3938 *
3939 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3940 *
3941 * [55] StringType ::= 'CDATA'
3942 *
3943 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3944 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3945 *
3946 * Validity constraints for attribute values syntax are checked in
3947 * xmlValidateAttributeValue()
3948 *
3949 * [ VC: ID ]
3950 * Values of type ID must match the Name production. A name must not
3951 * appear more than once in an XML document as a value of this type;
3952 * i.e., ID values must uniquely identify the elements which bear them.
3953 *
3954 * [ VC: One ID per Element Type ]
3955 * No element type may have more than one ID attribute specified.
3956 *
3957 * [ VC: ID Attribute Default ]
3958 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3959 *
3960 * [ VC: IDREF ]
3961 * Values of type IDREF must match the Name production, and values
3962 * of type IDREFS must match Names; each IDREF Name must match the value
3963 * of an ID attribute on some element in the XML document; i.e. IDREF
3964 * values must match the value of some ID attribute.
3965 *
3966 * [ VC: Entity Name ]
3967 * Values of type ENTITY must match the Name production, values
3968 * of type ENTITIES must match Names; each Entity Name must match the
3969 * name of an unparsed entity declared in the DTD.
3970 *
3971 * [ VC: Name Token ]
3972 * Values of type NMTOKEN must match the Nmtoken production; values
3973 * of type NMTOKENS must match Nmtokens.
3974 *
3975 * Returns the attribute type
3976 */
3977int
3978xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3979 SHRINK;
3980 if ((RAW == 'C') && (NXT(1) == 'D') &&
3981 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3982 (NXT(4) == 'A')) {
3983 SKIP(5);
3984 return(XML_ATTRIBUTE_CDATA);
3985 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3986 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3987 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3988 SKIP(6);
3989 return(XML_ATTRIBUTE_IDREFS);
3990 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3991 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3992 (NXT(4) == 'F')) {
3993 SKIP(5);
3994 return(XML_ATTRIBUTE_IDREF);
3995 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3996 SKIP(2);
3997 return(XML_ATTRIBUTE_ID);
3998 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3999 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4000 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4001 SKIP(6);
4002 return(XML_ATTRIBUTE_ENTITY);
4003 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4004 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4005 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4006 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4007 SKIP(8);
4008 return(XML_ATTRIBUTE_ENTITIES);
4009 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4010 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4011 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4012 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4013 SKIP(8);
4014 return(XML_ATTRIBUTE_NMTOKENS);
4015 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4016 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4017 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4018 (NXT(6) == 'N')) {
4019 SKIP(7);
4020 return(XML_ATTRIBUTE_NMTOKEN);
4021 }
4022 return(xmlParseEnumeratedType(ctxt, tree));
4023}
4024
4025/**
4026 * xmlParseAttributeListDecl:
4027 * @ctxt: an XML parser context
4028 *
4029 * : parse the Attribute list def for an element
4030 *
4031 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4032 *
4033 * [53] AttDef ::= S Name S AttType S DefaultDecl
4034 *
4035 */
4036void
4037xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4038 xmlChar *elemName;
4039 xmlChar *attrName;
4040 xmlEnumerationPtr tree;
4041
4042 if ((RAW == '<') && (NXT(1) == '!') &&
4043 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4044 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4045 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4046 (NXT(8) == 'T')) {
4047 xmlParserInputPtr input = ctxt->input;
4048
4049 SKIP(9);
4050 if (!IS_BLANK(CUR)) {
4051 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4053 ctxt->sax->error(ctxt->userData,
4054 "Space required after '<!ATTLIST'\n");
4055 ctxt->wellFormed = 0;
4056 ctxt->disableSAX = 1;
4057 }
4058 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004059 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004060 if (elemName == NULL) {
4061 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4063 ctxt->sax->error(ctxt->userData,
4064 "ATTLIST: no name for Element\n");
4065 ctxt->wellFormed = 0;
4066 ctxt->disableSAX = 1;
4067 return;
4068 }
4069 SKIP_BLANKS;
4070 GROW;
4071 while (RAW != '>') {
4072 const xmlChar *check = CUR_PTR;
4073 int type;
4074 int def;
4075 xmlChar *defaultValue = NULL;
4076
4077 GROW;
4078 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004079 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004080 if (attrName == NULL) {
4081 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4083 ctxt->sax->error(ctxt->userData,
4084 "ATTLIST: no name for Attribute\n");
4085 ctxt->wellFormed = 0;
4086 ctxt->disableSAX = 1;
4087 break;
4088 }
4089 GROW;
4090 if (!IS_BLANK(CUR)) {
4091 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4093 ctxt->sax->error(ctxt->userData,
4094 "Space required after the attribute name\n");
4095 ctxt->wellFormed = 0;
4096 ctxt->disableSAX = 1;
4097 if (attrName != NULL)
4098 xmlFree(attrName);
4099 if (defaultValue != NULL)
4100 xmlFree(defaultValue);
4101 break;
4102 }
4103 SKIP_BLANKS;
4104
4105 type = xmlParseAttributeType(ctxt, &tree);
4106 if (type <= 0) {
4107 if (attrName != NULL)
4108 xmlFree(attrName);
4109 if (defaultValue != NULL)
4110 xmlFree(defaultValue);
4111 break;
4112 }
4113
4114 GROW;
4115 if (!IS_BLANK(CUR)) {
4116 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4118 ctxt->sax->error(ctxt->userData,
4119 "Space required after the attribute type\n");
4120 ctxt->wellFormed = 0;
4121 ctxt->disableSAX = 1;
4122 if (attrName != NULL)
4123 xmlFree(attrName);
4124 if (defaultValue != NULL)
4125 xmlFree(defaultValue);
4126 if (tree != NULL)
4127 xmlFreeEnumeration(tree);
4128 break;
4129 }
4130 SKIP_BLANKS;
4131
4132 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4133 if (def <= 0) {
4134 if (attrName != NULL)
4135 xmlFree(attrName);
4136 if (defaultValue != NULL)
4137 xmlFree(defaultValue);
4138 if (tree != NULL)
4139 xmlFreeEnumeration(tree);
4140 break;
4141 }
4142
4143 GROW;
4144 if (RAW != '>') {
4145 if (!IS_BLANK(CUR)) {
4146 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4147 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4148 ctxt->sax->error(ctxt->userData,
4149 "Space required after the attribute default value\n");
4150 ctxt->wellFormed = 0;
4151 ctxt->disableSAX = 1;
4152 if (attrName != NULL)
4153 xmlFree(attrName);
4154 if (defaultValue != NULL)
4155 xmlFree(defaultValue);
4156 if (tree != NULL)
4157 xmlFreeEnumeration(tree);
4158 break;
4159 }
4160 SKIP_BLANKS;
4161 }
4162 if (check == CUR_PTR) {
4163 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4165 ctxt->sax->error(ctxt->userData,
4166 "xmlParseAttributeListDecl: detected internal error\n");
4167 if (attrName != NULL)
4168 xmlFree(attrName);
4169 if (defaultValue != NULL)
4170 xmlFree(defaultValue);
4171 if (tree != NULL)
4172 xmlFreeEnumeration(tree);
4173 break;
4174 }
4175 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4176 (ctxt->sax->attributeDecl != NULL))
4177 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4178 type, def, defaultValue, tree);
4179 if (attrName != NULL)
4180 xmlFree(attrName);
4181 if (defaultValue != NULL)
4182 xmlFree(defaultValue);
4183 GROW;
4184 }
4185 if (RAW == '>') {
4186 if (input != ctxt->input) {
4187 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4189 ctxt->sax->error(ctxt->userData,
4190"Attribute list declaration doesn't start and stop in the same entity\n");
4191 ctxt->wellFormed = 0;
4192 ctxt->disableSAX = 1;
4193 }
4194 NEXT;
4195 }
4196
4197 xmlFree(elemName);
4198 }
4199}
4200
4201/**
4202 * xmlParseElementMixedContentDecl:
4203 * @ctxt: an XML parser context
4204 *
4205 * parse the declaration for a Mixed Element content
4206 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4207 *
4208 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4209 * '(' S? '#PCDATA' S? ')'
4210 *
4211 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4212 *
4213 * [ VC: No Duplicate Types ]
4214 * The same name must not appear more than once in a single
4215 * mixed-content declaration.
4216 *
4217 * returns: the list of the xmlElementContentPtr describing the element choices
4218 */
4219xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004220xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004221 xmlElementContentPtr ret = NULL, cur = NULL, n;
4222 xmlChar *elem = NULL;
4223
4224 GROW;
4225 if ((RAW == '#') && (NXT(1) == 'P') &&
4226 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4227 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4228 (NXT(6) == 'A')) {
4229 SKIP(7);
4230 SKIP_BLANKS;
4231 SHRINK;
4232 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004233 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4234 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4235 if (ctxt->vctxt.error != NULL)
4236 ctxt->vctxt.error(ctxt->vctxt.userData,
4237"Element content declaration doesn't start and stop in the same entity\n");
4238 ctxt->valid = 0;
4239 }
Owen Taylor3473f882001-02-23 17:55:21 +00004240 NEXT;
4241 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4242 if (RAW == '*') {
4243 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4244 NEXT;
4245 }
4246 return(ret);
4247 }
4248 if ((RAW == '(') || (RAW == '|')) {
4249 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4250 if (ret == NULL) return(NULL);
4251 }
4252 while (RAW == '|') {
4253 NEXT;
4254 if (elem == NULL) {
4255 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4256 if (ret == NULL) return(NULL);
4257 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004258 if (cur != NULL)
4259 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004260 cur = ret;
4261 } else {
4262 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4263 if (n == NULL) return(NULL);
4264 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004265 if (n->c1 != NULL)
4266 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004267 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004268 if (n != NULL)
4269 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004270 cur = n;
4271 xmlFree(elem);
4272 }
4273 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004274 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004275 if (elem == NULL) {
4276 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4278 ctxt->sax->error(ctxt->userData,
4279 "xmlParseElementMixedContentDecl : Name expected\n");
4280 ctxt->wellFormed = 0;
4281 ctxt->disableSAX = 1;
4282 xmlFreeElementContent(cur);
4283 return(NULL);
4284 }
4285 SKIP_BLANKS;
4286 GROW;
4287 }
4288 if ((RAW == ')') && (NXT(1) == '*')) {
4289 if (elem != NULL) {
4290 cur->c2 = xmlNewElementContent(elem,
4291 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004292 if (cur->c2 != NULL)
4293 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004294 xmlFree(elem);
4295 }
4296 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004297 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4298 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4299 if (ctxt->vctxt.error != NULL)
4300 ctxt->vctxt.error(ctxt->vctxt.userData,
4301"Element content declaration doesn't start and stop in the same entity\n");
4302 ctxt->valid = 0;
4303 }
Owen Taylor3473f882001-02-23 17:55:21 +00004304 SKIP(2);
4305 } else {
4306 if (elem != NULL) xmlFree(elem);
4307 xmlFreeElementContent(ret);
4308 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4310 ctxt->sax->error(ctxt->userData,
4311 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4312 ctxt->wellFormed = 0;
4313 ctxt->disableSAX = 1;
4314 return(NULL);
4315 }
4316
4317 } else {
4318 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4320 ctxt->sax->error(ctxt->userData,
4321 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4322 ctxt->wellFormed = 0;
4323 ctxt->disableSAX = 1;
4324 }
4325 return(ret);
4326}
4327
4328/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004329 * xmlParseElementChildrenContentD:
4330 * @ctxt: an XML parser context
4331 *
4332 * VMS version of xmlParseElementChildrenContentDecl()
4333 *
4334 * Returns the tree of xmlElementContentPtr describing the element
4335 * hierarchy.
4336 */
4337/**
Owen Taylor3473f882001-02-23 17:55:21 +00004338 * xmlParseElementChildrenContentDecl:
4339 * @ctxt: an XML parser context
4340 *
4341 * parse the declaration for a Mixed Element content
4342 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4343 *
4344 *
4345 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4346 *
4347 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4348 *
4349 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4350 *
4351 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4352 *
4353 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4354 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004355 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004356 * opening or closing parentheses in a choice, seq, or Mixed
4357 * construct is contained in the replacement text for a parameter
4358 * entity, both must be contained in the same replacement text. For
4359 * interoperability, if a parameter-entity reference appears in a
4360 * choice, seq, or Mixed construct, its replacement text should not
4361 * be empty, and neither the first nor last non-blank character of
4362 * the replacement text should be a connector (| or ,).
4363 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004364 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004365 * hierarchy.
4366 */
4367xmlElementContentPtr
4368#ifdef VMS
4369xmlParseElementChildrenContentD
4370#else
4371xmlParseElementChildrenContentDecl
4372#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004373(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004374 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4375 xmlChar *elem;
4376 xmlChar type = 0;
4377
4378 SKIP_BLANKS;
4379 GROW;
4380 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004381 xmlParserInputPtr input = ctxt->input;
4382
Owen Taylor3473f882001-02-23 17:55:21 +00004383 /* Recurse on first child */
4384 NEXT;
4385 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004386 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004387 SKIP_BLANKS;
4388 GROW;
4389 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004390 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004391 if (elem == NULL) {
4392 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4394 ctxt->sax->error(ctxt->userData,
4395 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4396 ctxt->wellFormed = 0;
4397 ctxt->disableSAX = 1;
4398 return(NULL);
4399 }
4400 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4401 GROW;
4402 if (RAW == '?') {
4403 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4404 NEXT;
4405 } else if (RAW == '*') {
4406 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4407 NEXT;
4408 } else if (RAW == '+') {
4409 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4410 NEXT;
4411 } else {
4412 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4413 }
4414 xmlFree(elem);
4415 GROW;
4416 }
4417 SKIP_BLANKS;
4418 SHRINK;
4419 while (RAW != ')') {
4420 /*
4421 * Each loop we parse one separator and one element.
4422 */
4423 if (RAW == ',') {
4424 if (type == 0) type = CUR;
4425
4426 /*
4427 * Detect "Name | Name , Name" error
4428 */
4429 else if (type != CUR) {
4430 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4432 ctxt->sax->error(ctxt->userData,
4433 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4434 type);
4435 ctxt->wellFormed = 0;
4436 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004437 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004438 xmlFreeElementContent(last);
4439 if (ret != NULL)
4440 xmlFreeElementContent(ret);
4441 return(NULL);
4442 }
4443 NEXT;
4444
4445 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4446 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004447 if ((last != NULL) && (last != ret))
4448 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004449 xmlFreeElementContent(ret);
4450 return(NULL);
4451 }
4452 if (last == NULL) {
4453 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004454 if (ret != NULL)
4455 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004456 ret = cur = op;
4457 } else {
4458 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004459 if (op != NULL)
4460 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004461 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004462 if (last != NULL)
4463 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004464 cur =op;
4465 last = NULL;
4466 }
4467 } else if (RAW == '|') {
4468 if (type == 0) type = CUR;
4469
4470 /*
4471 * Detect "Name , Name | Name" error
4472 */
4473 else if (type != CUR) {
4474 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4476 ctxt->sax->error(ctxt->userData,
4477 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4478 type);
4479 ctxt->wellFormed = 0;
4480 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004481 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004482 xmlFreeElementContent(last);
4483 if (ret != NULL)
4484 xmlFreeElementContent(ret);
4485 return(NULL);
4486 }
4487 NEXT;
4488
4489 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4490 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004491 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004492 xmlFreeElementContent(last);
4493 if (ret != NULL)
4494 xmlFreeElementContent(ret);
4495 return(NULL);
4496 }
4497 if (last == NULL) {
4498 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004499 if (ret != NULL)
4500 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004501 ret = cur = op;
4502 } else {
4503 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004504 if (op != NULL)
4505 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004506 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004507 if (last != NULL)
4508 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004509 cur =op;
4510 last = NULL;
4511 }
4512 } else {
4513 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4515 ctxt->sax->error(ctxt->userData,
4516 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4517 ctxt->wellFormed = 0;
4518 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004519 if (ret != NULL)
4520 xmlFreeElementContent(ret);
4521 return(NULL);
4522 }
4523 GROW;
4524 SKIP_BLANKS;
4525 GROW;
4526 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004527 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004528 /* Recurse on second child */
4529 NEXT;
4530 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004531 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004532 SKIP_BLANKS;
4533 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004534 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004535 if (elem == NULL) {
4536 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4538 ctxt->sax->error(ctxt->userData,
4539 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4540 ctxt->wellFormed = 0;
4541 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004542 if (ret != NULL)
4543 xmlFreeElementContent(ret);
4544 return(NULL);
4545 }
4546 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4547 xmlFree(elem);
4548 if (RAW == '?') {
4549 last->ocur = XML_ELEMENT_CONTENT_OPT;
4550 NEXT;
4551 } else if (RAW == '*') {
4552 last->ocur = XML_ELEMENT_CONTENT_MULT;
4553 NEXT;
4554 } else if (RAW == '+') {
4555 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4556 NEXT;
4557 } else {
4558 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4559 }
4560 }
4561 SKIP_BLANKS;
4562 GROW;
4563 }
4564 if ((cur != NULL) && (last != NULL)) {
4565 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004566 if (last != NULL)
4567 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004568 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004569 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4570 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4571 if (ctxt->vctxt.error != NULL)
4572 ctxt->vctxt.error(ctxt->vctxt.userData,
4573"Element content declaration doesn't start and stop in the same entity\n");
4574 ctxt->valid = 0;
4575 }
Owen Taylor3473f882001-02-23 17:55:21 +00004576 NEXT;
4577 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004578 if (ret != NULL)
4579 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004580 NEXT;
4581 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004582 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004583 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004584 cur = ret;
4585 /*
4586 * Some normalization:
4587 * (a | b* | c?)* == (a | b | c)*
4588 */
4589 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4590 if ((cur->c1 != NULL) &&
4591 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4592 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4593 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4594 if ((cur->c2 != NULL) &&
4595 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4596 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4597 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4598 cur = cur->c2;
4599 }
4600 }
Owen Taylor3473f882001-02-23 17:55:21 +00004601 NEXT;
4602 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004603 if (ret != NULL) {
4604 int found = 0;
4605
Daniel Veillarde470df72001-04-18 21:41:07 +00004606 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004607 /*
4608 * Some normalization:
4609 * (a | b*)+ == (a | b)*
4610 * (a | b?)+ == (a | b)*
4611 */
4612 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4613 if ((cur->c1 != NULL) &&
4614 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4615 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4616 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4617 found = 1;
4618 }
4619 if ((cur->c2 != NULL) &&
4620 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4621 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4622 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4623 found = 1;
4624 }
4625 cur = cur->c2;
4626 }
4627 if (found)
4628 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4629 }
Owen Taylor3473f882001-02-23 17:55:21 +00004630 NEXT;
4631 }
4632 return(ret);
4633}
4634
4635/**
4636 * xmlParseElementContentDecl:
4637 * @ctxt: an XML parser context
4638 * @name: the name of the element being defined.
4639 * @result: the Element Content pointer will be stored here if any
4640 *
4641 * parse the declaration for an Element content either Mixed or Children,
4642 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4643 *
4644 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4645 *
4646 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4647 */
4648
4649int
4650xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4651 xmlElementContentPtr *result) {
4652
4653 xmlElementContentPtr tree = NULL;
4654 xmlParserInputPtr input = ctxt->input;
4655 int res;
4656
4657 *result = NULL;
4658
4659 if (RAW != '(') {
4660 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4662 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004663 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004664 ctxt->wellFormed = 0;
4665 ctxt->disableSAX = 1;
4666 return(-1);
4667 }
4668 NEXT;
4669 GROW;
4670 SKIP_BLANKS;
4671 if ((RAW == '#') && (NXT(1) == 'P') &&
4672 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4673 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4674 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004675 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004676 res = XML_ELEMENT_TYPE_MIXED;
4677 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004678 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004679 res = XML_ELEMENT_TYPE_ELEMENT;
4680 }
Owen Taylor3473f882001-02-23 17:55:21 +00004681 SKIP_BLANKS;
4682 *result = tree;
4683 return(res);
4684}
4685
4686/**
4687 * xmlParseElementDecl:
4688 * @ctxt: an XML parser context
4689 *
4690 * parse an Element declaration.
4691 *
4692 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4693 *
4694 * [ VC: Unique Element Type Declaration ]
4695 * No element type may be declared more than once
4696 *
4697 * Returns the type of the element, or -1 in case of error
4698 */
4699int
4700xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4701 xmlChar *name;
4702 int ret = -1;
4703 xmlElementContentPtr content = NULL;
4704
4705 GROW;
4706 if ((RAW == '<') && (NXT(1) == '!') &&
4707 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4708 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4709 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4710 (NXT(8) == 'T')) {
4711 xmlParserInputPtr input = ctxt->input;
4712
4713 SKIP(9);
4714 if (!IS_BLANK(CUR)) {
4715 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4717 ctxt->sax->error(ctxt->userData,
4718 "Space required after 'ELEMENT'\n");
4719 ctxt->wellFormed = 0;
4720 ctxt->disableSAX = 1;
4721 }
4722 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004723 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004724 if (name == NULL) {
4725 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4727 ctxt->sax->error(ctxt->userData,
4728 "xmlParseElementDecl: no name for Element\n");
4729 ctxt->wellFormed = 0;
4730 ctxt->disableSAX = 1;
4731 return(-1);
4732 }
4733 while ((RAW == 0) && (ctxt->inputNr > 1))
4734 xmlPopInput(ctxt);
4735 if (!IS_BLANK(CUR)) {
4736 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4738 ctxt->sax->error(ctxt->userData,
4739 "Space required after the element name\n");
4740 ctxt->wellFormed = 0;
4741 ctxt->disableSAX = 1;
4742 }
4743 SKIP_BLANKS;
4744 if ((RAW == 'E') && (NXT(1) == 'M') &&
4745 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4746 (NXT(4) == 'Y')) {
4747 SKIP(5);
4748 /*
4749 * Element must always be empty.
4750 */
4751 ret = XML_ELEMENT_TYPE_EMPTY;
4752 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4753 (NXT(2) == 'Y')) {
4754 SKIP(3);
4755 /*
4756 * Element is a generic container.
4757 */
4758 ret = XML_ELEMENT_TYPE_ANY;
4759 } else if (RAW == '(') {
4760 ret = xmlParseElementContentDecl(ctxt, name, &content);
4761 } else {
4762 /*
4763 * [ WFC: PEs in Internal Subset ] error handling.
4764 */
4765 if ((RAW == '%') && (ctxt->external == 0) &&
4766 (ctxt->inputNr == 1)) {
4767 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4769 ctxt->sax->error(ctxt->userData,
4770 "PEReference: forbidden within markup decl in internal subset\n");
4771 } else {
4772 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4774 ctxt->sax->error(ctxt->userData,
4775 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4776 }
4777 ctxt->wellFormed = 0;
4778 ctxt->disableSAX = 1;
4779 if (name != NULL) xmlFree(name);
4780 return(-1);
4781 }
4782
4783 SKIP_BLANKS;
4784 /*
4785 * Pop-up of finished entities.
4786 */
4787 while ((RAW == 0) && (ctxt->inputNr > 1))
4788 xmlPopInput(ctxt);
4789 SKIP_BLANKS;
4790
4791 if (RAW != '>') {
4792 ctxt->errNo = XML_ERR_GT_REQUIRED;
4793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4794 ctxt->sax->error(ctxt->userData,
4795 "xmlParseElementDecl: expected '>' at the end\n");
4796 ctxt->wellFormed = 0;
4797 ctxt->disableSAX = 1;
4798 } else {
4799 if (input != ctxt->input) {
4800 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4802 ctxt->sax->error(ctxt->userData,
4803"Element declaration doesn't start and stop in the same entity\n");
4804 ctxt->wellFormed = 0;
4805 ctxt->disableSAX = 1;
4806 }
4807
4808 NEXT;
4809 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4810 (ctxt->sax->elementDecl != NULL))
4811 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4812 content);
4813 }
4814 if (content != NULL) {
4815 xmlFreeElementContent(content);
4816 }
4817 if (name != NULL) {
4818 xmlFree(name);
4819 }
4820 }
4821 return(ret);
4822}
4823
4824/**
Owen Taylor3473f882001-02-23 17:55:21 +00004825 * xmlParseConditionalSections
4826 * @ctxt: an XML parser context
4827 *
4828 * [61] conditionalSect ::= includeSect | ignoreSect
4829 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4830 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4831 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4832 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4833 */
4834
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004835static void
Owen Taylor3473f882001-02-23 17:55:21 +00004836xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4837 SKIP(3);
4838 SKIP_BLANKS;
4839 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4840 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4841 (NXT(6) == 'E')) {
4842 SKIP(7);
4843 SKIP_BLANKS;
4844 if (RAW != '[') {
4845 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4847 ctxt->sax->error(ctxt->userData,
4848 "XML conditional section '[' expected\n");
4849 ctxt->wellFormed = 0;
4850 ctxt->disableSAX = 1;
4851 } else {
4852 NEXT;
4853 }
4854 if (xmlParserDebugEntities) {
4855 if ((ctxt->input != NULL) && (ctxt->input->filename))
4856 xmlGenericError(xmlGenericErrorContext,
4857 "%s(%d): ", ctxt->input->filename,
4858 ctxt->input->line);
4859 xmlGenericError(xmlGenericErrorContext,
4860 "Entering INCLUDE Conditional Section\n");
4861 }
4862
4863 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4864 (NXT(2) != '>'))) {
4865 const xmlChar *check = CUR_PTR;
4866 int cons = ctxt->input->consumed;
4867 int tok = ctxt->token;
4868
4869 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4870 xmlParseConditionalSections(ctxt);
4871 } else if (IS_BLANK(CUR)) {
4872 NEXT;
4873 } else if (RAW == '%') {
4874 xmlParsePEReference(ctxt);
4875 } else
4876 xmlParseMarkupDecl(ctxt);
4877
4878 /*
4879 * Pop-up of finished entities.
4880 */
4881 while ((RAW == 0) && (ctxt->inputNr > 1))
4882 xmlPopInput(ctxt);
4883
4884 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4885 (tok == ctxt->token)) {
4886 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4888 ctxt->sax->error(ctxt->userData,
4889 "Content error in the external subset\n");
4890 ctxt->wellFormed = 0;
4891 ctxt->disableSAX = 1;
4892 break;
4893 }
4894 }
4895 if (xmlParserDebugEntities) {
4896 if ((ctxt->input != NULL) && (ctxt->input->filename))
4897 xmlGenericError(xmlGenericErrorContext,
4898 "%s(%d): ", ctxt->input->filename,
4899 ctxt->input->line);
4900 xmlGenericError(xmlGenericErrorContext,
4901 "Leaving INCLUDE Conditional Section\n");
4902 }
4903
4904 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4905 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4906 int state;
4907 int instate;
4908 int depth = 0;
4909
4910 SKIP(6);
4911 SKIP_BLANKS;
4912 if (RAW != '[') {
4913 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4915 ctxt->sax->error(ctxt->userData,
4916 "XML conditional section '[' expected\n");
4917 ctxt->wellFormed = 0;
4918 ctxt->disableSAX = 1;
4919 } else {
4920 NEXT;
4921 }
4922 if (xmlParserDebugEntities) {
4923 if ((ctxt->input != NULL) && (ctxt->input->filename))
4924 xmlGenericError(xmlGenericErrorContext,
4925 "%s(%d): ", ctxt->input->filename,
4926 ctxt->input->line);
4927 xmlGenericError(xmlGenericErrorContext,
4928 "Entering IGNORE Conditional Section\n");
4929 }
4930
4931 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004932 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004933 * But disable SAX event generating DTD building in the meantime
4934 */
4935 state = ctxt->disableSAX;
4936 instate = ctxt->instate;
4937 ctxt->disableSAX = 1;
4938 ctxt->instate = XML_PARSER_IGNORE;
4939
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004940 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004941 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4942 depth++;
4943 SKIP(3);
4944 continue;
4945 }
4946 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4947 if (--depth >= 0) SKIP(3);
4948 continue;
4949 }
4950 NEXT;
4951 continue;
4952 }
4953
4954 ctxt->disableSAX = state;
4955 ctxt->instate = instate;
4956
4957 if (xmlParserDebugEntities) {
4958 if ((ctxt->input != NULL) && (ctxt->input->filename))
4959 xmlGenericError(xmlGenericErrorContext,
4960 "%s(%d): ", ctxt->input->filename,
4961 ctxt->input->line);
4962 xmlGenericError(xmlGenericErrorContext,
4963 "Leaving IGNORE Conditional Section\n");
4964 }
4965
4966 } else {
4967 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4969 ctxt->sax->error(ctxt->userData,
4970 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4971 ctxt->wellFormed = 0;
4972 ctxt->disableSAX = 1;
4973 }
4974
4975 if (RAW == 0)
4976 SHRINK;
4977
4978 if (RAW == 0) {
4979 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4981 ctxt->sax->error(ctxt->userData,
4982 "XML conditional section not closed\n");
4983 ctxt->wellFormed = 0;
4984 ctxt->disableSAX = 1;
4985 } else {
4986 SKIP(3);
4987 }
4988}
4989
4990/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004991 * xmlParseMarkupDecl:
4992 * @ctxt: an XML parser context
4993 *
4994 * parse Markup declarations
4995 *
4996 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4997 * NotationDecl | PI | Comment
4998 *
4999 * [ VC: Proper Declaration/PE Nesting ]
5000 * Parameter-entity replacement text must be properly nested with
5001 * markup declarations. That is to say, if either the first character
5002 * or the last character of a markup declaration (markupdecl above) is
5003 * contained in the replacement text for a parameter-entity reference,
5004 * both must be contained in the same replacement text.
5005 *
5006 * [ WFC: PEs in Internal Subset ]
5007 * In the internal DTD subset, parameter-entity references can occur
5008 * only where markup declarations can occur, not within markup declarations.
5009 * (This does not apply to references that occur in external parameter
5010 * entities or to the external subset.)
5011 */
5012void
5013xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5014 GROW;
5015 xmlParseElementDecl(ctxt);
5016 xmlParseAttributeListDecl(ctxt);
5017 xmlParseEntityDecl(ctxt);
5018 xmlParseNotationDecl(ctxt);
5019 xmlParsePI(ctxt);
5020 xmlParseComment(ctxt);
5021 /*
5022 * This is only for internal subset. On external entities,
5023 * the replacement is done before parsing stage
5024 */
5025 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5026 xmlParsePEReference(ctxt);
5027
5028 /*
5029 * Conditional sections are allowed from entities included
5030 * by PE References in the internal subset.
5031 */
5032 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5033 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5034 xmlParseConditionalSections(ctxt);
5035 }
5036 }
5037
5038 ctxt->instate = XML_PARSER_DTD;
5039}
5040
5041/**
5042 * xmlParseTextDecl:
5043 * @ctxt: an XML parser context
5044 *
5045 * parse an XML declaration header for external entities
5046 *
5047 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5048 *
5049 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5050 */
5051
5052void
5053xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5054 xmlChar *version;
5055
5056 /*
5057 * We know that '<?xml' is here.
5058 */
5059 if ((RAW == '<') && (NXT(1) == '?') &&
5060 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5061 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5062 SKIP(5);
5063 } else {
5064 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5066 ctxt->sax->error(ctxt->userData,
5067 "Text declaration '<?xml' required\n");
5068 ctxt->wellFormed = 0;
5069 ctxt->disableSAX = 1;
5070
5071 return;
5072 }
5073
5074 if (!IS_BLANK(CUR)) {
5075 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5077 ctxt->sax->error(ctxt->userData,
5078 "Space needed after '<?xml'\n");
5079 ctxt->wellFormed = 0;
5080 ctxt->disableSAX = 1;
5081 }
5082 SKIP_BLANKS;
5083
5084 /*
5085 * We may have the VersionInfo here.
5086 */
5087 version = xmlParseVersionInfo(ctxt);
5088 if (version == NULL)
5089 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005090 else {
5091 if (!IS_BLANK(CUR)) {
5092 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5094 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5095 ctxt->wellFormed = 0;
5096 ctxt->disableSAX = 1;
5097 }
5098 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005099 ctxt->input->version = version;
5100
5101 /*
5102 * We must have the encoding declaration
5103 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005104 xmlParseEncodingDecl(ctxt);
5105 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5106 /*
5107 * The XML REC instructs us to stop parsing right here
5108 */
5109 return;
5110 }
5111
5112 SKIP_BLANKS;
5113 if ((RAW == '?') && (NXT(1) == '>')) {
5114 SKIP(2);
5115 } else if (RAW == '>') {
5116 /* Deprecated old WD ... */
5117 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5119 ctxt->sax->error(ctxt->userData,
5120 "XML declaration must end-up with '?>'\n");
5121 ctxt->wellFormed = 0;
5122 ctxt->disableSAX = 1;
5123 NEXT;
5124 } else {
5125 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5127 ctxt->sax->error(ctxt->userData,
5128 "parsing XML declaration: '?>' expected\n");
5129 ctxt->wellFormed = 0;
5130 ctxt->disableSAX = 1;
5131 MOVETO_ENDTAG(CUR_PTR);
5132 NEXT;
5133 }
5134}
5135
5136/**
Owen Taylor3473f882001-02-23 17:55:21 +00005137 * xmlParseExternalSubset:
5138 * @ctxt: an XML parser context
5139 * @ExternalID: the external identifier
5140 * @SystemID: the system identifier (or URL)
5141 *
5142 * parse Markup declarations from an external subset
5143 *
5144 * [30] extSubset ::= textDecl? extSubsetDecl
5145 *
5146 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5147 */
5148void
5149xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5150 const xmlChar *SystemID) {
5151 GROW;
5152 if ((RAW == '<') && (NXT(1) == '?') &&
5153 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5154 (NXT(4) == 'l')) {
5155 xmlParseTextDecl(ctxt);
5156 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5157 /*
5158 * The XML REC instructs us to stop parsing right here
5159 */
5160 ctxt->instate = XML_PARSER_EOF;
5161 return;
5162 }
5163 }
5164 if (ctxt->myDoc == NULL) {
5165 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5166 }
5167 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5168 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5169
5170 ctxt->instate = XML_PARSER_DTD;
5171 ctxt->external = 1;
5172 while (((RAW == '<') && (NXT(1) == '?')) ||
5173 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005174 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005175 const xmlChar *check = CUR_PTR;
5176 int cons = ctxt->input->consumed;
5177 int tok = ctxt->token;
5178
5179 GROW;
5180 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5181 xmlParseConditionalSections(ctxt);
5182 } else if (IS_BLANK(CUR)) {
5183 NEXT;
5184 } else if (RAW == '%') {
5185 xmlParsePEReference(ctxt);
5186 } else
5187 xmlParseMarkupDecl(ctxt);
5188
5189 /*
5190 * Pop-up of finished entities.
5191 */
5192 while ((RAW == 0) && (ctxt->inputNr > 1))
5193 xmlPopInput(ctxt);
5194
5195 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5196 (tok == ctxt->token)) {
5197 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5199 ctxt->sax->error(ctxt->userData,
5200 "Content error in the external subset\n");
5201 ctxt->wellFormed = 0;
5202 ctxt->disableSAX = 1;
5203 break;
5204 }
5205 }
5206
5207 if (RAW != 0) {
5208 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5210 ctxt->sax->error(ctxt->userData,
5211 "Extra content at the end of the document\n");
5212 ctxt->wellFormed = 0;
5213 ctxt->disableSAX = 1;
5214 }
5215
5216}
5217
5218/**
5219 * xmlParseReference:
5220 * @ctxt: an XML parser context
5221 *
5222 * parse and handle entity references in content, depending on the SAX
5223 * interface, this may end-up in a call to character() if this is a
5224 * CharRef, a predefined entity, if there is no reference() callback.
5225 * or if the parser was asked to switch to that mode.
5226 *
5227 * [67] Reference ::= EntityRef | CharRef
5228 */
5229void
5230xmlParseReference(xmlParserCtxtPtr ctxt) {
5231 xmlEntityPtr ent;
5232 xmlChar *val;
5233 if (RAW != '&') return;
5234
5235 if (NXT(1) == '#') {
5236 int i = 0;
5237 xmlChar out[10];
5238 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005239 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005240
5241 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5242 /*
5243 * So we are using non-UTF-8 buffers
5244 * Check that the char fit on 8bits, if not
5245 * generate a CharRef.
5246 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005247 if (value <= 0xFF) {
5248 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005249 out[1] = 0;
5250 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5251 (!ctxt->disableSAX))
5252 ctxt->sax->characters(ctxt->userData, out, 1);
5253 } else {
5254 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005255 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005256 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005257 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005258 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5259 (!ctxt->disableSAX))
5260 ctxt->sax->reference(ctxt->userData, out);
5261 }
5262 } else {
5263 /*
5264 * Just encode the value in UTF-8
5265 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005266 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005267 out[i] = 0;
5268 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5269 (!ctxt->disableSAX))
5270 ctxt->sax->characters(ctxt->userData, out, i);
5271 }
5272 } else {
5273 ent = xmlParseEntityRef(ctxt);
5274 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005275 if (!ctxt->wellFormed)
5276 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005277 if ((ent->name != NULL) &&
5278 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5279 xmlNodePtr list = NULL;
5280 int ret;
5281
5282
5283 /*
5284 * The first reference to the entity trigger a parsing phase
5285 * where the ent->children is filled with the result from
5286 * the parsing.
5287 */
5288 if (ent->children == NULL) {
5289 xmlChar *value;
5290 value = ent->content;
5291
5292 /*
5293 * Check that this entity is well formed
5294 */
5295 if ((value != NULL) &&
5296 (value[1] == 0) && (value[0] == '<') &&
5297 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5298 /*
5299 * DONE: get definite answer on this !!!
5300 * Lots of entity decls are used to declare a single
5301 * char
5302 * <!ENTITY lt "<">
5303 * Which seems to be valid since
5304 * 2.4: The ampersand character (&) and the left angle
5305 * bracket (<) may appear in their literal form only
5306 * when used ... They are also legal within the literal
5307 * entity value of an internal entity declaration;i
5308 * see "4.3.2 Well-Formed Parsed Entities".
5309 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5310 * Looking at the OASIS test suite and James Clark
5311 * tests, this is broken. However the XML REC uses
5312 * it. Is the XML REC not well-formed ????
5313 * This is a hack to avoid this problem
5314 *
5315 * ANSWER: since lt gt amp .. are already defined,
5316 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005317 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005318 * is lousy but acceptable.
5319 */
5320 list = xmlNewDocText(ctxt->myDoc, value);
5321 if (list != NULL) {
5322 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5323 (ent->children == NULL)) {
5324 ent->children = list;
5325 ent->last = list;
5326 list->parent = (xmlNodePtr) ent;
5327 } else {
5328 xmlFreeNodeList(list);
5329 }
5330 } else if (list != NULL) {
5331 xmlFreeNodeList(list);
5332 }
5333 } else {
5334 /*
5335 * 4.3.2: An internal general parsed entity is well-formed
5336 * if its replacement text matches the production labeled
5337 * content.
5338 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005339
5340 void *user_data;
5341 /*
5342 * This is a bit hackish but this seems the best
5343 * way to make sure both SAX and DOM entity support
5344 * behaves okay.
5345 */
5346 if (ctxt->userData == ctxt)
5347 user_data = NULL;
5348 else
5349 user_data = ctxt->userData;
5350
Owen Taylor3473f882001-02-23 17:55:21 +00005351 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5352 ctxt->depth++;
5353 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005354 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005355 value, &list);
5356 ctxt->depth--;
5357 } else if (ent->etype ==
5358 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5359 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005360 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005361 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005362 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005363 ctxt->depth--;
5364 } else {
5365 ret = -1;
5366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5367 ctxt->sax->error(ctxt->userData,
5368 "Internal: invalid entity type\n");
5369 }
5370 if (ret == XML_ERR_ENTITY_LOOP) {
5371 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5373 ctxt->sax->error(ctxt->userData,
5374 "Detected entity reference loop\n");
5375 ctxt->wellFormed = 0;
5376 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005377 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005378 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005379 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5380 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005381 (ent->children == NULL)) {
5382 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005383 if (ctxt->replaceEntities) {
5384 /*
5385 * Prune it directly in the generated document
5386 * except for single text nodes.
5387 */
5388 if ((list->type == XML_TEXT_NODE) &&
5389 (list->next == NULL)) {
5390 list->parent = (xmlNodePtr) ent;
5391 list = NULL;
5392 } else {
5393 while (list != NULL) {
5394 list->parent = (xmlNodePtr) ctxt->node;
5395 if (list->next == NULL)
5396 ent->last = list;
5397 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005398 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005399 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005400 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5401 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005402 }
5403 } else {
5404 while (list != NULL) {
5405 list->parent = (xmlNodePtr) ent;
5406 if (list->next == NULL)
5407 ent->last = list;
5408 list = list->next;
5409 }
Owen Taylor3473f882001-02-23 17:55:21 +00005410 }
5411 } else {
5412 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005413 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005414 }
5415 } else if (ret > 0) {
5416 ctxt->errNo = ret;
5417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5418 ctxt->sax->error(ctxt->userData,
5419 "Entity value required\n");
5420 ctxt->wellFormed = 0;
5421 ctxt->disableSAX = 1;
5422 } else if (list != NULL) {
5423 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005424 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005425 }
5426 }
5427 }
5428 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5429 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5430 /*
5431 * Create a node.
5432 */
5433 ctxt->sax->reference(ctxt->userData, ent->name);
5434 return;
5435 } else if (ctxt->replaceEntities) {
5436 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5437 /*
5438 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005439 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005440 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005441 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005442 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005443 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005444 cur = ent->children;
5445 while (cur != NULL) {
5446 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005447 if (firstChild == NULL){
5448 firstChild = new;
5449 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005450 xmlAddChild(ctxt->node, new);
5451 if (cur == ent->last)
5452 break;
5453 cur = cur->next;
5454 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005455 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5456 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005457 } else {
5458 /*
5459 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005460 * node with a possible previous text one which
5461 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005462 */
5463 if (ent->children->type == XML_TEXT_NODE)
5464 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5465 if ((ent->last != ent->children) &&
5466 (ent->last->type == XML_TEXT_NODE))
5467 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5468 xmlAddChildList(ctxt->node, ent->children);
5469 }
5470
Owen Taylor3473f882001-02-23 17:55:21 +00005471 /*
5472 * This is to avoid a nasty side effect, see
5473 * characters() in SAX.c
5474 */
5475 ctxt->nodemem = 0;
5476 ctxt->nodelen = 0;
5477 return;
5478 } else {
5479 /*
5480 * Probably running in SAX mode
5481 */
5482 xmlParserInputPtr input;
5483
5484 input = xmlNewEntityInputStream(ctxt, ent);
5485 xmlPushInput(ctxt, input);
5486 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5487 (RAW == '<') && (NXT(1) == '?') &&
5488 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5489 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5490 xmlParseTextDecl(ctxt);
5491 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5492 /*
5493 * The XML REC instructs us to stop parsing right here
5494 */
5495 ctxt->instate = XML_PARSER_EOF;
5496 return;
5497 }
5498 if (input->standalone == 1) {
5499 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5501 ctxt->sax->error(ctxt->userData,
5502 "external parsed entities cannot be standalone\n");
5503 ctxt->wellFormed = 0;
5504 ctxt->disableSAX = 1;
5505 }
5506 }
5507 return;
5508 }
5509 }
5510 } else {
5511 val = ent->content;
5512 if (val == NULL) return;
5513 /*
5514 * inline the entity.
5515 */
5516 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5517 (!ctxt->disableSAX))
5518 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5519 }
5520 }
5521}
5522
5523/**
5524 * xmlParseEntityRef:
5525 * @ctxt: an XML parser context
5526 *
5527 * parse ENTITY references declarations
5528 *
5529 * [68] EntityRef ::= '&' Name ';'
5530 *
5531 * [ WFC: Entity Declared ]
5532 * In a document without any DTD, a document with only an internal DTD
5533 * subset which contains no parameter entity references, or a document
5534 * with "standalone='yes'", the Name given in the entity reference
5535 * must match that in an entity declaration, except that well-formed
5536 * documents need not declare any of the following entities: amp, lt,
5537 * gt, apos, quot. The declaration of a parameter entity must precede
5538 * any reference to it. Similarly, the declaration of a general entity
5539 * must precede any reference to it which appears in a default value in an
5540 * attribute-list declaration. Note that if entities are declared in the
5541 * external subset or in external parameter entities, a non-validating
5542 * processor is not obligated to read and process their declarations;
5543 * for such documents, the rule that an entity must be declared is a
5544 * well-formedness constraint only if standalone='yes'.
5545 *
5546 * [ WFC: Parsed Entity ]
5547 * An entity reference must not contain the name of an unparsed entity
5548 *
5549 * Returns the xmlEntityPtr if found, or NULL otherwise.
5550 */
5551xmlEntityPtr
5552xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5553 xmlChar *name;
5554 xmlEntityPtr ent = NULL;
5555
5556 GROW;
5557
5558 if (RAW == '&') {
5559 NEXT;
5560 name = xmlParseName(ctxt);
5561 if (name == NULL) {
5562 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5564 ctxt->sax->error(ctxt->userData,
5565 "xmlParseEntityRef: no name\n");
5566 ctxt->wellFormed = 0;
5567 ctxt->disableSAX = 1;
5568 } else {
5569 if (RAW == ';') {
5570 NEXT;
5571 /*
5572 * Ask first SAX for entity resolution, otherwise try the
5573 * predefined set.
5574 */
5575 if (ctxt->sax != NULL) {
5576 if (ctxt->sax->getEntity != NULL)
5577 ent = ctxt->sax->getEntity(ctxt->userData, name);
5578 if (ent == NULL)
5579 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005580 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5581 ent = getEntity(ctxt, name);
5582 }
Owen Taylor3473f882001-02-23 17:55:21 +00005583 }
5584 /*
5585 * [ WFC: Entity Declared ]
5586 * In a document without any DTD, a document with only an
5587 * internal DTD subset which contains no parameter entity
5588 * references, or a document with "standalone='yes'", the
5589 * Name given in the entity reference must match that in an
5590 * entity declaration, except that well-formed documents
5591 * need not declare any of the following entities: amp, lt,
5592 * gt, apos, quot.
5593 * The declaration of a parameter entity must precede any
5594 * reference to it.
5595 * Similarly, the declaration of a general entity must
5596 * precede any reference to it which appears in a default
5597 * value in an attribute-list declaration. Note that if
5598 * entities are declared in the external subset or in
5599 * external parameter entities, a non-validating processor
5600 * is not obligated to read and process their declarations;
5601 * for such documents, the rule that an entity must be
5602 * declared is a well-formedness constraint only if
5603 * standalone='yes'.
5604 */
5605 if (ent == NULL) {
5606 if ((ctxt->standalone == 1) ||
5607 ((ctxt->hasExternalSubset == 0) &&
5608 (ctxt->hasPErefs == 0))) {
5609 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5611 ctxt->sax->error(ctxt->userData,
5612 "Entity '%s' not defined\n", name);
5613 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005614 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005615 ctxt->disableSAX = 1;
5616 } else {
5617 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005619 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005620 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005621 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005622 }
5623 }
5624
5625 /*
5626 * [ WFC: Parsed Entity ]
5627 * An entity reference must not contain the name of an
5628 * unparsed entity
5629 */
5630 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5631 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5633 ctxt->sax->error(ctxt->userData,
5634 "Entity reference to unparsed entity %s\n", name);
5635 ctxt->wellFormed = 0;
5636 ctxt->disableSAX = 1;
5637 }
5638
5639 /*
5640 * [ WFC: No External Entity References ]
5641 * Attribute values cannot contain direct or indirect
5642 * entity references to external entities.
5643 */
5644 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5645 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5646 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5648 ctxt->sax->error(ctxt->userData,
5649 "Attribute references external entity '%s'\n", name);
5650 ctxt->wellFormed = 0;
5651 ctxt->disableSAX = 1;
5652 }
5653 /*
5654 * [ WFC: No < in Attribute Values ]
5655 * The replacement text of any entity referred to directly or
5656 * indirectly in an attribute value (other than "&lt;") must
5657 * not contain a <.
5658 */
5659 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5660 (ent != NULL) &&
5661 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5662 (ent->content != NULL) &&
5663 (xmlStrchr(ent->content, '<'))) {
5664 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5666 ctxt->sax->error(ctxt->userData,
5667 "'<' in entity '%s' is not allowed in attributes values\n", name);
5668 ctxt->wellFormed = 0;
5669 ctxt->disableSAX = 1;
5670 }
5671
5672 /*
5673 * Internal check, no parameter entities here ...
5674 */
5675 else {
5676 switch (ent->etype) {
5677 case XML_INTERNAL_PARAMETER_ENTITY:
5678 case XML_EXTERNAL_PARAMETER_ENTITY:
5679 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5681 ctxt->sax->error(ctxt->userData,
5682 "Attempt to reference the parameter entity '%s'\n", name);
5683 ctxt->wellFormed = 0;
5684 ctxt->disableSAX = 1;
5685 break;
5686 default:
5687 break;
5688 }
5689 }
5690
5691 /*
5692 * [ WFC: No Recursion ]
5693 * A parsed entity must not contain a recursive reference
5694 * to itself, either directly or indirectly.
5695 * Done somewhere else
5696 */
5697
5698 } else {
5699 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5701 ctxt->sax->error(ctxt->userData,
5702 "xmlParseEntityRef: expecting ';'\n");
5703 ctxt->wellFormed = 0;
5704 ctxt->disableSAX = 1;
5705 }
5706 xmlFree(name);
5707 }
5708 }
5709 return(ent);
5710}
5711
5712/**
5713 * xmlParseStringEntityRef:
5714 * @ctxt: an XML parser context
5715 * @str: a pointer to an index in the string
5716 *
5717 * parse ENTITY references declarations, but this version parses it from
5718 * a string value.
5719 *
5720 * [68] EntityRef ::= '&' Name ';'
5721 *
5722 * [ WFC: Entity Declared ]
5723 * In a document without any DTD, a document with only an internal DTD
5724 * subset which contains no parameter entity references, or a document
5725 * with "standalone='yes'", the Name given in the entity reference
5726 * must match that in an entity declaration, except that well-formed
5727 * documents need not declare any of the following entities: amp, lt,
5728 * gt, apos, quot. The declaration of a parameter entity must precede
5729 * any reference to it. Similarly, the declaration of a general entity
5730 * must precede any reference to it which appears in a default value in an
5731 * attribute-list declaration. Note that if entities are declared in the
5732 * external subset or in external parameter entities, a non-validating
5733 * processor is not obligated to read and process their declarations;
5734 * for such documents, the rule that an entity must be declared is a
5735 * well-formedness constraint only if standalone='yes'.
5736 *
5737 * [ WFC: Parsed Entity ]
5738 * An entity reference must not contain the name of an unparsed entity
5739 *
5740 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5741 * is updated to the current location in the string.
5742 */
5743xmlEntityPtr
5744xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5745 xmlChar *name;
5746 const xmlChar *ptr;
5747 xmlChar cur;
5748 xmlEntityPtr ent = NULL;
5749
5750 if ((str == NULL) || (*str == NULL))
5751 return(NULL);
5752 ptr = *str;
5753 cur = *ptr;
5754 if (cur == '&') {
5755 ptr++;
5756 cur = *ptr;
5757 name = xmlParseStringName(ctxt, &ptr);
5758 if (name == NULL) {
5759 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5761 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005762 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005763 ctxt->wellFormed = 0;
5764 ctxt->disableSAX = 1;
5765 } else {
5766 if (*ptr == ';') {
5767 ptr++;
5768 /*
5769 * Ask first SAX for entity resolution, otherwise try the
5770 * predefined set.
5771 */
5772 if (ctxt->sax != NULL) {
5773 if (ctxt->sax->getEntity != NULL)
5774 ent = ctxt->sax->getEntity(ctxt->userData, name);
5775 if (ent == NULL)
5776 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005777 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5778 ent = getEntity(ctxt, name);
5779 }
Owen Taylor3473f882001-02-23 17:55:21 +00005780 }
5781 /*
5782 * [ WFC: Entity Declared ]
5783 * In a document without any DTD, a document with only an
5784 * internal DTD subset which contains no parameter entity
5785 * references, or a document with "standalone='yes'", the
5786 * Name given in the entity reference must match that in an
5787 * entity declaration, except that well-formed documents
5788 * need not declare any of the following entities: amp, lt,
5789 * gt, apos, quot.
5790 * The declaration of a parameter entity must precede any
5791 * reference to it.
5792 * Similarly, the declaration of a general entity must
5793 * precede any reference to it which appears in a default
5794 * value in an attribute-list declaration. Note that if
5795 * entities are declared in the external subset or in
5796 * external parameter entities, a non-validating processor
5797 * is not obligated to read and process their declarations;
5798 * for such documents, the rule that an entity must be
5799 * declared is a well-formedness constraint only if
5800 * standalone='yes'.
5801 */
5802 if (ent == NULL) {
5803 if ((ctxt->standalone == 1) ||
5804 ((ctxt->hasExternalSubset == 0) &&
5805 (ctxt->hasPErefs == 0))) {
5806 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5808 ctxt->sax->error(ctxt->userData,
5809 "Entity '%s' not defined\n", name);
5810 ctxt->wellFormed = 0;
5811 ctxt->disableSAX = 1;
5812 } else {
5813 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5814 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5815 ctxt->sax->warning(ctxt->userData,
5816 "Entity '%s' not defined\n", name);
5817 }
5818 }
5819
5820 /*
5821 * [ WFC: Parsed Entity ]
5822 * An entity reference must not contain the name of an
5823 * unparsed entity
5824 */
5825 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5826 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5828 ctxt->sax->error(ctxt->userData,
5829 "Entity reference to unparsed entity %s\n", name);
5830 ctxt->wellFormed = 0;
5831 ctxt->disableSAX = 1;
5832 }
5833
5834 /*
5835 * [ WFC: No External Entity References ]
5836 * Attribute values cannot contain direct or indirect
5837 * entity references to external entities.
5838 */
5839 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5840 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5841 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5843 ctxt->sax->error(ctxt->userData,
5844 "Attribute references external entity '%s'\n", name);
5845 ctxt->wellFormed = 0;
5846 ctxt->disableSAX = 1;
5847 }
5848 /*
5849 * [ WFC: No < in Attribute Values ]
5850 * The replacement text of any entity referred to directly or
5851 * indirectly in an attribute value (other than "&lt;") must
5852 * not contain a <.
5853 */
5854 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5855 (ent != NULL) &&
5856 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5857 (ent->content != NULL) &&
5858 (xmlStrchr(ent->content, '<'))) {
5859 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5861 ctxt->sax->error(ctxt->userData,
5862 "'<' in entity '%s' is not allowed in attributes values\n", name);
5863 ctxt->wellFormed = 0;
5864 ctxt->disableSAX = 1;
5865 }
5866
5867 /*
5868 * Internal check, no parameter entities here ...
5869 */
5870 else {
5871 switch (ent->etype) {
5872 case XML_INTERNAL_PARAMETER_ENTITY:
5873 case XML_EXTERNAL_PARAMETER_ENTITY:
5874 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5876 ctxt->sax->error(ctxt->userData,
5877 "Attempt to reference the parameter entity '%s'\n", name);
5878 ctxt->wellFormed = 0;
5879 ctxt->disableSAX = 1;
5880 break;
5881 default:
5882 break;
5883 }
5884 }
5885
5886 /*
5887 * [ WFC: No Recursion ]
5888 * A parsed entity must not contain a recursive reference
5889 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005890 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005891 */
5892
5893 } else {
5894 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5896 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005897 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005898 ctxt->wellFormed = 0;
5899 ctxt->disableSAX = 1;
5900 }
5901 xmlFree(name);
5902 }
5903 }
5904 *str = ptr;
5905 return(ent);
5906}
5907
5908/**
5909 * xmlParsePEReference:
5910 * @ctxt: an XML parser context
5911 *
5912 * parse PEReference declarations
5913 * The entity content is handled directly by pushing it's content as
5914 * a new input stream.
5915 *
5916 * [69] PEReference ::= '%' Name ';'
5917 *
5918 * [ WFC: No Recursion ]
5919 * A parsed entity must not contain a recursive
5920 * reference to itself, either directly or indirectly.
5921 *
5922 * [ WFC: Entity Declared ]
5923 * In a document without any DTD, a document with only an internal DTD
5924 * subset which contains no parameter entity references, or a document
5925 * with "standalone='yes'", ... ... The declaration of a parameter
5926 * entity must precede any reference to it...
5927 *
5928 * [ VC: Entity Declared ]
5929 * In a document with an external subset or external parameter entities
5930 * with "standalone='no'", ... ... The declaration of a parameter entity
5931 * must precede any reference to it...
5932 *
5933 * [ WFC: In DTD ]
5934 * Parameter-entity references may only appear in the DTD.
5935 * NOTE: misleading but this is handled.
5936 */
5937void
5938xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5939 xmlChar *name;
5940 xmlEntityPtr entity = NULL;
5941 xmlParserInputPtr input;
5942
5943 if (RAW == '%') {
5944 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005945 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005946 if (name == NULL) {
5947 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5949 ctxt->sax->error(ctxt->userData,
5950 "xmlParsePEReference: no name\n");
5951 ctxt->wellFormed = 0;
5952 ctxt->disableSAX = 1;
5953 } else {
5954 if (RAW == ';') {
5955 NEXT;
5956 if ((ctxt->sax != NULL) &&
5957 (ctxt->sax->getParameterEntity != NULL))
5958 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5959 name);
5960 if (entity == NULL) {
5961 /*
5962 * [ WFC: Entity Declared ]
5963 * In a document without any DTD, a document with only an
5964 * internal DTD subset which contains no parameter entity
5965 * references, or a document with "standalone='yes'", ...
5966 * ... The declaration of a parameter entity must precede
5967 * any reference to it...
5968 */
5969 if ((ctxt->standalone == 1) ||
5970 ((ctxt->hasExternalSubset == 0) &&
5971 (ctxt->hasPErefs == 0))) {
5972 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5973 if ((!ctxt->disableSAX) &&
5974 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5975 ctxt->sax->error(ctxt->userData,
5976 "PEReference: %%%s; not found\n", name);
5977 ctxt->wellFormed = 0;
5978 ctxt->disableSAX = 1;
5979 } else {
5980 /*
5981 * [ VC: Entity Declared ]
5982 * In a document with an external subset or external
5983 * parameter entities with "standalone='no'", ...
5984 * ... The declaration of a parameter entity must precede
5985 * any reference to it...
5986 */
5987 if ((!ctxt->disableSAX) &&
5988 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5989 ctxt->sax->warning(ctxt->userData,
5990 "PEReference: %%%s; not found\n", name);
5991 ctxt->valid = 0;
5992 }
5993 } else {
5994 /*
5995 * Internal checking in case the entity quest barfed
5996 */
5997 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5998 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5999 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6000 ctxt->sax->warning(ctxt->userData,
6001 "Internal: %%%s; is not a parameter entity\n", name);
6002 } else {
6003 /*
6004 * TODO !!!
6005 * handle the extra spaces added before and after
6006 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6007 */
6008 input = xmlNewEntityInputStream(ctxt, entity);
6009 xmlPushInput(ctxt, input);
6010 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6011 (RAW == '<') && (NXT(1) == '?') &&
6012 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6013 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6014 xmlParseTextDecl(ctxt);
6015 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6016 /*
6017 * The XML REC instructs us to stop parsing
6018 * right here
6019 */
6020 ctxt->instate = XML_PARSER_EOF;
6021 xmlFree(name);
6022 return;
6023 }
6024 }
6025 if (ctxt->token == 0)
6026 ctxt->token = ' ';
6027 }
6028 }
6029 ctxt->hasPErefs = 1;
6030 } else {
6031 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6033 ctxt->sax->error(ctxt->userData,
6034 "xmlParsePEReference: expecting ';'\n");
6035 ctxt->wellFormed = 0;
6036 ctxt->disableSAX = 1;
6037 }
6038 xmlFree(name);
6039 }
6040 }
6041}
6042
6043/**
6044 * xmlParseStringPEReference:
6045 * @ctxt: an XML parser context
6046 * @str: a pointer to an index in the string
6047 *
6048 * parse PEReference declarations
6049 *
6050 * [69] PEReference ::= '%' Name ';'
6051 *
6052 * [ WFC: No Recursion ]
6053 * A parsed entity must not contain a recursive
6054 * reference to itself, either directly or indirectly.
6055 *
6056 * [ WFC: Entity Declared ]
6057 * In a document without any DTD, a document with only an internal DTD
6058 * subset which contains no parameter entity references, or a document
6059 * with "standalone='yes'", ... ... The declaration of a parameter
6060 * entity must precede any reference to it...
6061 *
6062 * [ VC: Entity Declared ]
6063 * In a document with an external subset or external parameter entities
6064 * with "standalone='no'", ... ... The declaration of a parameter entity
6065 * must precede any reference to it...
6066 *
6067 * [ WFC: In DTD ]
6068 * Parameter-entity references may only appear in the DTD.
6069 * NOTE: misleading but this is handled.
6070 *
6071 * Returns the string of the entity content.
6072 * str is updated to the current value of the index
6073 */
6074xmlEntityPtr
6075xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6076 const xmlChar *ptr;
6077 xmlChar cur;
6078 xmlChar *name;
6079 xmlEntityPtr entity = NULL;
6080
6081 if ((str == NULL) || (*str == NULL)) return(NULL);
6082 ptr = *str;
6083 cur = *ptr;
6084 if (cur == '%') {
6085 ptr++;
6086 cur = *ptr;
6087 name = xmlParseStringName(ctxt, &ptr);
6088 if (name == NULL) {
6089 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6091 ctxt->sax->error(ctxt->userData,
6092 "xmlParseStringPEReference: no name\n");
6093 ctxt->wellFormed = 0;
6094 ctxt->disableSAX = 1;
6095 } else {
6096 cur = *ptr;
6097 if (cur == ';') {
6098 ptr++;
6099 cur = *ptr;
6100 if ((ctxt->sax != NULL) &&
6101 (ctxt->sax->getParameterEntity != NULL))
6102 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6103 name);
6104 if (entity == NULL) {
6105 /*
6106 * [ WFC: Entity Declared ]
6107 * In a document without any DTD, a document with only an
6108 * internal DTD subset which contains no parameter entity
6109 * references, or a document with "standalone='yes'", ...
6110 * ... The declaration of a parameter entity must precede
6111 * any reference to it...
6112 */
6113 if ((ctxt->standalone == 1) ||
6114 ((ctxt->hasExternalSubset == 0) &&
6115 (ctxt->hasPErefs == 0))) {
6116 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6118 ctxt->sax->error(ctxt->userData,
6119 "PEReference: %%%s; not found\n", name);
6120 ctxt->wellFormed = 0;
6121 ctxt->disableSAX = 1;
6122 } else {
6123 /*
6124 * [ VC: Entity Declared ]
6125 * In a document with an external subset or external
6126 * parameter entities with "standalone='no'", ...
6127 * ... The declaration of a parameter entity must
6128 * precede any reference to it...
6129 */
6130 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6131 ctxt->sax->warning(ctxt->userData,
6132 "PEReference: %%%s; not found\n", name);
6133 ctxt->valid = 0;
6134 }
6135 } else {
6136 /*
6137 * Internal checking in case the entity quest barfed
6138 */
6139 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6140 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6141 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6142 ctxt->sax->warning(ctxt->userData,
6143 "Internal: %%%s; is not a parameter entity\n", name);
6144 }
6145 }
6146 ctxt->hasPErefs = 1;
6147 } else {
6148 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6150 ctxt->sax->error(ctxt->userData,
6151 "xmlParseStringPEReference: expecting ';'\n");
6152 ctxt->wellFormed = 0;
6153 ctxt->disableSAX = 1;
6154 }
6155 xmlFree(name);
6156 }
6157 }
6158 *str = ptr;
6159 return(entity);
6160}
6161
6162/**
6163 * xmlParseDocTypeDecl:
6164 * @ctxt: an XML parser context
6165 *
6166 * parse a DOCTYPE declaration
6167 *
6168 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6169 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6170 *
6171 * [ VC: Root Element Type ]
6172 * The Name in the document type declaration must match the element
6173 * type of the root element.
6174 */
6175
6176void
6177xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6178 xmlChar *name = NULL;
6179 xmlChar *ExternalID = NULL;
6180 xmlChar *URI = NULL;
6181
6182 /*
6183 * We know that '<!DOCTYPE' has been detected.
6184 */
6185 SKIP(9);
6186
6187 SKIP_BLANKS;
6188
6189 /*
6190 * Parse the DOCTYPE name.
6191 */
6192 name = xmlParseName(ctxt);
6193 if (name == NULL) {
6194 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6196 ctxt->sax->error(ctxt->userData,
6197 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6198 ctxt->wellFormed = 0;
6199 ctxt->disableSAX = 1;
6200 }
6201 ctxt->intSubName = name;
6202
6203 SKIP_BLANKS;
6204
6205 /*
6206 * Check for SystemID and ExternalID
6207 */
6208 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6209
6210 if ((URI != NULL) || (ExternalID != NULL)) {
6211 ctxt->hasExternalSubset = 1;
6212 }
6213 ctxt->extSubURI = URI;
6214 ctxt->extSubSystem = ExternalID;
6215
6216 SKIP_BLANKS;
6217
6218 /*
6219 * Create and update the internal subset.
6220 */
6221 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6222 (!ctxt->disableSAX))
6223 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6224
6225 /*
6226 * Is there any internal subset declarations ?
6227 * they are handled separately in xmlParseInternalSubset()
6228 */
6229 if (RAW == '[')
6230 return;
6231
6232 /*
6233 * We should be at the end of the DOCTYPE declaration.
6234 */
6235 if (RAW != '>') {
6236 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006238 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006239 ctxt->wellFormed = 0;
6240 ctxt->disableSAX = 1;
6241 }
6242 NEXT;
6243}
6244
6245/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006246 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006247 * @ctxt: an XML parser context
6248 *
6249 * parse the internal subset declaration
6250 *
6251 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6252 */
6253
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006254static void
Owen Taylor3473f882001-02-23 17:55:21 +00006255xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6256 /*
6257 * Is there any DTD definition ?
6258 */
6259 if (RAW == '[') {
6260 ctxt->instate = XML_PARSER_DTD;
6261 NEXT;
6262 /*
6263 * Parse the succession of Markup declarations and
6264 * PEReferences.
6265 * Subsequence (markupdecl | PEReference | S)*
6266 */
6267 while (RAW != ']') {
6268 const xmlChar *check = CUR_PTR;
6269 int cons = ctxt->input->consumed;
6270
6271 SKIP_BLANKS;
6272 xmlParseMarkupDecl(ctxt);
6273 xmlParsePEReference(ctxt);
6274
6275 /*
6276 * Pop-up of finished entities.
6277 */
6278 while ((RAW == 0) && (ctxt->inputNr > 1))
6279 xmlPopInput(ctxt);
6280
6281 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6282 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6284 ctxt->sax->error(ctxt->userData,
6285 "xmlParseInternalSubset: error detected in Markup declaration\n");
6286 ctxt->wellFormed = 0;
6287 ctxt->disableSAX = 1;
6288 break;
6289 }
6290 }
6291 if (RAW == ']') {
6292 NEXT;
6293 SKIP_BLANKS;
6294 }
6295 }
6296
6297 /*
6298 * We should be at the end of the DOCTYPE declaration.
6299 */
6300 if (RAW != '>') {
6301 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006303 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006304 ctxt->wellFormed = 0;
6305 ctxt->disableSAX = 1;
6306 }
6307 NEXT;
6308}
6309
6310/**
6311 * xmlParseAttribute:
6312 * @ctxt: an XML parser context
6313 * @value: a xmlChar ** used to store the value of the attribute
6314 *
6315 * parse an attribute
6316 *
6317 * [41] Attribute ::= Name Eq AttValue
6318 *
6319 * [ WFC: No External Entity References ]
6320 * Attribute values cannot contain direct or indirect entity references
6321 * to external entities.
6322 *
6323 * [ WFC: No < in Attribute Values ]
6324 * The replacement text of any entity referred to directly or indirectly in
6325 * an attribute value (other than "&lt;") must not contain a <.
6326 *
6327 * [ VC: Attribute Value Type ]
6328 * The attribute must have been declared; the value must be of the type
6329 * declared for it.
6330 *
6331 * [25] Eq ::= S? '=' S?
6332 *
6333 * With namespace:
6334 *
6335 * [NS 11] Attribute ::= QName Eq AttValue
6336 *
6337 * Also the case QName == xmlns:??? is handled independently as a namespace
6338 * definition.
6339 *
6340 * Returns the attribute name, and the value in *value.
6341 */
6342
6343xmlChar *
6344xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6345 xmlChar *name, *val;
6346
6347 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006348 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006349 name = xmlParseName(ctxt);
6350 if (name == NULL) {
6351 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6353 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6354 ctxt->wellFormed = 0;
6355 ctxt->disableSAX = 1;
6356 return(NULL);
6357 }
6358
6359 /*
6360 * read the value
6361 */
6362 SKIP_BLANKS;
6363 if (RAW == '=') {
6364 NEXT;
6365 SKIP_BLANKS;
6366 val = xmlParseAttValue(ctxt);
6367 ctxt->instate = XML_PARSER_CONTENT;
6368 } else {
6369 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6371 ctxt->sax->error(ctxt->userData,
6372 "Specification mandate value for attribute %s\n", name);
6373 ctxt->wellFormed = 0;
6374 ctxt->disableSAX = 1;
6375 xmlFree(name);
6376 return(NULL);
6377 }
6378
6379 /*
6380 * Check that xml:lang conforms to the specification
6381 * No more registered as an error, just generate a warning now
6382 * since this was deprecated in XML second edition
6383 */
6384 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6385 if (!xmlCheckLanguageID(val)) {
6386 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6387 ctxt->sax->warning(ctxt->userData,
6388 "Malformed value for xml:lang : %s\n", val);
6389 }
6390 }
6391
6392 /*
6393 * Check that xml:space conforms to the specification
6394 */
6395 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6396 if (xmlStrEqual(val, BAD_CAST "default"))
6397 *(ctxt->space) = 0;
6398 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6399 *(ctxt->space) = 1;
6400 else {
6401 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6403 ctxt->sax->error(ctxt->userData,
6404"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6405 val);
6406 ctxt->wellFormed = 0;
6407 ctxt->disableSAX = 1;
6408 }
6409 }
6410
6411 *value = val;
6412 return(name);
6413}
6414
6415/**
6416 * xmlParseStartTag:
6417 * @ctxt: an XML parser context
6418 *
6419 * parse a start of tag either for rule element or
6420 * EmptyElement. In both case we don't parse the tag closing chars.
6421 *
6422 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6423 *
6424 * [ WFC: Unique Att Spec ]
6425 * No attribute name may appear more than once in the same start-tag or
6426 * empty-element tag.
6427 *
6428 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6429 *
6430 * [ WFC: Unique Att Spec ]
6431 * No attribute name may appear more than once in the same start-tag or
6432 * empty-element tag.
6433 *
6434 * With namespace:
6435 *
6436 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6437 *
6438 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6439 *
6440 * Returns the element name parsed
6441 */
6442
6443xmlChar *
6444xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6445 xmlChar *name;
6446 xmlChar *attname;
6447 xmlChar *attvalue;
6448 const xmlChar **atts = NULL;
6449 int nbatts = 0;
6450 int maxatts = 0;
6451 int i;
6452
6453 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006454 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006455
6456 name = xmlParseName(ctxt);
6457 if (name == NULL) {
6458 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6460 ctxt->sax->error(ctxt->userData,
6461 "xmlParseStartTag: invalid element name\n");
6462 ctxt->wellFormed = 0;
6463 ctxt->disableSAX = 1;
6464 return(NULL);
6465 }
6466
6467 /*
6468 * Now parse the attributes, it ends up with the ending
6469 *
6470 * (S Attribute)* S?
6471 */
6472 SKIP_BLANKS;
6473 GROW;
6474
Daniel Veillard21a0f912001-02-25 19:54:14 +00006475 while ((RAW != '>') &&
6476 ((RAW != '/') || (NXT(1) != '>')) &&
6477 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006478 const xmlChar *q = CUR_PTR;
6479 int cons = ctxt->input->consumed;
6480
6481 attname = xmlParseAttribute(ctxt, &attvalue);
6482 if ((attname != NULL) && (attvalue != NULL)) {
6483 /*
6484 * [ WFC: Unique Att Spec ]
6485 * No attribute name may appear more than once in the same
6486 * start-tag or empty-element tag.
6487 */
6488 for (i = 0; i < nbatts;i += 2) {
6489 if (xmlStrEqual(atts[i], attname)) {
6490 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6492 ctxt->sax->error(ctxt->userData,
6493 "Attribute %s redefined\n",
6494 attname);
6495 ctxt->wellFormed = 0;
6496 ctxt->disableSAX = 1;
6497 xmlFree(attname);
6498 xmlFree(attvalue);
6499 goto failed;
6500 }
6501 }
6502
6503 /*
6504 * Add the pair to atts
6505 */
6506 if (atts == NULL) {
6507 maxatts = 10;
6508 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6509 if (atts == NULL) {
6510 xmlGenericError(xmlGenericErrorContext,
6511 "malloc of %ld byte failed\n",
6512 maxatts * (long)sizeof(xmlChar *));
6513 return(NULL);
6514 }
6515 } else if (nbatts + 4 > maxatts) {
6516 maxatts *= 2;
6517 atts = (const xmlChar **) xmlRealloc((void *) atts,
6518 maxatts * sizeof(xmlChar *));
6519 if (atts == NULL) {
6520 xmlGenericError(xmlGenericErrorContext,
6521 "realloc of %ld byte failed\n",
6522 maxatts * (long)sizeof(xmlChar *));
6523 return(NULL);
6524 }
6525 }
6526 atts[nbatts++] = attname;
6527 atts[nbatts++] = attvalue;
6528 atts[nbatts] = NULL;
6529 atts[nbatts + 1] = NULL;
6530 } else {
6531 if (attname != NULL)
6532 xmlFree(attname);
6533 if (attvalue != NULL)
6534 xmlFree(attvalue);
6535 }
6536
6537failed:
6538
6539 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6540 break;
6541 if (!IS_BLANK(RAW)) {
6542 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6544 ctxt->sax->error(ctxt->userData,
6545 "attributes construct error\n");
6546 ctxt->wellFormed = 0;
6547 ctxt->disableSAX = 1;
6548 }
6549 SKIP_BLANKS;
6550 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6551 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6553 ctxt->sax->error(ctxt->userData,
6554 "xmlParseStartTag: problem parsing attributes\n");
6555 ctxt->wellFormed = 0;
6556 ctxt->disableSAX = 1;
6557 break;
6558 }
6559 GROW;
6560 }
6561
6562 /*
6563 * SAX: Start of Element !
6564 */
6565 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6566 (!ctxt->disableSAX))
6567 ctxt->sax->startElement(ctxt->userData, name, atts);
6568
6569 if (atts != NULL) {
6570 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6571 xmlFree((void *) atts);
6572 }
6573 return(name);
6574}
6575
6576/**
6577 * xmlParseEndTag:
6578 * @ctxt: an XML parser context
6579 *
6580 * parse an end of tag
6581 *
6582 * [42] ETag ::= '</' Name S? '>'
6583 *
6584 * With namespace
6585 *
6586 * [NS 9] ETag ::= '</' QName S? '>'
6587 */
6588
6589void
6590xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6591 xmlChar *name;
6592 xmlChar *oldname;
6593
6594 GROW;
6595 if ((RAW != '<') || (NXT(1) != '/')) {
6596 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6598 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6599 ctxt->wellFormed = 0;
6600 ctxt->disableSAX = 1;
6601 return;
6602 }
6603 SKIP(2);
6604
Daniel Veillard46de64e2002-05-29 08:21:33 +00006605 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006606
6607 /*
6608 * We should definitely be at the ending "S? '>'" part
6609 */
6610 GROW;
6611 SKIP_BLANKS;
6612 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6613 ctxt->errNo = XML_ERR_GT_REQUIRED;
6614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6615 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6616 ctxt->wellFormed = 0;
6617 ctxt->disableSAX = 1;
6618 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006619 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006620
6621 /*
6622 * [ WFC: Element Type Match ]
6623 * The Name in an element's end-tag must match the element type in the
6624 * start-tag.
6625 *
6626 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006627 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006628 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006630 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006631 ctxt->sax->error(ctxt->userData,
6632 "Opening and ending tag mismatch: %s and %s\n",
6633 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006634 xmlFree(name);
6635 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006636 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006637 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006638 }
6639
6640 }
6641 ctxt->wellFormed = 0;
6642 ctxt->disableSAX = 1;
6643 }
6644
6645 /*
6646 * SAX: End of Tag
6647 */
6648 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6649 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006650 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006651
Owen Taylor3473f882001-02-23 17:55:21 +00006652 oldname = namePop(ctxt);
6653 spacePop(ctxt);
6654 if (oldname != NULL) {
6655#ifdef DEBUG_STACK
6656 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6657#endif
6658 xmlFree(oldname);
6659 }
6660 return;
6661}
6662
6663/**
6664 * xmlParseCDSect:
6665 * @ctxt: an XML parser context
6666 *
6667 * Parse escaped pure raw content.
6668 *
6669 * [18] CDSect ::= CDStart CData CDEnd
6670 *
6671 * [19] CDStart ::= '<![CDATA['
6672 *
6673 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6674 *
6675 * [21] CDEnd ::= ']]>'
6676 */
6677void
6678xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6679 xmlChar *buf = NULL;
6680 int len = 0;
6681 int size = XML_PARSER_BUFFER_SIZE;
6682 int r, rl;
6683 int s, sl;
6684 int cur, l;
6685 int count = 0;
6686
6687 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6688 (NXT(2) == '[') && (NXT(3) == 'C') &&
6689 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6690 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6691 (NXT(8) == '[')) {
6692 SKIP(9);
6693 } else
6694 return;
6695
6696 ctxt->instate = XML_PARSER_CDATA_SECTION;
6697 r = CUR_CHAR(rl);
6698 if (!IS_CHAR(r)) {
6699 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6701 ctxt->sax->error(ctxt->userData,
6702 "CData section not finished\n");
6703 ctxt->wellFormed = 0;
6704 ctxt->disableSAX = 1;
6705 ctxt->instate = XML_PARSER_CONTENT;
6706 return;
6707 }
6708 NEXTL(rl);
6709 s = CUR_CHAR(sl);
6710 if (!IS_CHAR(s)) {
6711 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6713 ctxt->sax->error(ctxt->userData,
6714 "CData section not finished\n");
6715 ctxt->wellFormed = 0;
6716 ctxt->disableSAX = 1;
6717 ctxt->instate = XML_PARSER_CONTENT;
6718 return;
6719 }
6720 NEXTL(sl);
6721 cur = CUR_CHAR(l);
6722 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6723 if (buf == NULL) {
6724 xmlGenericError(xmlGenericErrorContext,
6725 "malloc of %d byte failed\n", size);
6726 return;
6727 }
6728 while (IS_CHAR(cur) &&
6729 ((r != ']') || (s != ']') || (cur != '>'))) {
6730 if (len + 5 >= size) {
6731 size *= 2;
6732 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6733 if (buf == NULL) {
6734 xmlGenericError(xmlGenericErrorContext,
6735 "realloc of %d byte failed\n", size);
6736 return;
6737 }
6738 }
6739 COPY_BUF(rl,buf,len,r);
6740 r = s;
6741 rl = sl;
6742 s = cur;
6743 sl = l;
6744 count++;
6745 if (count > 50) {
6746 GROW;
6747 count = 0;
6748 }
6749 NEXTL(l);
6750 cur = CUR_CHAR(l);
6751 }
6752 buf[len] = 0;
6753 ctxt->instate = XML_PARSER_CONTENT;
6754 if (cur != '>') {
6755 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6757 ctxt->sax->error(ctxt->userData,
6758 "CData section not finished\n%.50s\n", buf);
6759 ctxt->wellFormed = 0;
6760 ctxt->disableSAX = 1;
6761 xmlFree(buf);
6762 return;
6763 }
6764 NEXTL(l);
6765
6766 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006767 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006768 */
6769 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6770 if (ctxt->sax->cdataBlock != NULL)
6771 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006772 else if (ctxt->sax->characters != NULL)
6773 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006774 }
6775 xmlFree(buf);
6776}
6777
6778/**
6779 * xmlParseContent:
6780 * @ctxt: an XML parser context
6781 *
6782 * Parse a content:
6783 *
6784 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6785 */
6786
6787void
6788xmlParseContent(xmlParserCtxtPtr ctxt) {
6789 GROW;
Daniel Veillard561b7f82002-03-20 21:55:57 +00006790 while (((RAW != 0) || (ctxt->token != 0)) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006791 ((RAW != '<') || (NXT(1) != '/'))) {
6792 const xmlChar *test = CUR_PTR;
6793 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006794 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006795 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006796
6797 /*
6798 * Handle possible processed charrefs.
6799 */
6800 if (ctxt->token != 0) {
6801 xmlParseCharData(ctxt, 0);
6802 }
6803 /*
6804 * First case : a Processing Instruction.
6805 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006806 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006807 xmlParsePI(ctxt);
6808 }
6809
6810 /*
6811 * Second case : a CDSection
6812 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006813 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006814 (NXT(2) == '[') && (NXT(3) == 'C') &&
6815 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6816 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6817 (NXT(8) == '[')) {
6818 xmlParseCDSect(ctxt);
6819 }
6820
6821 /*
6822 * Third case : a comment
6823 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006824 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006825 (NXT(2) == '-') && (NXT(3) == '-')) {
6826 xmlParseComment(ctxt);
6827 ctxt->instate = XML_PARSER_CONTENT;
6828 }
6829
6830 /*
6831 * Fourth case : a sub-element.
6832 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006833 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006834 xmlParseElement(ctxt);
6835 }
6836
6837 /*
6838 * Fifth case : a reference. If if has not been resolved,
6839 * parsing returns it's Name, create the node
6840 */
6841
Daniel Veillard21a0f912001-02-25 19:54:14 +00006842 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006843 xmlParseReference(ctxt);
6844 }
6845
6846 /*
6847 * Last case, text. Note that References are handled directly.
6848 */
6849 else {
6850 xmlParseCharData(ctxt, 0);
6851 }
6852
6853 GROW;
6854 /*
6855 * Pop-up of finished entities.
6856 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006857 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006858 xmlPopInput(ctxt);
6859 SHRINK;
6860
6861 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6862 (tok == ctxt->token)) {
6863 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6865 ctxt->sax->error(ctxt->userData,
6866 "detected an error in element content\n");
6867 ctxt->wellFormed = 0;
6868 ctxt->disableSAX = 1;
6869 ctxt->instate = XML_PARSER_EOF;
6870 break;
6871 }
6872 }
6873}
6874
6875/**
6876 * xmlParseElement:
6877 * @ctxt: an XML parser context
6878 *
6879 * parse an XML element, this is highly recursive
6880 *
6881 * [39] element ::= EmptyElemTag | STag content ETag
6882 *
6883 * [ WFC: Element Type Match ]
6884 * The Name in an element's end-tag must match the element type in the
6885 * start-tag.
6886 *
6887 * [ VC: Element Valid ]
6888 * An element is valid if there is a declaration matching elementdecl
6889 * where the Name matches the element type and one of the following holds:
6890 * - The declaration matches EMPTY and the element has no content.
6891 * - The declaration matches children and the sequence of child elements
6892 * belongs to the language generated by the regular expression in the
6893 * content model, with optional white space (characters matching the
6894 * nonterminal S) between each pair of child elements.
6895 * - The declaration matches Mixed and the content consists of character
6896 * data and child elements whose types match names in the content model.
6897 * - The declaration matches ANY, and the types of any child elements have
6898 * been declared.
6899 */
6900
6901void
6902xmlParseElement(xmlParserCtxtPtr ctxt) {
6903 const xmlChar *openTag = CUR_PTR;
6904 xmlChar *name;
6905 xmlChar *oldname;
6906 xmlParserNodeInfo node_info;
6907 xmlNodePtr ret;
6908
6909 /* Capture start position */
6910 if (ctxt->record_info) {
6911 node_info.begin_pos = ctxt->input->consumed +
6912 (CUR_PTR - ctxt->input->base);
6913 node_info.begin_line = ctxt->input->line;
6914 }
6915
6916 if (ctxt->spaceNr == 0)
6917 spacePush(ctxt, -1);
6918 else
6919 spacePush(ctxt, *ctxt->space);
6920
6921 name = xmlParseStartTag(ctxt);
6922 if (name == NULL) {
6923 spacePop(ctxt);
6924 return;
6925 }
6926 namePush(ctxt, name);
6927 ret = ctxt->node;
6928
6929 /*
6930 * [ VC: Root Element Type ]
6931 * The Name in the document type declaration must match the element
6932 * type of the root element.
6933 */
6934 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6935 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6936 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6937
6938 /*
6939 * Check for an Empty Element.
6940 */
6941 if ((RAW == '/') && (NXT(1) == '>')) {
6942 SKIP(2);
6943 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6944 (!ctxt->disableSAX))
6945 ctxt->sax->endElement(ctxt->userData, name);
6946 oldname = namePop(ctxt);
6947 spacePop(ctxt);
6948 if (oldname != NULL) {
6949#ifdef DEBUG_STACK
6950 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6951#endif
6952 xmlFree(oldname);
6953 }
6954 if ( ret != NULL && ctxt->record_info ) {
6955 node_info.end_pos = ctxt->input->consumed +
6956 (CUR_PTR - ctxt->input->base);
6957 node_info.end_line = ctxt->input->line;
6958 node_info.node = ret;
6959 xmlParserAddNodeInfo(ctxt, &node_info);
6960 }
6961 return;
6962 }
6963 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006964 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006965 } else {
6966 ctxt->errNo = XML_ERR_GT_REQUIRED;
6967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6968 ctxt->sax->error(ctxt->userData,
6969 "Couldn't find end of Start Tag\n%.30s\n",
6970 openTag);
6971 ctxt->wellFormed = 0;
6972 ctxt->disableSAX = 1;
6973
6974 /*
6975 * end of parsing of this node.
6976 */
6977 nodePop(ctxt);
6978 oldname = namePop(ctxt);
6979 spacePop(ctxt);
6980 if (oldname != NULL) {
6981#ifdef DEBUG_STACK
6982 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6983#endif
6984 xmlFree(oldname);
6985 }
6986
6987 /*
6988 * Capture end position and add node
6989 */
6990 if ( ret != NULL && ctxt->record_info ) {
6991 node_info.end_pos = ctxt->input->consumed +
6992 (CUR_PTR - ctxt->input->base);
6993 node_info.end_line = ctxt->input->line;
6994 node_info.node = ret;
6995 xmlParserAddNodeInfo(ctxt, &node_info);
6996 }
6997 return;
6998 }
6999
7000 /*
7001 * Parse the content of the element:
7002 */
7003 xmlParseContent(ctxt);
7004 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007005 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7007 ctxt->sax->error(ctxt->userData,
7008 "Premature end of data in tag %.30s\n", openTag);
7009 ctxt->wellFormed = 0;
7010 ctxt->disableSAX = 1;
7011
7012 /*
7013 * end of parsing of this node.
7014 */
7015 nodePop(ctxt);
7016 oldname = namePop(ctxt);
7017 spacePop(ctxt);
7018 if (oldname != NULL) {
7019#ifdef DEBUG_STACK
7020 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7021#endif
7022 xmlFree(oldname);
7023 }
7024 return;
7025 }
7026
7027 /*
7028 * parse the end of tag: '</' should be here.
7029 */
7030 xmlParseEndTag(ctxt);
7031
7032 /*
7033 * Capture end position and add node
7034 */
7035 if ( ret != NULL && ctxt->record_info ) {
7036 node_info.end_pos = ctxt->input->consumed +
7037 (CUR_PTR - ctxt->input->base);
7038 node_info.end_line = ctxt->input->line;
7039 node_info.node = ret;
7040 xmlParserAddNodeInfo(ctxt, &node_info);
7041 }
7042}
7043
7044/**
7045 * xmlParseVersionNum:
7046 * @ctxt: an XML parser context
7047 *
7048 * parse the XML version value.
7049 *
7050 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7051 *
7052 * Returns the string giving the XML version number, or NULL
7053 */
7054xmlChar *
7055xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7056 xmlChar *buf = NULL;
7057 int len = 0;
7058 int size = 10;
7059 xmlChar cur;
7060
7061 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7062 if (buf == NULL) {
7063 xmlGenericError(xmlGenericErrorContext,
7064 "malloc of %d byte failed\n", size);
7065 return(NULL);
7066 }
7067 cur = CUR;
7068 while (((cur >= 'a') && (cur <= 'z')) ||
7069 ((cur >= 'A') && (cur <= 'Z')) ||
7070 ((cur >= '0') && (cur <= '9')) ||
7071 (cur == '_') || (cur == '.') ||
7072 (cur == ':') || (cur == '-')) {
7073 if (len + 1 >= size) {
7074 size *= 2;
7075 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7076 if (buf == NULL) {
7077 xmlGenericError(xmlGenericErrorContext,
7078 "realloc of %d byte failed\n", size);
7079 return(NULL);
7080 }
7081 }
7082 buf[len++] = cur;
7083 NEXT;
7084 cur=CUR;
7085 }
7086 buf[len] = 0;
7087 return(buf);
7088}
7089
7090/**
7091 * xmlParseVersionInfo:
7092 * @ctxt: an XML parser context
7093 *
7094 * parse the XML version.
7095 *
7096 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7097 *
7098 * [25] Eq ::= S? '=' S?
7099 *
7100 * Returns the version string, e.g. "1.0"
7101 */
7102
7103xmlChar *
7104xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7105 xmlChar *version = NULL;
7106 const xmlChar *q;
7107
7108 if ((RAW == 'v') && (NXT(1) == 'e') &&
7109 (NXT(2) == 'r') && (NXT(3) == 's') &&
7110 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7111 (NXT(6) == 'n')) {
7112 SKIP(7);
7113 SKIP_BLANKS;
7114 if (RAW != '=') {
7115 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7117 ctxt->sax->error(ctxt->userData,
7118 "xmlParseVersionInfo : expected '='\n");
7119 ctxt->wellFormed = 0;
7120 ctxt->disableSAX = 1;
7121 return(NULL);
7122 }
7123 NEXT;
7124 SKIP_BLANKS;
7125 if (RAW == '"') {
7126 NEXT;
7127 q = CUR_PTR;
7128 version = xmlParseVersionNum(ctxt);
7129 if (RAW != '"') {
7130 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7132 ctxt->sax->error(ctxt->userData,
7133 "String not closed\n%.50s\n", q);
7134 ctxt->wellFormed = 0;
7135 ctxt->disableSAX = 1;
7136 } else
7137 NEXT;
7138 } else if (RAW == '\''){
7139 NEXT;
7140 q = CUR_PTR;
7141 version = xmlParseVersionNum(ctxt);
7142 if (RAW != '\'') {
7143 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7145 ctxt->sax->error(ctxt->userData,
7146 "String not closed\n%.50s\n", q);
7147 ctxt->wellFormed = 0;
7148 ctxt->disableSAX = 1;
7149 } else
7150 NEXT;
7151 } else {
7152 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7154 ctxt->sax->error(ctxt->userData,
7155 "xmlParseVersionInfo : expected ' or \"\n");
7156 ctxt->wellFormed = 0;
7157 ctxt->disableSAX = 1;
7158 }
7159 }
7160 return(version);
7161}
7162
7163/**
7164 * xmlParseEncName:
7165 * @ctxt: an XML parser context
7166 *
7167 * parse the XML encoding name
7168 *
7169 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7170 *
7171 * Returns the encoding name value or NULL
7172 */
7173xmlChar *
7174xmlParseEncName(xmlParserCtxtPtr ctxt) {
7175 xmlChar *buf = NULL;
7176 int len = 0;
7177 int size = 10;
7178 xmlChar cur;
7179
7180 cur = CUR;
7181 if (((cur >= 'a') && (cur <= 'z')) ||
7182 ((cur >= 'A') && (cur <= 'Z'))) {
7183 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7184 if (buf == NULL) {
7185 xmlGenericError(xmlGenericErrorContext,
7186 "malloc of %d byte failed\n", size);
7187 return(NULL);
7188 }
7189
7190 buf[len++] = cur;
7191 NEXT;
7192 cur = CUR;
7193 while (((cur >= 'a') && (cur <= 'z')) ||
7194 ((cur >= 'A') && (cur <= 'Z')) ||
7195 ((cur >= '0') && (cur <= '9')) ||
7196 (cur == '.') || (cur == '_') ||
7197 (cur == '-')) {
7198 if (len + 1 >= size) {
7199 size *= 2;
7200 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7201 if (buf == NULL) {
7202 xmlGenericError(xmlGenericErrorContext,
7203 "realloc of %d byte failed\n", size);
7204 return(NULL);
7205 }
7206 }
7207 buf[len++] = cur;
7208 NEXT;
7209 cur = CUR;
7210 if (cur == 0) {
7211 SHRINK;
7212 GROW;
7213 cur = CUR;
7214 }
7215 }
7216 buf[len] = 0;
7217 } else {
7218 ctxt->errNo = XML_ERR_ENCODING_NAME;
7219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7220 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7221 ctxt->wellFormed = 0;
7222 ctxt->disableSAX = 1;
7223 }
7224 return(buf);
7225}
7226
7227/**
7228 * xmlParseEncodingDecl:
7229 * @ctxt: an XML parser context
7230 *
7231 * parse the XML encoding declaration
7232 *
7233 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7234 *
7235 * this setups the conversion filters.
7236 *
7237 * Returns the encoding value or NULL
7238 */
7239
7240xmlChar *
7241xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7242 xmlChar *encoding = NULL;
7243 const xmlChar *q;
7244
7245 SKIP_BLANKS;
7246 if ((RAW == 'e') && (NXT(1) == 'n') &&
7247 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7248 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7249 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7250 SKIP(8);
7251 SKIP_BLANKS;
7252 if (RAW != '=') {
7253 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7255 ctxt->sax->error(ctxt->userData,
7256 "xmlParseEncodingDecl : expected '='\n");
7257 ctxt->wellFormed = 0;
7258 ctxt->disableSAX = 1;
7259 return(NULL);
7260 }
7261 NEXT;
7262 SKIP_BLANKS;
7263 if (RAW == '"') {
7264 NEXT;
7265 q = CUR_PTR;
7266 encoding = xmlParseEncName(ctxt);
7267 if (RAW != '"') {
7268 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7270 ctxt->sax->error(ctxt->userData,
7271 "String not closed\n%.50s\n", q);
7272 ctxt->wellFormed = 0;
7273 ctxt->disableSAX = 1;
7274 } else
7275 NEXT;
7276 } else if (RAW == '\''){
7277 NEXT;
7278 q = CUR_PTR;
7279 encoding = xmlParseEncName(ctxt);
7280 if (RAW != '\'') {
7281 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7283 ctxt->sax->error(ctxt->userData,
7284 "String not closed\n%.50s\n", q);
7285 ctxt->wellFormed = 0;
7286 ctxt->disableSAX = 1;
7287 } else
7288 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007289 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007290 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7292 ctxt->sax->error(ctxt->userData,
7293 "xmlParseEncodingDecl : expected ' or \"\n");
7294 ctxt->wellFormed = 0;
7295 ctxt->disableSAX = 1;
7296 }
7297 if (encoding != NULL) {
7298 xmlCharEncoding enc;
7299 xmlCharEncodingHandlerPtr handler;
7300
7301 if (ctxt->input->encoding != NULL)
7302 xmlFree((xmlChar *) ctxt->input->encoding);
7303 ctxt->input->encoding = encoding;
7304
7305 enc = xmlParseCharEncoding((const char *) encoding);
7306 /*
7307 * registered set of known encodings
7308 */
7309 if (enc != XML_CHAR_ENCODING_ERROR) {
7310 xmlSwitchEncoding(ctxt, enc);
7311 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007312 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007313 xmlFree(encoding);
7314 return(NULL);
7315 }
7316 } else {
7317 /*
7318 * fallback for unknown encodings
7319 */
7320 handler = xmlFindCharEncodingHandler((const char *) encoding);
7321 if (handler != NULL) {
7322 xmlSwitchToEncoding(ctxt, handler);
7323 } else {
7324 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7326 ctxt->sax->error(ctxt->userData,
7327 "Unsupported encoding %s\n", encoding);
7328 return(NULL);
7329 }
7330 }
7331 }
7332 }
7333 return(encoding);
7334}
7335
7336/**
7337 * xmlParseSDDecl:
7338 * @ctxt: an XML parser context
7339 *
7340 * parse the XML standalone declaration
7341 *
7342 * [32] SDDecl ::= S 'standalone' Eq
7343 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7344 *
7345 * [ VC: Standalone Document Declaration ]
7346 * TODO The standalone document declaration must have the value "no"
7347 * if any external markup declarations contain declarations of:
7348 * - attributes with default values, if elements to which these
7349 * attributes apply appear in the document without specifications
7350 * of values for these attributes, or
7351 * - entities (other than amp, lt, gt, apos, quot), if references
7352 * to those entities appear in the document, or
7353 * - attributes with values subject to normalization, where the
7354 * attribute appears in the document with a value which will change
7355 * as a result of normalization, or
7356 * - element types with element content, if white space occurs directly
7357 * within any instance of those types.
7358 *
7359 * Returns 1 if standalone, 0 otherwise
7360 */
7361
7362int
7363xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7364 int standalone = -1;
7365
7366 SKIP_BLANKS;
7367 if ((RAW == 's') && (NXT(1) == 't') &&
7368 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7369 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7370 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7371 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7372 SKIP(10);
7373 SKIP_BLANKS;
7374 if (RAW != '=') {
7375 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7377 ctxt->sax->error(ctxt->userData,
7378 "XML standalone declaration : expected '='\n");
7379 ctxt->wellFormed = 0;
7380 ctxt->disableSAX = 1;
7381 return(standalone);
7382 }
7383 NEXT;
7384 SKIP_BLANKS;
7385 if (RAW == '\''){
7386 NEXT;
7387 if ((RAW == 'n') && (NXT(1) == 'o')) {
7388 standalone = 0;
7389 SKIP(2);
7390 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7391 (NXT(2) == 's')) {
7392 standalone = 1;
7393 SKIP(3);
7394 } else {
7395 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7397 ctxt->sax->error(ctxt->userData,
7398 "standalone accepts only 'yes' or 'no'\n");
7399 ctxt->wellFormed = 0;
7400 ctxt->disableSAX = 1;
7401 }
7402 if (RAW != '\'') {
7403 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7405 ctxt->sax->error(ctxt->userData, "String not closed\n");
7406 ctxt->wellFormed = 0;
7407 ctxt->disableSAX = 1;
7408 } else
7409 NEXT;
7410 } else if (RAW == '"'){
7411 NEXT;
7412 if ((RAW == 'n') && (NXT(1) == 'o')) {
7413 standalone = 0;
7414 SKIP(2);
7415 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7416 (NXT(2) == 's')) {
7417 standalone = 1;
7418 SKIP(3);
7419 } else {
7420 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7422 ctxt->sax->error(ctxt->userData,
7423 "standalone accepts only 'yes' or 'no'\n");
7424 ctxt->wellFormed = 0;
7425 ctxt->disableSAX = 1;
7426 }
7427 if (RAW != '"') {
7428 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7430 ctxt->sax->error(ctxt->userData, "String not closed\n");
7431 ctxt->wellFormed = 0;
7432 ctxt->disableSAX = 1;
7433 } else
7434 NEXT;
7435 } else {
7436 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7438 ctxt->sax->error(ctxt->userData,
7439 "Standalone value not found\n");
7440 ctxt->wellFormed = 0;
7441 ctxt->disableSAX = 1;
7442 }
7443 }
7444 return(standalone);
7445}
7446
7447/**
7448 * xmlParseXMLDecl:
7449 * @ctxt: an XML parser context
7450 *
7451 * parse an XML declaration header
7452 *
7453 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7454 */
7455
7456void
7457xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7458 xmlChar *version;
7459
7460 /*
7461 * We know that '<?xml' is here.
7462 */
7463 SKIP(5);
7464
7465 if (!IS_BLANK(RAW)) {
7466 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7468 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7469 ctxt->wellFormed = 0;
7470 ctxt->disableSAX = 1;
7471 }
7472 SKIP_BLANKS;
7473
7474 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007475 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007476 */
7477 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007478 if (version == NULL) {
7479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7480 ctxt->sax->error(ctxt->userData,
7481 "Malformed declaration expecting version\n");
7482 ctxt->wellFormed = 0;
7483 ctxt->disableSAX = 1;
7484 } else {
7485 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7486 /*
7487 * TODO: Blueberry should be detected here
7488 */
7489 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7490 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7491 version);
7492 }
7493 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007494 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007495 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007496 }
Owen Taylor3473f882001-02-23 17:55:21 +00007497
7498 /*
7499 * We may have the encoding declaration
7500 */
7501 if (!IS_BLANK(RAW)) {
7502 if ((RAW == '?') && (NXT(1) == '>')) {
7503 SKIP(2);
7504 return;
7505 }
7506 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7508 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7509 ctxt->wellFormed = 0;
7510 ctxt->disableSAX = 1;
7511 }
7512 xmlParseEncodingDecl(ctxt);
7513 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7514 /*
7515 * The XML REC instructs us to stop parsing right here
7516 */
7517 return;
7518 }
7519
7520 /*
7521 * We may have the standalone status.
7522 */
7523 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7524 if ((RAW == '?') && (NXT(1) == '>')) {
7525 SKIP(2);
7526 return;
7527 }
7528 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7530 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7531 ctxt->wellFormed = 0;
7532 ctxt->disableSAX = 1;
7533 }
7534 SKIP_BLANKS;
7535 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7536
7537 SKIP_BLANKS;
7538 if ((RAW == '?') && (NXT(1) == '>')) {
7539 SKIP(2);
7540 } else if (RAW == '>') {
7541 /* Deprecated old WD ... */
7542 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7544 ctxt->sax->error(ctxt->userData,
7545 "XML declaration must end-up with '?>'\n");
7546 ctxt->wellFormed = 0;
7547 ctxt->disableSAX = 1;
7548 NEXT;
7549 } else {
7550 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7552 ctxt->sax->error(ctxt->userData,
7553 "parsing XML declaration: '?>' expected\n");
7554 ctxt->wellFormed = 0;
7555 ctxt->disableSAX = 1;
7556 MOVETO_ENDTAG(CUR_PTR);
7557 NEXT;
7558 }
7559}
7560
7561/**
7562 * xmlParseMisc:
7563 * @ctxt: an XML parser context
7564 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007565 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007566 *
7567 * [27] Misc ::= Comment | PI | S
7568 */
7569
7570void
7571xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007572 while (((RAW == '<') && (NXT(1) == '?')) ||
7573 ((RAW == '<') && (NXT(1) == '!') &&
7574 (NXT(2) == '-') && (NXT(3) == '-')) ||
7575 IS_BLANK(CUR)) {
7576 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007577 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007578 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007579 NEXT;
7580 } else
7581 xmlParseComment(ctxt);
7582 }
7583}
7584
7585/**
7586 * xmlParseDocument:
7587 * @ctxt: an XML parser context
7588 *
7589 * parse an XML document (and build a tree if using the standard SAX
7590 * interface).
7591 *
7592 * [1] document ::= prolog element Misc*
7593 *
7594 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7595 *
7596 * Returns 0, -1 in case of error. the parser context is augmented
7597 * as a result of the parsing.
7598 */
7599
7600int
7601xmlParseDocument(xmlParserCtxtPtr ctxt) {
7602 xmlChar start[4];
7603 xmlCharEncoding enc;
7604
7605 xmlInitParser();
7606
7607 GROW;
7608
7609 /*
7610 * SAX: beginning of the document processing.
7611 */
7612 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7613 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7614
Daniel Veillard50f34372001-08-03 12:06:36 +00007615 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007616 /*
7617 * Get the 4 first bytes and decode the charset
7618 * if enc != XML_CHAR_ENCODING_NONE
7619 * plug some encoding conversion routines.
7620 */
7621 start[0] = RAW;
7622 start[1] = NXT(1);
7623 start[2] = NXT(2);
7624 start[3] = NXT(3);
7625 enc = xmlDetectCharEncoding(start, 4);
7626 if (enc != XML_CHAR_ENCODING_NONE) {
7627 xmlSwitchEncoding(ctxt, enc);
7628 }
Owen Taylor3473f882001-02-23 17:55:21 +00007629 }
7630
7631
7632 if (CUR == 0) {
7633 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7635 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7636 ctxt->wellFormed = 0;
7637 ctxt->disableSAX = 1;
7638 }
7639
7640 /*
7641 * Check for the XMLDecl in the Prolog.
7642 */
7643 GROW;
7644 if ((RAW == '<') && (NXT(1) == '?') &&
7645 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7646 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7647
7648 /*
7649 * Note that we will switch encoding on the fly.
7650 */
7651 xmlParseXMLDecl(ctxt);
7652 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7653 /*
7654 * The XML REC instructs us to stop parsing right here
7655 */
7656 return(-1);
7657 }
7658 ctxt->standalone = ctxt->input->standalone;
7659 SKIP_BLANKS;
7660 } else {
7661 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7662 }
7663 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7664 ctxt->sax->startDocument(ctxt->userData);
7665
7666 /*
7667 * The Misc part of the Prolog
7668 */
7669 GROW;
7670 xmlParseMisc(ctxt);
7671
7672 /*
7673 * Then possibly doc type declaration(s) and more Misc
7674 * (doctypedecl Misc*)?
7675 */
7676 GROW;
7677 if ((RAW == '<') && (NXT(1) == '!') &&
7678 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7679 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7680 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7681 (NXT(8) == 'E')) {
7682
7683 ctxt->inSubset = 1;
7684 xmlParseDocTypeDecl(ctxt);
7685 if (RAW == '[') {
7686 ctxt->instate = XML_PARSER_DTD;
7687 xmlParseInternalSubset(ctxt);
7688 }
7689
7690 /*
7691 * Create and update the external subset.
7692 */
7693 ctxt->inSubset = 2;
7694 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7695 (!ctxt->disableSAX))
7696 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7697 ctxt->extSubSystem, ctxt->extSubURI);
7698 ctxt->inSubset = 0;
7699
7700
7701 ctxt->instate = XML_PARSER_PROLOG;
7702 xmlParseMisc(ctxt);
7703 }
7704
7705 /*
7706 * Time to start parsing the tree itself
7707 */
7708 GROW;
7709 if (RAW != '<') {
7710 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7712 ctxt->sax->error(ctxt->userData,
7713 "Start tag expected, '<' not found\n");
7714 ctxt->wellFormed = 0;
7715 ctxt->disableSAX = 1;
7716 ctxt->instate = XML_PARSER_EOF;
7717 } else {
7718 ctxt->instate = XML_PARSER_CONTENT;
7719 xmlParseElement(ctxt);
7720 ctxt->instate = XML_PARSER_EPILOG;
7721
7722
7723 /*
7724 * The Misc part at the end
7725 */
7726 xmlParseMisc(ctxt);
7727
Daniel Veillard561b7f82002-03-20 21:55:57 +00007728 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007729 ctxt->errNo = XML_ERR_DOCUMENT_END;
7730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7731 ctxt->sax->error(ctxt->userData,
7732 "Extra content at the end of the document\n");
7733 ctxt->wellFormed = 0;
7734 ctxt->disableSAX = 1;
7735 }
7736 ctxt->instate = XML_PARSER_EOF;
7737 }
7738
7739 /*
7740 * SAX: end of the document processing.
7741 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007742 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007743 ctxt->sax->endDocument(ctxt->userData);
7744
Daniel Veillard5997aca2002-03-18 18:36:20 +00007745 /*
7746 * Remove locally kept entity definitions if the tree was not built
7747 */
7748 if ((ctxt->myDoc != NULL) &&
7749 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7750 xmlFreeDoc(ctxt->myDoc);
7751 ctxt->myDoc = NULL;
7752 }
7753
Daniel Veillardc7612992002-02-17 22:47:37 +00007754 if (! ctxt->wellFormed) {
7755 ctxt->valid = 0;
7756 return(-1);
7757 }
Owen Taylor3473f882001-02-23 17:55:21 +00007758 return(0);
7759}
7760
7761/**
7762 * xmlParseExtParsedEnt:
7763 * @ctxt: an XML parser context
7764 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007765 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007766 * An external general parsed entity is well-formed if it matches the
7767 * production labeled extParsedEnt.
7768 *
7769 * [78] extParsedEnt ::= TextDecl? content
7770 *
7771 * Returns 0, -1 in case of error. the parser context is augmented
7772 * as a result of the parsing.
7773 */
7774
7775int
7776xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7777 xmlChar start[4];
7778 xmlCharEncoding enc;
7779
7780 xmlDefaultSAXHandlerInit();
7781
7782 GROW;
7783
7784 /*
7785 * SAX: beginning of the document processing.
7786 */
7787 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7788 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7789
7790 /*
7791 * Get the 4 first bytes and decode the charset
7792 * if enc != XML_CHAR_ENCODING_NONE
7793 * plug some encoding conversion routines.
7794 */
7795 start[0] = RAW;
7796 start[1] = NXT(1);
7797 start[2] = NXT(2);
7798 start[3] = NXT(3);
7799 enc = xmlDetectCharEncoding(start, 4);
7800 if (enc != XML_CHAR_ENCODING_NONE) {
7801 xmlSwitchEncoding(ctxt, enc);
7802 }
7803
7804
7805 if (CUR == 0) {
7806 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7808 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7809 ctxt->wellFormed = 0;
7810 ctxt->disableSAX = 1;
7811 }
7812
7813 /*
7814 * Check for the XMLDecl in the Prolog.
7815 */
7816 GROW;
7817 if ((RAW == '<') && (NXT(1) == '?') &&
7818 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7819 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7820
7821 /*
7822 * Note that we will switch encoding on the fly.
7823 */
7824 xmlParseXMLDecl(ctxt);
7825 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7826 /*
7827 * The XML REC instructs us to stop parsing right here
7828 */
7829 return(-1);
7830 }
7831 SKIP_BLANKS;
7832 } else {
7833 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7834 }
7835 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7836 ctxt->sax->startDocument(ctxt->userData);
7837
7838 /*
7839 * Doing validity checking on chunk doesn't make sense
7840 */
7841 ctxt->instate = XML_PARSER_CONTENT;
7842 ctxt->validate = 0;
7843 ctxt->loadsubset = 0;
7844 ctxt->depth = 0;
7845
7846 xmlParseContent(ctxt);
7847
7848 if ((RAW == '<') && (NXT(1) == '/')) {
7849 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7851 ctxt->sax->error(ctxt->userData,
7852 "chunk is not well balanced\n");
7853 ctxt->wellFormed = 0;
7854 ctxt->disableSAX = 1;
7855 } else if (RAW != 0) {
7856 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7858 ctxt->sax->error(ctxt->userData,
7859 "extra content at the end of well balanced chunk\n");
7860 ctxt->wellFormed = 0;
7861 ctxt->disableSAX = 1;
7862 }
7863
7864 /*
7865 * SAX: end of the document processing.
7866 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007867 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007868 ctxt->sax->endDocument(ctxt->userData);
7869
7870 if (! ctxt->wellFormed) return(-1);
7871 return(0);
7872}
7873
7874/************************************************************************
7875 * *
7876 * Progressive parsing interfaces *
7877 * *
7878 ************************************************************************/
7879
7880/**
7881 * xmlParseLookupSequence:
7882 * @ctxt: an XML parser context
7883 * @first: the first char to lookup
7884 * @next: the next char to lookup or zero
7885 * @third: the next char to lookup or zero
7886 *
7887 * Try to find if a sequence (first, next, third) or just (first next) or
7888 * (first) is available in the input stream.
7889 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7890 * to avoid rescanning sequences of bytes, it DOES change the state of the
7891 * parser, do not use liberally.
7892 *
7893 * Returns the index to the current parsing point if the full sequence
7894 * is available, -1 otherwise.
7895 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007896static int
Owen Taylor3473f882001-02-23 17:55:21 +00007897xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7898 xmlChar next, xmlChar third) {
7899 int base, len;
7900 xmlParserInputPtr in;
7901 const xmlChar *buf;
7902
7903 in = ctxt->input;
7904 if (in == NULL) return(-1);
7905 base = in->cur - in->base;
7906 if (base < 0) return(-1);
7907 if (ctxt->checkIndex > base)
7908 base = ctxt->checkIndex;
7909 if (in->buf == NULL) {
7910 buf = in->base;
7911 len = in->length;
7912 } else {
7913 buf = in->buf->buffer->content;
7914 len = in->buf->buffer->use;
7915 }
7916 /* take into account the sequence length */
7917 if (third) len -= 2;
7918 else if (next) len --;
7919 for (;base < len;base++) {
7920 if (buf[base] == first) {
7921 if (third != 0) {
7922 if ((buf[base + 1] != next) ||
7923 (buf[base + 2] != third)) continue;
7924 } else if (next != 0) {
7925 if (buf[base + 1] != next) continue;
7926 }
7927 ctxt->checkIndex = 0;
7928#ifdef DEBUG_PUSH
7929 if (next == 0)
7930 xmlGenericError(xmlGenericErrorContext,
7931 "PP: lookup '%c' found at %d\n",
7932 first, base);
7933 else if (third == 0)
7934 xmlGenericError(xmlGenericErrorContext,
7935 "PP: lookup '%c%c' found at %d\n",
7936 first, next, base);
7937 else
7938 xmlGenericError(xmlGenericErrorContext,
7939 "PP: lookup '%c%c%c' found at %d\n",
7940 first, next, third, base);
7941#endif
7942 return(base - (in->cur - in->base));
7943 }
7944 }
7945 ctxt->checkIndex = base;
7946#ifdef DEBUG_PUSH
7947 if (next == 0)
7948 xmlGenericError(xmlGenericErrorContext,
7949 "PP: lookup '%c' failed\n", first);
7950 else if (third == 0)
7951 xmlGenericError(xmlGenericErrorContext,
7952 "PP: lookup '%c%c' failed\n", first, next);
7953 else
7954 xmlGenericError(xmlGenericErrorContext,
7955 "PP: lookup '%c%c%c' failed\n", first, next, third);
7956#endif
7957 return(-1);
7958}
7959
7960/**
7961 * xmlParseTryOrFinish:
7962 * @ctxt: an XML parser context
7963 * @terminate: last chunk indicator
7964 *
7965 * Try to progress on parsing
7966 *
7967 * Returns zero if no parsing was possible
7968 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007969static int
Owen Taylor3473f882001-02-23 17:55:21 +00007970xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7971 int ret = 0;
7972 int avail;
7973 xmlChar cur, next;
7974
7975#ifdef DEBUG_PUSH
7976 switch (ctxt->instate) {
7977 case XML_PARSER_EOF:
7978 xmlGenericError(xmlGenericErrorContext,
7979 "PP: try EOF\n"); break;
7980 case XML_PARSER_START:
7981 xmlGenericError(xmlGenericErrorContext,
7982 "PP: try START\n"); break;
7983 case XML_PARSER_MISC:
7984 xmlGenericError(xmlGenericErrorContext,
7985 "PP: try MISC\n");break;
7986 case XML_PARSER_COMMENT:
7987 xmlGenericError(xmlGenericErrorContext,
7988 "PP: try COMMENT\n");break;
7989 case XML_PARSER_PROLOG:
7990 xmlGenericError(xmlGenericErrorContext,
7991 "PP: try PROLOG\n");break;
7992 case XML_PARSER_START_TAG:
7993 xmlGenericError(xmlGenericErrorContext,
7994 "PP: try START_TAG\n");break;
7995 case XML_PARSER_CONTENT:
7996 xmlGenericError(xmlGenericErrorContext,
7997 "PP: try CONTENT\n");break;
7998 case XML_PARSER_CDATA_SECTION:
7999 xmlGenericError(xmlGenericErrorContext,
8000 "PP: try CDATA_SECTION\n");break;
8001 case XML_PARSER_END_TAG:
8002 xmlGenericError(xmlGenericErrorContext,
8003 "PP: try END_TAG\n");break;
8004 case XML_PARSER_ENTITY_DECL:
8005 xmlGenericError(xmlGenericErrorContext,
8006 "PP: try ENTITY_DECL\n");break;
8007 case XML_PARSER_ENTITY_VALUE:
8008 xmlGenericError(xmlGenericErrorContext,
8009 "PP: try ENTITY_VALUE\n");break;
8010 case XML_PARSER_ATTRIBUTE_VALUE:
8011 xmlGenericError(xmlGenericErrorContext,
8012 "PP: try ATTRIBUTE_VALUE\n");break;
8013 case XML_PARSER_DTD:
8014 xmlGenericError(xmlGenericErrorContext,
8015 "PP: try DTD\n");break;
8016 case XML_PARSER_EPILOG:
8017 xmlGenericError(xmlGenericErrorContext,
8018 "PP: try EPILOG\n");break;
8019 case XML_PARSER_PI:
8020 xmlGenericError(xmlGenericErrorContext,
8021 "PP: try PI\n");break;
8022 case XML_PARSER_IGNORE:
8023 xmlGenericError(xmlGenericErrorContext,
8024 "PP: try IGNORE\n");break;
8025 }
8026#endif
8027
8028 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008029 SHRINK;
8030
Owen Taylor3473f882001-02-23 17:55:21 +00008031 /*
8032 * Pop-up of finished entities.
8033 */
8034 while ((RAW == 0) && (ctxt->inputNr > 1))
8035 xmlPopInput(ctxt);
8036
8037 if (ctxt->input ==NULL) break;
8038 if (ctxt->input->buf == NULL)
8039 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008040 else {
8041 /*
8042 * If we are operating on converted input, try to flush
8043 * remainng chars to avoid them stalling in the non-converted
8044 * buffer.
8045 */
8046 if ((ctxt->input->buf->raw != NULL) &&
8047 (ctxt->input->buf->raw->use > 0)) {
8048 int base = ctxt->input->base -
8049 ctxt->input->buf->buffer->content;
8050 int current = ctxt->input->cur - ctxt->input->base;
8051
8052 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8053 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8054 ctxt->input->cur = ctxt->input->base + current;
8055 ctxt->input->end =
8056 &ctxt->input->buf->buffer->content[
8057 ctxt->input->buf->buffer->use];
8058 }
8059 avail = ctxt->input->buf->buffer->use -
8060 (ctxt->input->cur - ctxt->input->base);
8061 }
Owen Taylor3473f882001-02-23 17:55:21 +00008062 if (avail < 1)
8063 goto done;
8064 switch (ctxt->instate) {
8065 case XML_PARSER_EOF:
8066 /*
8067 * Document parsing is done !
8068 */
8069 goto done;
8070 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008071 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8072 xmlChar start[4];
8073 xmlCharEncoding enc;
8074
8075 /*
8076 * Very first chars read from the document flow.
8077 */
8078 if (avail < 4)
8079 goto done;
8080
8081 /*
8082 * Get the 4 first bytes and decode the charset
8083 * if enc != XML_CHAR_ENCODING_NONE
8084 * plug some encoding conversion routines.
8085 */
8086 start[0] = RAW;
8087 start[1] = NXT(1);
8088 start[2] = NXT(2);
8089 start[3] = NXT(3);
8090 enc = xmlDetectCharEncoding(start, 4);
8091 if (enc != XML_CHAR_ENCODING_NONE) {
8092 xmlSwitchEncoding(ctxt, enc);
8093 }
8094 break;
8095 }
Owen Taylor3473f882001-02-23 17:55:21 +00008096
8097 cur = ctxt->input->cur[0];
8098 next = ctxt->input->cur[1];
8099 if (cur == 0) {
8100 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8101 ctxt->sax->setDocumentLocator(ctxt->userData,
8102 &xmlDefaultSAXLocator);
8103 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8105 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8106 ctxt->wellFormed = 0;
8107 ctxt->disableSAX = 1;
8108 ctxt->instate = XML_PARSER_EOF;
8109#ifdef DEBUG_PUSH
8110 xmlGenericError(xmlGenericErrorContext,
8111 "PP: entering EOF\n");
8112#endif
8113 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8114 ctxt->sax->endDocument(ctxt->userData);
8115 goto done;
8116 }
8117 if ((cur == '<') && (next == '?')) {
8118 /* PI or XML decl */
8119 if (avail < 5) return(ret);
8120 if ((!terminate) &&
8121 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8122 return(ret);
8123 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8124 ctxt->sax->setDocumentLocator(ctxt->userData,
8125 &xmlDefaultSAXLocator);
8126 if ((ctxt->input->cur[2] == 'x') &&
8127 (ctxt->input->cur[3] == 'm') &&
8128 (ctxt->input->cur[4] == 'l') &&
8129 (IS_BLANK(ctxt->input->cur[5]))) {
8130 ret += 5;
8131#ifdef DEBUG_PUSH
8132 xmlGenericError(xmlGenericErrorContext,
8133 "PP: Parsing XML Decl\n");
8134#endif
8135 xmlParseXMLDecl(ctxt);
8136 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8137 /*
8138 * The XML REC instructs us to stop parsing right
8139 * here
8140 */
8141 ctxt->instate = XML_PARSER_EOF;
8142 return(0);
8143 }
8144 ctxt->standalone = ctxt->input->standalone;
8145 if ((ctxt->encoding == NULL) &&
8146 (ctxt->input->encoding != NULL))
8147 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8148 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8149 (!ctxt->disableSAX))
8150 ctxt->sax->startDocument(ctxt->userData);
8151 ctxt->instate = XML_PARSER_MISC;
8152#ifdef DEBUG_PUSH
8153 xmlGenericError(xmlGenericErrorContext,
8154 "PP: entering MISC\n");
8155#endif
8156 } else {
8157 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8158 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8159 (!ctxt->disableSAX))
8160 ctxt->sax->startDocument(ctxt->userData);
8161 ctxt->instate = XML_PARSER_MISC;
8162#ifdef DEBUG_PUSH
8163 xmlGenericError(xmlGenericErrorContext,
8164 "PP: entering MISC\n");
8165#endif
8166 }
8167 } else {
8168 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8169 ctxt->sax->setDocumentLocator(ctxt->userData,
8170 &xmlDefaultSAXLocator);
8171 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8172 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8173 (!ctxt->disableSAX))
8174 ctxt->sax->startDocument(ctxt->userData);
8175 ctxt->instate = XML_PARSER_MISC;
8176#ifdef DEBUG_PUSH
8177 xmlGenericError(xmlGenericErrorContext,
8178 "PP: entering MISC\n");
8179#endif
8180 }
8181 break;
8182 case XML_PARSER_MISC:
8183 SKIP_BLANKS;
8184 if (ctxt->input->buf == NULL)
8185 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8186 else
8187 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8188 if (avail < 2)
8189 goto done;
8190 cur = ctxt->input->cur[0];
8191 next = ctxt->input->cur[1];
8192 if ((cur == '<') && (next == '?')) {
8193 if ((!terminate) &&
8194 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8195 goto done;
8196#ifdef DEBUG_PUSH
8197 xmlGenericError(xmlGenericErrorContext,
8198 "PP: Parsing PI\n");
8199#endif
8200 xmlParsePI(ctxt);
8201 } else if ((cur == '<') && (next == '!') &&
8202 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8203 if ((!terminate) &&
8204 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8205 goto done;
8206#ifdef DEBUG_PUSH
8207 xmlGenericError(xmlGenericErrorContext,
8208 "PP: Parsing Comment\n");
8209#endif
8210 xmlParseComment(ctxt);
8211 ctxt->instate = XML_PARSER_MISC;
8212 } else if ((cur == '<') && (next == '!') &&
8213 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8214 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8215 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8216 (ctxt->input->cur[8] == 'E')) {
8217 if ((!terminate) &&
8218 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8219 goto done;
8220#ifdef DEBUG_PUSH
8221 xmlGenericError(xmlGenericErrorContext,
8222 "PP: Parsing internal subset\n");
8223#endif
8224 ctxt->inSubset = 1;
8225 xmlParseDocTypeDecl(ctxt);
8226 if (RAW == '[') {
8227 ctxt->instate = XML_PARSER_DTD;
8228#ifdef DEBUG_PUSH
8229 xmlGenericError(xmlGenericErrorContext,
8230 "PP: entering DTD\n");
8231#endif
8232 } else {
8233 /*
8234 * Create and update the external subset.
8235 */
8236 ctxt->inSubset = 2;
8237 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8238 (ctxt->sax->externalSubset != NULL))
8239 ctxt->sax->externalSubset(ctxt->userData,
8240 ctxt->intSubName, ctxt->extSubSystem,
8241 ctxt->extSubURI);
8242 ctxt->inSubset = 0;
8243 ctxt->instate = XML_PARSER_PROLOG;
8244#ifdef DEBUG_PUSH
8245 xmlGenericError(xmlGenericErrorContext,
8246 "PP: entering PROLOG\n");
8247#endif
8248 }
8249 } else if ((cur == '<') && (next == '!') &&
8250 (avail < 9)) {
8251 goto done;
8252 } else {
8253 ctxt->instate = XML_PARSER_START_TAG;
8254#ifdef DEBUG_PUSH
8255 xmlGenericError(xmlGenericErrorContext,
8256 "PP: entering START_TAG\n");
8257#endif
8258 }
8259 break;
8260 case XML_PARSER_IGNORE:
8261 xmlGenericError(xmlGenericErrorContext,
8262 "PP: internal error, state == IGNORE");
8263 ctxt->instate = XML_PARSER_DTD;
8264#ifdef DEBUG_PUSH
8265 xmlGenericError(xmlGenericErrorContext,
8266 "PP: entering DTD\n");
8267#endif
8268 break;
8269 case XML_PARSER_PROLOG:
8270 SKIP_BLANKS;
8271 if (ctxt->input->buf == NULL)
8272 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8273 else
8274 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8275 if (avail < 2)
8276 goto done;
8277 cur = ctxt->input->cur[0];
8278 next = ctxt->input->cur[1];
8279 if ((cur == '<') && (next == '?')) {
8280 if ((!terminate) &&
8281 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8282 goto done;
8283#ifdef DEBUG_PUSH
8284 xmlGenericError(xmlGenericErrorContext,
8285 "PP: Parsing PI\n");
8286#endif
8287 xmlParsePI(ctxt);
8288 } else if ((cur == '<') && (next == '!') &&
8289 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8290 if ((!terminate) &&
8291 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8292 goto done;
8293#ifdef DEBUG_PUSH
8294 xmlGenericError(xmlGenericErrorContext,
8295 "PP: Parsing Comment\n");
8296#endif
8297 xmlParseComment(ctxt);
8298 ctxt->instate = XML_PARSER_PROLOG;
8299 } else if ((cur == '<') && (next == '!') &&
8300 (avail < 4)) {
8301 goto done;
8302 } else {
8303 ctxt->instate = XML_PARSER_START_TAG;
8304#ifdef DEBUG_PUSH
8305 xmlGenericError(xmlGenericErrorContext,
8306 "PP: entering START_TAG\n");
8307#endif
8308 }
8309 break;
8310 case XML_PARSER_EPILOG:
8311 SKIP_BLANKS;
8312 if (ctxt->input->buf == NULL)
8313 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8314 else
8315 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8316 if (avail < 2)
8317 goto done;
8318 cur = ctxt->input->cur[0];
8319 next = ctxt->input->cur[1];
8320 if ((cur == '<') && (next == '?')) {
8321 if ((!terminate) &&
8322 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8323 goto done;
8324#ifdef DEBUG_PUSH
8325 xmlGenericError(xmlGenericErrorContext,
8326 "PP: Parsing PI\n");
8327#endif
8328 xmlParsePI(ctxt);
8329 ctxt->instate = XML_PARSER_EPILOG;
8330 } else if ((cur == '<') && (next == '!') &&
8331 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8332 if ((!terminate) &&
8333 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8334 goto done;
8335#ifdef DEBUG_PUSH
8336 xmlGenericError(xmlGenericErrorContext,
8337 "PP: Parsing Comment\n");
8338#endif
8339 xmlParseComment(ctxt);
8340 ctxt->instate = XML_PARSER_EPILOG;
8341 } else if ((cur == '<') && (next == '!') &&
8342 (avail < 4)) {
8343 goto done;
8344 } else {
8345 ctxt->errNo = XML_ERR_DOCUMENT_END;
8346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8347 ctxt->sax->error(ctxt->userData,
8348 "Extra content at the end of the document\n");
8349 ctxt->wellFormed = 0;
8350 ctxt->disableSAX = 1;
8351 ctxt->instate = XML_PARSER_EOF;
8352#ifdef DEBUG_PUSH
8353 xmlGenericError(xmlGenericErrorContext,
8354 "PP: entering EOF\n");
8355#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008356 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008357 ctxt->sax->endDocument(ctxt->userData);
8358 goto done;
8359 }
8360 break;
8361 case XML_PARSER_START_TAG: {
8362 xmlChar *name, *oldname;
8363
8364 if ((avail < 2) && (ctxt->inputNr == 1))
8365 goto done;
8366 cur = ctxt->input->cur[0];
8367 if (cur != '<') {
8368 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8370 ctxt->sax->error(ctxt->userData,
8371 "Start tag expect, '<' not found\n");
8372 ctxt->wellFormed = 0;
8373 ctxt->disableSAX = 1;
8374 ctxt->instate = XML_PARSER_EOF;
8375#ifdef DEBUG_PUSH
8376 xmlGenericError(xmlGenericErrorContext,
8377 "PP: entering EOF\n");
8378#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008379 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008380 ctxt->sax->endDocument(ctxt->userData);
8381 goto done;
8382 }
8383 if ((!terminate) &&
8384 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8385 goto done;
8386 if (ctxt->spaceNr == 0)
8387 spacePush(ctxt, -1);
8388 else
8389 spacePush(ctxt, *ctxt->space);
8390 name = xmlParseStartTag(ctxt);
8391 if (name == NULL) {
8392 spacePop(ctxt);
8393 ctxt->instate = XML_PARSER_EOF;
8394#ifdef DEBUG_PUSH
8395 xmlGenericError(xmlGenericErrorContext,
8396 "PP: entering EOF\n");
8397#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008398 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008399 ctxt->sax->endDocument(ctxt->userData);
8400 goto done;
8401 }
8402 namePush(ctxt, xmlStrdup(name));
8403
8404 /*
8405 * [ VC: Root Element Type ]
8406 * The Name in the document type declaration must match
8407 * the element type of the root element.
8408 */
8409 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8410 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8411 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8412
8413 /*
8414 * Check for an Empty Element.
8415 */
8416 if ((RAW == '/') && (NXT(1) == '>')) {
8417 SKIP(2);
8418 if ((ctxt->sax != NULL) &&
8419 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8420 ctxt->sax->endElement(ctxt->userData, name);
8421 xmlFree(name);
8422 oldname = namePop(ctxt);
8423 spacePop(ctxt);
8424 if (oldname != NULL) {
8425#ifdef DEBUG_STACK
8426 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8427#endif
8428 xmlFree(oldname);
8429 }
8430 if (ctxt->name == NULL) {
8431 ctxt->instate = XML_PARSER_EPILOG;
8432#ifdef DEBUG_PUSH
8433 xmlGenericError(xmlGenericErrorContext,
8434 "PP: entering EPILOG\n");
8435#endif
8436 } else {
8437 ctxt->instate = XML_PARSER_CONTENT;
8438#ifdef DEBUG_PUSH
8439 xmlGenericError(xmlGenericErrorContext,
8440 "PP: entering CONTENT\n");
8441#endif
8442 }
8443 break;
8444 }
8445 if (RAW == '>') {
8446 NEXT;
8447 } else {
8448 ctxt->errNo = XML_ERR_GT_REQUIRED;
8449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8450 ctxt->sax->error(ctxt->userData,
8451 "Couldn't find end of Start Tag %s\n",
8452 name);
8453 ctxt->wellFormed = 0;
8454 ctxt->disableSAX = 1;
8455
8456 /*
8457 * end of parsing of this node.
8458 */
8459 nodePop(ctxt);
8460 oldname = namePop(ctxt);
8461 spacePop(ctxt);
8462 if (oldname != NULL) {
8463#ifdef DEBUG_STACK
8464 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8465#endif
8466 xmlFree(oldname);
8467 }
8468 }
8469 xmlFree(name);
8470 ctxt->instate = XML_PARSER_CONTENT;
8471#ifdef DEBUG_PUSH
8472 xmlGenericError(xmlGenericErrorContext,
8473 "PP: entering CONTENT\n");
8474#endif
8475 break;
8476 }
8477 case XML_PARSER_CONTENT: {
8478 const xmlChar *test;
8479 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008480 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008481
8482 /*
8483 * Handle preparsed entities and charRef
8484 */
8485 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008486 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008487
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008488 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008489 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8490 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008491 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008492 ctxt->token = 0;
8493 }
8494 if ((avail < 2) && (ctxt->inputNr == 1))
8495 goto done;
8496 cur = ctxt->input->cur[0];
8497 next = ctxt->input->cur[1];
8498
8499 test = CUR_PTR;
8500 cons = ctxt->input->consumed;
8501 tok = ctxt->token;
8502 if ((cur == '<') && (next == '?')) {
8503 if ((!terminate) &&
8504 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8505 goto done;
8506#ifdef DEBUG_PUSH
8507 xmlGenericError(xmlGenericErrorContext,
8508 "PP: Parsing PI\n");
8509#endif
8510 xmlParsePI(ctxt);
8511 } else if ((cur == '<') && (next == '!') &&
8512 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8513 if ((!terminate) &&
8514 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8515 goto done;
8516#ifdef DEBUG_PUSH
8517 xmlGenericError(xmlGenericErrorContext,
8518 "PP: Parsing Comment\n");
8519#endif
8520 xmlParseComment(ctxt);
8521 ctxt->instate = XML_PARSER_CONTENT;
8522 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8523 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8524 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8525 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8526 (ctxt->input->cur[8] == '[')) {
8527 SKIP(9);
8528 ctxt->instate = XML_PARSER_CDATA_SECTION;
8529#ifdef DEBUG_PUSH
8530 xmlGenericError(xmlGenericErrorContext,
8531 "PP: entering CDATA_SECTION\n");
8532#endif
8533 break;
8534 } else if ((cur == '<') && (next == '!') &&
8535 (avail < 9)) {
8536 goto done;
8537 } else if ((cur == '<') && (next == '/')) {
8538 ctxt->instate = XML_PARSER_END_TAG;
8539#ifdef DEBUG_PUSH
8540 xmlGenericError(xmlGenericErrorContext,
8541 "PP: entering END_TAG\n");
8542#endif
8543 break;
8544 } else if (cur == '<') {
8545 ctxt->instate = XML_PARSER_START_TAG;
8546#ifdef DEBUG_PUSH
8547 xmlGenericError(xmlGenericErrorContext,
8548 "PP: entering START_TAG\n");
8549#endif
8550 break;
8551 } else if (cur == '&') {
8552 if ((!terminate) &&
8553 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8554 goto done;
8555#ifdef DEBUG_PUSH
8556 xmlGenericError(xmlGenericErrorContext,
8557 "PP: Parsing Reference\n");
8558#endif
8559 xmlParseReference(ctxt);
8560 } else {
8561 /* TODO Avoid the extra copy, handle directly !!! */
8562 /*
8563 * Goal of the following test is:
8564 * - minimize calls to the SAX 'character' callback
8565 * when they are mergeable
8566 * - handle an problem for isBlank when we only parse
8567 * a sequence of blank chars and the next one is
8568 * not available to check against '<' presence.
8569 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008570 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008571 * of the parser.
8572 */
8573 if ((ctxt->inputNr == 1) &&
8574 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8575 if ((!terminate) &&
8576 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8577 goto done;
8578 }
8579 ctxt->checkIndex = 0;
8580#ifdef DEBUG_PUSH
8581 xmlGenericError(xmlGenericErrorContext,
8582 "PP: Parsing char data\n");
8583#endif
8584 xmlParseCharData(ctxt, 0);
8585 }
8586 /*
8587 * Pop-up of finished entities.
8588 */
8589 while ((RAW == 0) && (ctxt->inputNr > 1))
8590 xmlPopInput(ctxt);
8591 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8592 (tok == ctxt->token)) {
8593 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8595 ctxt->sax->error(ctxt->userData,
8596 "detected an error in element content\n");
8597 ctxt->wellFormed = 0;
8598 ctxt->disableSAX = 1;
8599 ctxt->instate = XML_PARSER_EOF;
8600 break;
8601 }
8602 break;
8603 }
8604 case XML_PARSER_CDATA_SECTION: {
8605 /*
8606 * The Push mode need to have the SAX callback for
8607 * cdataBlock merge back contiguous callbacks.
8608 */
8609 int base;
8610
8611 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8612 if (base < 0) {
8613 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8614 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8615 if (ctxt->sax->cdataBlock != NULL)
8616 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8617 XML_PARSER_BIG_BUFFER_SIZE);
8618 }
8619 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8620 ctxt->checkIndex = 0;
8621 }
8622 goto done;
8623 } else {
8624 if ((ctxt->sax != NULL) && (base > 0) &&
8625 (!ctxt->disableSAX)) {
8626 if (ctxt->sax->cdataBlock != NULL)
8627 ctxt->sax->cdataBlock(ctxt->userData,
8628 ctxt->input->cur, base);
8629 }
8630 SKIP(base + 3);
8631 ctxt->checkIndex = 0;
8632 ctxt->instate = XML_PARSER_CONTENT;
8633#ifdef DEBUG_PUSH
8634 xmlGenericError(xmlGenericErrorContext,
8635 "PP: entering CONTENT\n");
8636#endif
8637 }
8638 break;
8639 }
8640 case XML_PARSER_END_TAG:
8641 if (avail < 2)
8642 goto done;
8643 if ((!terminate) &&
8644 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8645 goto done;
8646 xmlParseEndTag(ctxt);
8647 if (ctxt->name == NULL) {
8648 ctxt->instate = XML_PARSER_EPILOG;
8649#ifdef DEBUG_PUSH
8650 xmlGenericError(xmlGenericErrorContext,
8651 "PP: entering EPILOG\n");
8652#endif
8653 } else {
8654 ctxt->instate = XML_PARSER_CONTENT;
8655#ifdef DEBUG_PUSH
8656 xmlGenericError(xmlGenericErrorContext,
8657 "PP: entering CONTENT\n");
8658#endif
8659 }
8660 break;
8661 case XML_PARSER_DTD: {
8662 /*
8663 * Sorry but progressive parsing of the internal subset
8664 * is not expected to be supported. We first check that
8665 * the full content of the internal subset is available and
8666 * the parsing is launched only at that point.
8667 * Internal subset ends up with "']' S? '>'" in an unescaped
8668 * section and not in a ']]>' sequence which are conditional
8669 * sections (whoever argued to keep that crap in XML deserve
8670 * a place in hell !).
8671 */
8672 int base, i;
8673 xmlChar *buf;
8674 xmlChar quote = 0;
8675
8676 base = ctxt->input->cur - ctxt->input->base;
8677 if (base < 0) return(0);
8678 if (ctxt->checkIndex > base)
8679 base = ctxt->checkIndex;
8680 buf = ctxt->input->buf->buffer->content;
8681 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8682 base++) {
8683 if (quote != 0) {
8684 if (buf[base] == quote)
8685 quote = 0;
8686 continue;
8687 }
8688 if (buf[base] == '"') {
8689 quote = '"';
8690 continue;
8691 }
8692 if (buf[base] == '\'') {
8693 quote = '\'';
8694 continue;
8695 }
8696 if (buf[base] == ']') {
8697 if ((unsigned int) base +1 >=
8698 ctxt->input->buf->buffer->use)
8699 break;
8700 if (buf[base + 1] == ']') {
8701 /* conditional crap, skip both ']' ! */
8702 base++;
8703 continue;
8704 }
8705 for (i = 0;
8706 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8707 i++) {
8708 if (buf[base + i] == '>')
8709 goto found_end_int_subset;
8710 }
8711 break;
8712 }
8713 }
8714 /*
8715 * We didn't found the end of the Internal subset
8716 */
8717 if (quote == 0)
8718 ctxt->checkIndex = base;
8719#ifdef DEBUG_PUSH
8720 if (next == 0)
8721 xmlGenericError(xmlGenericErrorContext,
8722 "PP: lookup of int subset end filed\n");
8723#endif
8724 goto done;
8725
8726found_end_int_subset:
8727 xmlParseInternalSubset(ctxt);
8728 ctxt->inSubset = 2;
8729 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8730 (ctxt->sax->externalSubset != NULL))
8731 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8732 ctxt->extSubSystem, ctxt->extSubURI);
8733 ctxt->inSubset = 0;
8734 ctxt->instate = XML_PARSER_PROLOG;
8735 ctxt->checkIndex = 0;
8736#ifdef DEBUG_PUSH
8737 xmlGenericError(xmlGenericErrorContext,
8738 "PP: entering PROLOG\n");
8739#endif
8740 break;
8741 }
8742 case XML_PARSER_COMMENT:
8743 xmlGenericError(xmlGenericErrorContext,
8744 "PP: internal error, state == COMMENT\n");
8745 ctxt->instate = XML_PARSER_CONTENT;
8746#ifdef DEBUG_PUSH
8747 xmlGenericError(xmlGenericErrorContext,
8748 "PP: entering CONTENT\n");
8749#endif
8750 break;
8751 case XML_PARSER_PI:
8752 xmlGenericError(xmlGenericErrorContext,
8753 "PP: internal error, state == PI\n");
8754 ctxt->instate = XML_PARSER_CONTENT;
8755#ifdef DEBUG_PUSH
8756 xmlGenericError(xmlGenericErrorContext,
8757 "PP: entering CONTENT\n");
8758#endif
8759 break;
8760 case XML_PARSER_ENTITY_DECL:
8761 xmlGenericError(xmlGenericErrorContext,
8762 "PP: internal error, state == ENTITY_DECL\n");
8763 ctxt->instate = XML_PARSER_DTD;
8764#ifdef DEBUG_PUSH
8765 xmlGenericError(xmlGenericErrorContext,
8766 "PP: entering DTD\n");
8767#endif
8768 break;
8769 case XML_PARSER_ENTITY_VALUE:
8770 xmlGenericError(xmlGenericErrorContext,
8771 "PP: internal error, state == ENTITY_VALUE\n");
8772 ctxt->instate = XML_PARSER_CONTENT;
8773#ifdef DEBUG_PUSH
8774 xmlGenericError(xmlGenericErrorContext,
8775 "PP: entering DTD\n");
8776#endif
8777 break;
8778 case XML_PARSER_ATTRIBUTE_VALUE:
8779 xmlGenericError(xmlGenericErrorContext,
8780 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8781 ctxt->instate = XML_PARSER_START_TAG;
8782#ifdef DEBUG_PUSH
8783 xmlGenericError(xmlGenericErrorContext,
8784 "PP: entering START_TAG\n");
8785#endif
8786 break;
8787 case XML_PARSER_SYSTEM_LITERAL:
8788 xmlGenericError(xmlGenericErrorContext,
8789 "PP: internal error, state == SYSTEM_LITERAL\n");
8790 ctxt->instate = XML_PARSER_START_TAG;
8791#ifdef DEBUG_PUSH
8792 xmlGenericError(xmlGenericErrorContext,
8793 "PP: entering START_TAG\n");
8794#endif
8795 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008796 case XML_PARSER_PUBLIC_LITERAL:
8797 xmlGenericError(xmlGenericErrorContext,
8798 "PP: internal error, state == PUBLIC_LITERAL\n");
8799 ctxt->instate = XML_PARSER_START_TAG;
8800#ifdef DEBUG_PUSH
8801 xmlGenericError(xmlGenericErrorContext,
8802 "PP: entering START_TAG\n");
8803#endif
8804 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008805 }
8806 }
8807done:
8808#ifdef DEBUG_PUSH
8809 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8810#endif
8811 return(ret);
8812}
8813
8814/**
Owen Taylor3473f882001-02-23 17:55:21 +00008815 * xmlParseChunk:
8816 * @ctxt: an XML parser context
8817 * @chunk: an char array
8818 * @size: the size in byte of the chunk
8819 * @terminate: last chunk indicator
8820 *
8821 * Parse a Chunk of memory
8822 *
8823 * Returns zero if no error, the xmlParserErrors otherwise.
8824 */
8825int
8826xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8827 int terminate) {
8828 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8829 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8830 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8831 int cur = ctxt->input->cur - ctxt->input->base;
8832
8833 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8834 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8835 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008836 ctxt->input->end =
8837 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008838#ifdef DEBUG_PUSH
8839 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8840#endif
8841
8842 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8843 xmlParseTryOrFinish(ctxt, terminate);
8844 } else if (ctxt->instate != XML_PARSER_EOF) {
8845 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8846 xmlParserInputBufferPtr in = ctxt->input->buf;
8847 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8848 (in->raw != NULL)) {
8849 int nbchars;
8850
8851 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8852 if (nbchars < 0) {
8853 xmlGenericError(xmlGenericErrorContext,
8854 "xmlParseChunk: encoder error\n");
8855 return(XML_ERR_INVALID_ENCODING);
8856 }
8857 }
8858 }
8859 }
8860 xmlParseTryOrFinish(ctxt, terminate);
8861 if (terminate) {
8862 /*
8863 * Check for termination
8864 */
8865 if ((ctxt->instate != XML_PARSER_EOF) &&
8866 (ctxt->instate != XML_PARSER_EPILOG)) {
8867 ctxt->errNo = XML_ERR_DOCUMENT_END;
8868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8869 ctxt->sax->error(ctxt->userData,
8870 "Extra content at the end of the document\n");
8871 ctxt->wellFormed = 0;
8872 ctxt->disableSAX = 1;
8873 }
8874 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008875 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008876 ctxt->sax->endDocument(ctxt->userData);
8877 }
8878 ctxt->instate = XML_PARSER_EOF;
8879 }
8880 return((xmlParserErrors) ctxt->errNo);
8881}
8882
8883/************************************************************************
8884 * *
8885 * I/O front end functions to the parser *
8886 * *
8887 ************************************************************************/
8888
8889/**
8890 * xmlStopParser:
8891 * @ctxt: an XML parser context
8892 *
8893 * Blocks further parser processing
8894 */
8895void
8896xmlStopParser(xmlParserCtxtPtr ctxt) {
8897 ctxt->instate = XML_PARSER_EOF;
8898 if (ctxt->input != NULL)
8899 ctxt->input->cur = BAD_CAST"";
8900}
8901
8902/**
8903 * xmlCreatePushParserCtxt:
8904 * @sax: a SAX handler
8905 * @user_data: The user data returned on SAX callbacks
8906 * @chunk: a pointer to an array of chars
8907 * @size: number of chars in the array
8908 * @filename: an optional file name or URI
8909 *
8910 * Create a parser context for using the XML parser in push mode
8911 * To allow content encoding detection, @size should be >= 4
8912 * The value of @filename is used for fetching external entities
8913 * and error/warning reports.
8914 *
8915 * Returns the new parser context or NULL
8916 */
8917xmlParserCtxtPtr
8918xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8919 const char *chunk, int size, const char *filename) {
8920 xmlParserCtxtPtr ctxt;
8921 xmlParserInputPtr inputStream;
8922 xmlParserInputBufferPtr buf;
8923 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8924
8925 /*
8926 * plug some encoding conversion routines
8927 */
8928 if ((chunk != NULL) && (size >= 4))
8929 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8930
8931 buf = xmlAllocParserInputBuffer(enc);
8932 if (buf == NULL) return(NULL);
8933
8934 ctxt = xmlNewParserCtxt();
8935 if (ctxt == NULL) {
8936 xmlFree(buf);
8937 return(NULL);
8938 }
8939 if (sax != NULL) {
8940 if (ctxt->sax != &xmlDefaultSAXHandler)
8941 xmlFree(ctxt->sax);
8942 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8943 if (ctxt->sax == NULL) {
8944 xmlFree(buf);
8945 xmlFree(ctxt);
8946 return(NULL);
8947 }
8948 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8949 if (user_data != NULL)
8950 ctxt->userData = user_data;
8951 }
8952 if (filename == NULL) {
8953 ctxt->directory = NULL;
8954 } else {
8955 ctxt->directory = xmlParserGetDirectory(filename);
8956 }
8957
8958 inputStream = xmlNewInputStream(ctxt);
8959 if (inputStream == NULL) {
8960 xmlFreeParserCtxt(ctxt);
8961 return(NULL);
8962 }
8963
8964 if (filename == NULL)
8965 inputStream->filename = NULL;
8966 else
8967 inputStream->filename = xmlMemStrdup(filename);
8968 inputStream->buf = buf;
8969 inputStream->base = inputStream->buf->buffer->content;
8970 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008971 inputStream->end =
8972 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008973
8974 inputPush(ctxt, inputStream);
8975
8976 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8977 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008978 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8979 int cur = ctxt->input->cur - ctxt->input->base;
8980
Owen Taylor3473f882001-02-23 17:55:21 +00008981 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008982
8983 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8984 ctxt->input->cur = ctxt->input->base + cur;
8985 ctxt->input->end =
8986 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008987#ifdef DEBUG_PUSH
8988 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8989#endif
8990 }
8991
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008992 if (enc != XML_CHAR_ENCODING_NONE) {
8993 xmlSwitchEncoding(ctxt, enc);
8994 }
8995
Owen Taylor3473f882001-02-23 17:55:21 +00008996 return(ctxt);
8997}
8998
8999/**
9000 * xmlCreateIOParserCtxt:
9001 * @sax: a SAX handler
9002 * @user_data: The user data returned on SAX callbacks
9003 * @ioread: an I/O read function
9004 * @ioclose: an I/O close function
9005 * @ioctx: an I/O handler
9006 * @enc: the charset encoding if known
9007 *
9008 * Create a parser context for using the XML parser with an existing
9009 * I/O stream
9010 *
9011 * Returns the new parser context or NULL
9012 */
9013xmlParserCtxtPtr
9014xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9015 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9016 void *ioctx, xmlCharEncoding enc) {
9017 xmlParserCtxtPtr ctxt;
9018 xmlParserInputPtr inputStream;
9019 xmlParserInputBufferPtr buf;
9020
9021 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9022 if (buf == NULL) return(NULL);
9023
9024 ctxt = xmlNewParserCtxt();
9025 if (ctxt == NULL) {
9026 xmlFree(buf);
9027 return(NULL);
9028 }
9029 if (sax != NULL) {
9030 if (ctxt->sax != &xmlDefaultSAXHandler)
9031 xmlFree(ctxt->sax);
9032 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9033 if (ctxt->sax == NULL) {
9034 xmlFree(buf);
9035 xmlFree(ctxt);
9036 return(NULL);
9037 }
9038 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9039 if (user_data != NULL)
9040 ctxt->userData = user_data;
9041 }
9042
9043 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9044 if (inputStream == NULL) {
9045 xmlFreeParserCtxt(ctxt);
9046 return(NULL);
9047 }
9048 inputPush(ctxt, inputStream);
9049
9050 return(ctxt);
9051}
9052
9053/************************************************************************
9054 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009055 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009056 * *
9057 ************************************************************************/
9058
9059/**
9060 * xmlIOParseDTD:
9061 * @sax: the SAX handler block or NULL
9062 * @input: an Input Buffer
9063 * @enc: the charset encoding if known
9064 *
9065 * Load and parse a DTD
9066 *
9067 * Returns the resulting xmlDtdPtr or NULL in case of error.
9068 * @input will be freed at parsing end.
9069 */
9070
9071xmlDtdPtr
9072xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9073 xmlCharEncoding enc) {
9074 xmlDtdPtr ret = NULL;
9075 xmlParserCtxtPtr ctxt;
9076 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009077 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009078
9079 if (input == NULL)
9080 return(NULL);
9081
9082 ctxt = xmlNewParserCtxt();
9083 if (ctxt == NULL) {
9084 return(NULL);
9085 }
9086
9087 /*
9088 * Set-up the SAX context
9089 */
9090 if (sax != NULL) {
9091 if (ctxt->sax != NULL)
9092 xmlFree(ctxt->sax);
9093 ctxt->sax = sax;
9094 ctxt->userData = NULL;
9095 }
9096
9097 /*
9098 * generate a parser input from the I/O handler
9099 */
9100
9101 pinput = xmlNewIOInputStream(ctxt, input, enc);
9102 if (pinput == NULL) {
9103 if (sax != NULL) ctxt->sax = NULL;
9104 xmlFreeParserCtxt(ctxt);
9105 return(NULL);
9106 }
9107
9108 /*
9109 * plug some encoding conversion routines here.
9110 */
9111 xmlPushInput(ctxt, pinput);
9112
9113 pinput->filename = NULL;
9114 pinput->line = 1;
9115 pinput->col = 1;
9116 pinput->base = ctxt->input->cur;
9117 pinput->cur = ctxt->input->cur;
9118 pinput->free = NULL;
9119
9120 /*
9121 * let's parse that entity knowing it's an external subset.
9122 */
9123 ctxt->inSubset = 2;
9124 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9125 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9126 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009127
9128 if (enc == XML_CHAR_ENCODING_NONE) {
9129 /*
9130 * Get the 4 first bytes and decode the charset
9131 * if enc != XML_CHAR_ENCODING_NONE
9132 * plug some encoding conversion routines.
9133 */
9134 start[0] = RAW;
9135 start[1] = NXT(1);
9136 start[2] = NXT(2);
9137 start[3] = NXT(3);
9138 enc = xmlDetectCharEncoding(start, 4);
9139 if (enc != XML_CHAR_ENCODING_NONE) {
9140 xmlSwitchEncoding(ctxt, enc);
9141 }
9142 }
9143
Owen Taylor3473f882001-02-23 17:55:21 +00009144 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9145
9146 if (ctxt->myDoc != NULL) {
9147 if (ctxt->wellFormed) {
9148 ret = ctxt->myDoc->extSubset;
9149 ctxt->myDoc->extSubset = NULL;
9150 } else {
9151 ret = NULL;
9152 }
9153 xmlFreeDoc(ctxt->myDoc);
9154 ctxt->myDoc = NULL;
9155 }
9156 if (sax != NULL) ctxt->sax = NULL;
9157 xmlFreeParserCtxt(ctxt);
9158
9159 return(ret);
9160}
9161
9162/**
9163 * xmlSAXParseDTD:
9164 * @sax: the SAX handler block
9165 * @ExternalID: a NAME* containing the External ID of the DTD
9166 * @SystemID: a NAME* containing the URL to the DTD
9167 *
9168 * Load and parse an external subset.
9169 *
9170 * Returns the resulting xmlDtdPtr or NULL in case of error.
9171 */
9172
9173xmlDtdPtr
9174xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9175 const xmlChar *SystemID) {
9176 xmlDtdPtr ret = NULL;
9177 xmlParserCtxtPtr ctxt;
9178 xmlParserInputPtr input = NULL;
9179 xmlCharEncoding enc;
9180
9181 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9182
9183 ctxt = xmlNewParserCtxt();
9184 if (ctxt == NULL) {
9185 return(NULL);
9186 }
9187
9188 /*
9189 * Set-up the SAX context
9190 */
9191 if (sax != NULL) {
9192 if (ctxt->sax != NULL)
9193 xmlFree(ctxt->sax);
9194 ctxt->sax = sax;
9195 ctxt->userData = NULL;
9196 }
9197
9198 /*
9199 * Ask the Entity resolver to load the damn thing
9200 */
9201
9202 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9203 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9204 if (input == NULL) {
9205 if (sax != NULL) ctxt->sax = NULL;
9206 xmlFreeParserCtxt(ctxt);
9207 return(NULL);
9208 }
9209
9210 /*
9211 * plug some encoding conversion routines here.
9212 */
9213 xmlPushInput(ctxt, input);
9214 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9215 xmlSwitchEncoding(ctxt, enc);
9216
9217 if (input->filename == NULL)
9218 input->filename = (char *) xmlStrdup(SystemID);
9219 input->line = 1;
9220 input->col = 1;
9221 input->base = ctxt->input->cur;
9222 input->cur = ctxt->input->cur;
9223 input->free = NULL;
9224
9225 /*
9226 * let's parse that entity knowing it's an external subset.
9227 */
9228 ctxt->inSubset = 2;
9229 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9230 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9231 ExternalID, SystemID);
9232 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9233
9234 if (ctxt->myDoc != NULL) {
9235 if (ctxt->wellFormed) {
9236 ret = ctxt->myDoc->extSubset;
9237 ctxt->myDoc->extSubset = NULL;
9238 } else {
9239 ret = NULL;
9240 }
9241 xmlFreeDoc(ctxt->myDoc);
9242 ctxt->myDoc = NULL;
9243 }
9244 if (sax != NULL) ctxt->sax = NULL;
9245 xmlFreeParserCtxt(ctxt);
9246
9247 return(ret);
9248}
9249
9250/**
9251 * xmlParseDTD:
9252 * @ExternalID: a NAME* containing the External ID of the DTD
9253 * @SystemID: a NAME* containing the URL to the DTD
9254 *
9255 * Load and parse an external subset.
9256 *
9257 * Returns the resulting xmlDtdPtr or NULL in case of error.
9258 */
9259
9260xmlDtdPtr
9261xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9262 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9263}
9264
9265/************************************************************************
9266 * *
9267 * Front ends when parsing an Entity *
9268 * *
9269 ************************************************************************/
9270
9271/**
Owen Taylor3473f882001-02-23 17:55:21 +00009272 * xmlParseCtxtExternalEntity:
9273 * @ctx: the existing parsing context
9274 * @URL: the URL for the entity to load
9275 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009276 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009277 *
9278 * Parse an external general entity within an existing parsing context
9279 * An external general parsed entity is well-formed if it matches the
9280 * production labeled extParsedEnt.
9281 *
9282 * [78] extParsedEnt ::= TextDecl? content
9283 *
9284 * Returns 0 if the entity is well formed, -1 in case of args problem and
9285 * the parser error code otherwise
9286 */
9287
9288int
9289xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009290 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009291 xmlParserCtxtPtr ctxt;
9292 xmlDocPtr newDoc;
9293 xmlSAXHandlerPtr oldsax = NULL;
9294 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009295 xmlChar start[4];
9296 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009297
9298 if (ctx->depth > 40) {
9299 return(XML_ERR_ENTITY_LOOP);
9300 }
9301
Daniel Veillardcda96922001-08-21 10:56:31 +00009302 if (lst != NULL)
9303 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009304 if ((URL == NULL) && (ID == NULL))
9305 return(-1);
9306 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9307 return(-1);
9308
9309
9310 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9311 if (ctxt == NULL) return(-1);
9312 ctxt->userData = ctxt;
9313 oldsax = ctxt->sax;
9314 ctxt->sax = ctx->sax;
9315 newDoc = xmlNewDoc(BAD_CAST "1.0");
9316 if (newDoc == NULL) {
9317 xmlFreeParserCtxt(ctxt);
9318 return(-1);
9319 }
9320 if (ctx->myDoc != NULL) {
9321 newDoc->intSubset = ctx->myDoc->intSubset;
9322 newDoc->extSubset = ctx->myDoc->extSubset;
9323 }
9324 if (ctx->myDoc->URL != NULL) {
9325 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9326 }
9327 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9328 if (newDoc->children == NULL) {
9329 ctxt->sax = oldsax;
9330 xmlFreeParserCtxt(ctxt);
9331 newDoc->intSubset = NULL;
9332 newDoc->extSubset = NULL;
9333 xmlFreeDoc(newDoc);
9334 return(-1);
9335 }
9336 nodePush(ctxt, newDoc->children);
9337 if (ctx->myDoc == NULL) {
9338 ctxt->myDoc = newDoc;
9339 } else {
9340 ctxt->myDoc = ctx->myDoc;
9341 newDoc->children->doc = ctx->myDoc;
9342 }
9343
Daniel Veillard87a764e2001-06-20 17:41:10 +00009344 /*
9345 * Get the 4 first bytes and decode the charset
9346 * if enc != XML_CHAR_ENCODING_NONE
9347 * plug some encoding conversion routines.
9348 */
9349 GROW
9350 start[0] = RAW;
9351 start[1] = NXT(1);
9352 start[2] = NXT(2);
9353 start[3] = NXT(3);
9354 enc = xmlDetectCharEncoding(start, 4);
9355 if (enc != XML_CHAR_ENCODING_NONE) {
9356 xmlSwitchEncoding(ctxt, enc);
9357 }
9358
Owen Taylor3473f882001-02-23 17:55:21 +00009359 /*
9360 * Parse a possible text declaration first
9361 */
Owen Taylor3473f882001-02-23 17:55:21 +00009362 if ((RAW == '<') && (NXT(1) == '?') &&
9363 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9364 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9365 xmlParseTextDecl(ctxt);
9366 }
9367
9368 /*
9369 * Doing validity checking on chunk doesn't make sense
9370 */
9371 ctxt->instate = XML_PARSER_CONTENT;
9372 ctxt->validate = ctx->validate;
9373 ctxt->loadsubset = ctx->loadsubset;
9374 ctxt->depth = ctx->depth + 1;
9375 ctxt->replaceEntities = ctx->replaceEntities;
9376 if (ctxt->validate) {
9377 ctxt->vctxt.error = ctx->vctxt.error;
9378 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009379 } else {
9380 ctxt->vctxt.error = NULL;
9381 ctxt->vctxt.warning = NULL;
9382 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009383 ctxt->vctxt.nodeTab = NULL;
9384 ctxt->vctxt.nodeNr = 0;
9385 ctxt->vctxt.nodeMax = 0;
9386 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009387
9388 xmlParseContent(ctxt);
9389
9390 if ((RAW == '<') && (NXT(1) == '/')) {
9391 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9393 ctxt->sax->error(ctxt->userData,
9394 "chunk is not well balanced\n");
9395 ctxt->wellFormed = 0;
9396 ctxt->disableSAX = 1;
9397 } else if (RAW != 0) {
9398 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9400 ctxt->sax->error(ctxt->userData,
9401 "extra content at the end of well balanced chunk\n");
9402 ctxt->wellFormed = 0;
9403 ctxt->disableSAX = 1;
9404 }
9405 if (ctxt->node != newDoc->children) {
9406 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9408 ctxt->sax->error(ctxt->userData,
9409 "chunk is not well balanced\n");
9410 ctxt->wellFormed = 0;
9411 ctxt->disableSAX = 1;
9412 }
9413
9414 if (!ctxt->wellFormed) {
9415 if (ctxt->errNo == 0)
9416 ret = 1;
9417 else
9418 ret = ctxt->errNo;
9419 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009420 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009421 xmlNodePtr cur;
9422
9423 /*
9424 * Return the newly created nodeset after unlinking it from
9425 * they pseudo parent.
9426 */
9427 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009428 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009429 while (cur != NULL) {
9430 cur->parent = NULL;
9431 cur = cur->next;
9432 }
9433 newDoc->children->children = NULL;
9434 }
9435 ret = 0;
9436 }
9437 ctxt->sax = oldsax;
9438 xmlFreeParserCtxt(ctxt);
9439 newDoc->intSubset = NULL;
9440 newDoc->extSubset = NULL;
9441 xmlFreeDoc(newDoc);
9442
9443 return(ret);
9444}
9445
9446/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009447 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009448 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009449 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009450 * @sax: the SAX handler bloc (possibly NULL)
9451 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9452 * @depth: Used for loop detection, use 0
9453 * @URL: the URL for the entity to load
9454 * @ID: the System ID for the entity to load
9455 * @list: the return value for the set of parsed nodes
9456 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009457 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009458 *
9459 * Returns 0 if the entity is well formed, -1 in case of args problem and
9460 * the parser error code otherwise
9461 */
9462
Daniel Veillard257d9102001-05-08 10:41:44 +00009463static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009464xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9465 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009466 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009467 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009468 xmlParserCtxtPtr ctxt;
9469 xmlDocPtr newDoc;
9470 xmlSAXHandlerPtr oldsax = NULL;
9471 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009472 xmlChar start[4];
9473 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009474
9475 if (depth > 40) {
9476 return(XML_ERR_ENTITY_LOOP);
9477 }
9478
9479
9480
9481 if (list != NULL)
9482 *list = NULL;
9483 if ((URL == NULL) && (ID == NULL))
9484 return(-1);
9485 if (doc == NULL) /* @@ relax but check for dereferences */
9486 return(-1);
9487
9488
9489 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9490 if (ctxt == NULL) return(-1);
9491 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009492 if (oldctxt != NULL) {
9493 ctxt->_private = oldctxt->_private;
9494 ctxt->loadsubset = oldctxt->loadsubset;
9495 ctxt->validate = oldctxt->validate;
9496 ctxt->external = oldctxt->external;
9497 } else {
9498 /*
9499 * Doing validity checking on chunk without context
9500 * doesn't make sense
9501 */
9502 ctxt->_private = NULL;
9503 ctxt->validate = 0;
9504 ctxt->external = 2;
9505 ctxt->loadsubset = 0;
9506 }
Owen Taylor3473f882001-02-23 17:55:21 +00009507 if (sax != NULL) {
9508 oldsax = ctxt->sax;
9509 ctxt->sax = sax;
9510 if (user_data != NULL)
9511 ctxt->userData = user_data;
9512 }
9513 newDoc = xmlNewDoc(BAD_CAST "1.0");
9514 if (newDoc == NULL) {
9515 xmlFreeParserCtxt(ctxt);
9516 return(-1);
9517 }
9518 if (doc != NULL) {
9519 newDoc->intSubset = doc->intSubset;
9520 newDoc->extSubset = doc->extSubset;
9521 }
9522 if (doc->URL != NULL) {
9523 newDoc->URL = xmlStrdup(doc->URL);
9524 }
9525 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9526 if (newDoc->children == NULL) {
9527 if (sax != NULL)
9528 ctxt->sax = oldsax;
9529 xmlFreeParserCtxt(ctxt);
9530 newDoc->intSubset = NULL;
9531 newDoc->extSubset = NULL;
9532 xmlFreeDoc(newDoc);
9533 return(-1);
9534 }
9535 nodePush(ctxt, newDoc->children);
9536 if (doc == NULL) {
9537 ctxt->myDoc = newDoc;
9538 } else {
9539 ctxt->myDoc = doc;
9540 newDoc->children->doc = doc;
9541 }
9542
Daniel Veillard87a764e2001-06-20 17:41:10 +00009543 /*
9544 * Get the 4 first bytes and decode the charset
9545 * if enc != XML_CHAR_ENCODING_NONE
9546 * plug some encoding conversion routines.
9547 */
9548 GROW;
9549 start[0] = RAW;
9550 start[1] = NXT(1);
9551 start[2] = NXT(2);
9552 start[3] = NXT(3);
9553 enc = xmlDetectCharEncoding(start, 4);
9554 if (enc != XML_CHAR_ENCODING_NONE) {
9555 xmlSwitchEncoding(ctxt, enc);
9556 }
9557
Owen Taylor3473f882001-02-23 17:55:21 +00009558 /*
9559 * Parse a possible text declaration first
9560 */
Owen Taylor3473f882001-02-23 17:55:21 +00009561 if ((RAW == '<') && (NXT(1) == '?') &&
9562 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9563 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9564 xmlParseTextDecl(ctxt);
9565 }
9566
Owen Taylor3473f882001-02-23 17:55:21 +00009567 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009568 ctxt->depth = depth;
9569
9570 xmlParseContent(ctxt);
9571
Daniel Veillard561b7f82002-03-20 21:55:57 +00009572 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009573 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9575 ctxt->sax->error(ctxt->userData,
9576 "chunk is not well balanced\n");
9577 ctxt->wellFormed = 0;
9578 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009579 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009580 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9582 ctxt->sax->error(ctxt->userData,
9583 "extra content at the end of well balanced chunk\n");
9584 ctxt->wellFormed = 0;
9585 ctxt->disableSAX = 1;
9586 }
9587 if (ctxt->node != newDoc->children) {
9588 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9590 ctxt->sax->error(ctxt->userData,
9591 "chunk is not well balanced\n");
9592 ctxt->wellFormed = 0;
9593 ctxt->disableSAX = 1;
9594 }
9595
9596 if (!ctxt->wellFormed) {
9597 if (ctxt->errNo == 0)
9598 ret = 1;
9599 else
9600 ret = ctxt->errNo;
9601 } else {
9602 if (list != NULL) {
9603 xmlNodePtr cur;
9604
9605 /*
9606 * Return the newly created nodeset after unlinking it from
9607 * they pseudo parent.
9608 */
9609 cur = newDoc->children->children;
9610 *list = cur;
9611 while (cur != NULL) {
9612 cur->parent = NULL;
9613 cur = cur->next;
9614 }
9615 newDoc->children->children = NULL;
9616 }
9617 ret = 0;
9618 }
9619 if (sax != NULL)
9620 ctxt->sax = oldsax;
9621 xmlFreeParserCtxt(ctxt);
9622 newDoc->intSubset = NULL;
9623 newDoc->extSubset = NULL;
9624 xmlFreeDoc(newDoc);
9625
9626 return(ret);
9627}
9628
9629/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009630 * xmlParseExternalEntity:
9631 * @doc: the document the chunk pertains to
9632 * @sax: the SAX handler bloc (possibly NULL)
9633 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9634 * @depth: Used for loop detection, use 0
9635 * @URL: the URL for the entity to load
9636 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009637 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009638 *
9639 * Parse an external general entity
9640 * An external general parsed entity is well-formed if it matches the
9641 * production labeled extParsedEnt.
9642 *
9643 * [78] extParsedEnt ::= TextDecl? content
9644 *
9645 * Returns 0 if the entity is well formed, -1 in case of args problem and
9646 * the parser error code otherwise
9647 */
9648
9649int
9650xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009651 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009652 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009653 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009654}
9655
9656/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009657 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009658 * @doc: the document the chunk pertains to
9659 * @sax: the SAX handler bloc (possibly NULL)
9660 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9661 * @depth: Used for loop detection, use 0
9662 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009663 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009664 *
9665 * Parse a well-balanced chunk of an XML document
9666 * called by the parser
9667 * The allowed sequence for the Well Balanced Chunk is the one defined by
9668 * the content production in the XML grammar:
9669 *
9670 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9671 *
9672 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9673 * the parser error code otherwise
9674 */
9675
9676int
9677xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009678 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009679 xmlParserCtxtPtr ctxt;
9680 xmlDocPtr newDoc;
9681 xmlSAXHandlerPtr oldsax = NULL;
9682 int size;
9683 int ret = 0;
9684
9685 if (depth > 40) {
9686 return(XML_ERR_ENTITY_LOOP);
9687 }
9688
9689
Daniel Veillardcda96922001-08-21 10:56:31 +00009690 if (lst != NULL)
9691 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009692 if (string == NULL)
9693 return(-1);
9694
9695 size = xmlStrlen(string);
9696
9697 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9698 if (ctxt == NULL) return(-1);
9699 ctxt->userData = ctxt;
9700 if (sax != NULL) {
9701 oldsax = ctxt->sax;
9702 ctxt->sax = sax;
9703 if (user_data != NULL)
9704 ctxt->userData = user_data;
9705 }
9706 newDoc = xmlNewDoc(BAD_CAST "1.0");
9707 if (newDoc == NULL) {
9708 xmlFreeParserCtxt(ctxt);
9709 return(-1);
9710 }
9711 if (doc != NULL) {
9712 newDoc->intSubset = doc->intSubset;
9713 newDoc->extSubset = doc->extSubset;
9714 }
9715 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9716 if (newDoc->children == NULL) {
9717 if (sax != NULL)
9718 ctxt->sax = oldsax;
9719 xmlFreeParserCtxt(ctxt);
9720 newDoc->intSubset = NULL;
9721 newDoc->extSubset = NULL;
9722 xmlFreeDoc(newDoc);
9723 return(-1);
9724 }
9725 nodePush(ctxt, newDoc->children);
9726 if (doc == NULL) {
9727 ctxt->myDoc = newDoc;
9728 } else {
9729 ctxt->myDoc = doc;
9730 newDoc->children->doc = doc;
9731 }
9732 ctxt->instate = XML_PARSER_CONTENT;
9733 ctxt->depth = depth;
9734
9735 /*
9736 * Doing validity checking on chunk doesn't make sense
9737 */
9738 ctxt->validate = 0;
9739 ctxt->loadsubset = 0;
9740
9741 xmlParseContent(ctxt);
9742
9743 if ((RAW == '<') && (NXT(1) == '/')) {
9744 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9746 ctxt->sax->error(ctxt->userData,
9747 "chunk is not well balanced\n");
9748 ctxt->wellFormed = 0;
9749 ctxt->disableSAX = 1;
9750 } else if (RAW != 0) {
9751 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9753 ctxt->sax->error(ctxt->userData,
9754 "extra content at the end of well balanced chunk\n");
9755 ctxt->wellFormed = 0;
9756 ctxt->disableSAX = 1;
9757 }
9758 if (ctxt->node != newDoc->children) {
9759 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9761 ctxt->sax->error(ctxt->userData,
9762 "chunk is not well balanced\n");
9763 ctxt->wellFormed = 0;
9764 ctxt->disableSAX = 1;
9765 }
9766
9767 if (!ctxt->wellFormed) {
9768 if (ctxt->errNo == 0)
9769 ret = 1;
9770 else
9771 ret = ctxt->errNo;
9772 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009773 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009774 xmlNodePtr cur;
9775
9776 /*
9777 * Return the newly created nodeset after unlinking it from
9778 * they pseudo parent.
9779 */
9780 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009781 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009782 while (cur != NULL) {
9783 cur->parent = NULL;
9784 cur = cur->next;
9785 }
9786 newDoc->children->children = NULL;
9787 }
9788 ret = 0;
9789 }
9790 if (sax != NULL)
9791 ctxt->sax = oldsax;
9792 xmlFreeParserCtxt(ctxt);
9793 newDoc->intSubset = NULL;
9794 newDoc->extSubset = NULL;
9795 xmlFreeDoc(newDoc);
9796
9797 return(ret);
9798}
9799
9800/**
9801 * xmlSAXParseEntity:
9802 * @sax: the SAX handler block
9803 * @filename: the filename
9804 *
9805 * parse an XML external entity out of context and build a tree.
9806 * It use the given SAX function block to handle the parsing callback.
9807 * If sax is NULL, fallback to the default DOM tree building routines.
9808 *
9809 * [78] extParsedEnt ::= TextDecl? content
9810 *
9811 * This correspond to a "Well Balanced" chunk
9812 *
9813 * Returns the resulting document tree
9814 */
9815
9816xmlDocPtr
9817xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9818 xmlDocPtr ret;
9819 xmlParserCtxtPtr ctxt;
9820 char *directory = NULL;
9821
9822 ctxt = xmlCreateFileParserCtxt(filename);
9823 if (ctxt == NULL) {
9824 return(NULL);
9825 }
9826 if (sax != NULL) {
9827 if (ctxt->sax != NULL)
9828 xmlFree(ctxt->sax);
9829 ctxt->sax = sax;
9830 ctxt->userData = NULL;
9831 }
9832
9833 if ((ctxt->directory == NULL) && (directory == NULL))
9834 directory = xmlParserGetDirectory(filename);
9835
9836 xmlParseExtParsedEnt(ctxt);
9837
9838 if (ctxt->wellFormed)
9839 ret = ctxt->myDoc;
9840 else {
9841 ret = NULL;
9842 xmlFreeDoc(ctxt->myDoc);
9843 ctxt->myDoc = NULL;
9844 }
9845 if (sax != NULL)
9846 ctxt->sax = NULL;
9847 xmlFreeParserCtxt(ctxt);
9848
9849 return(ret);
9850}
9851
9852/**
9853 * xmlParseEntity:
9854 * @filename: the filename
9855 *
9856 * parse an XML external entity out of context and build a tree.
9857 *
9858 * [78] extParsedEnt ::= TextDecl? content
9859 *
9860 * This correspond to a "Well Balanced" chunk
9861 *
9862 * Returns the resulting document tree
9863 */
9864
9865xmlDocPtr
9866xmlParseEntity(const char *filename) {
9867 return(xmlSAXParseEntity(NULL, filename));
9868}
9869
9870/**
9871 * xmlCreateEntityParserCtxt:
9872 * @URL: the entity URL
9873 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009874 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009875 *
9876 * Create a parser context for an external entity
9877 * Automatic support for ZLIB/Compress compressed document is provided
9878 * by default if found at compile-time.
9879 *
9880 * Returns the new parser context or NULL
9881 */
9882xmlParserCtxtPtr
9883xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9884 const xmlChar *base) {
9885 xmlParserCtxtPtr ctxt;
9886 xmlParserInputPtr inputStream;
9887 char *directory = NULL;
9888 xmlChar *uri;
9889
9890 ctxt = xmlNewParserCtxt();
9891 if (ctxt == NULL) {
9892 return(NULL);
9893 }
9894
9895 uri = xmlBuildURI(URL, base);
9896
9897 if (uri == NULL) {
9898 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9899 if (inputStream == NULL) {
9900 xmlFreeParserCtxt(ctxt);
9901 return(NULL);
9902 }
9903
9904 inputPush(ctxt, inputStream);
9905
9906 if ((ctxt->directory == NULL) && (directory == NULL))
9907 directory = xmlParserGetDirectory((char *)URL);
9908 if ((ctxt->directory == NULL) && (directory != NULL))
9909 ctxt->directory = directory;
9910 } else {
9911 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9912 if (inputStream == NULL) {
9913 xmlFree(uri);
9914 xmlFreeParserCtxt(ctxt);
9915 return(NULL);
9916 }
9917
9918 inputPush(ctxt, inputStream);
9919
9920 if ((ctxt->directory == NULL) && (directory == NULL))
9921 directory = xmlParserGetDirectory((char *)uri);
9922 if ((ctxt->directory == NULL) && (directory != NULL))
9923 ctxt->directory = directory;
9924 xmlFree(uri);
9925 }
9926
9927 return(ctxt);
9928}
9929
9930/************************************************************************
9931 * *
9932 * Front ends when parsing from a file *
9933 * *
9934 ************************************************************************/
9935
9936/**
9937 * xmlCreateFileParserCtxt:
9938 * @filename: the filename
9939 *
9940 * Create a parser context for a file content.
9941 * Automatic support for ZLIB/Compress compressed document is provided
9942 * by default if found at compile-time.
9943 *
9944 * Returns the new parser context or NULL
9945 */
9946xmlParserCtxtPtr
9947xmlCreateFileParserCtxt(const char *filename)
9948{
9949 xmlParserCtxtPtr ctxt;
9950 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009951 char *directory = NULL;
9952
Owen Taylor3473f882001-02-23 17:55:21 +00009953 ctxt = xmlNewParserCtxt();
9954 if (ctxt == NULL) {
9955 if (xmlDefaultSAXHandler.error != NULL) {
9956 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9957 }
9958 return(NULL);
9959 }
9960
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009961 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009962 if (inputStream == NULL) {
9963 xmlFreeParserCtxt(ctxt);
9964 return(NULL);
9965 }
9966
Owen Taylor3473f882001-02-23 17:55:21 +00009967 inputPush(ctxt, inputStream);
9968 if ((ctxt->directory == NULL) && (directory == NULL))
9969 directory = xmlParserGetDirectory(filename);
9970 if ((ctxt->directory == NULL) && (directory != NULL))
9971 ctxt->directory = directory;
9972
9973 return(ctxt);
9974}
9975
9976/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009977 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009978 * @sax: the SAX handler block
9979 * @filename: the filename
9980 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9981 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009982 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009983 *
9984 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9985 * compressed document is provided by default if found at compile-time.
9986 * It use the given SAX function block to handle the parsing callback.
9987 * If sax is NULL, fallback to the default DOM tree building routines.
9988 *
Daniel Veillarde19fc232002-04-22 16:01:24 +00009989 * User data (void *) is stored within the parser context in the
9990 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +00009991 *
Owen Taylor3473f882001-02-23 17:55:21 +00009992 * Returns the resulting document tree
9993 */
9994
9995xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009996xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9997 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009998 xmlDocPtr ret;
9999 xmlParserCtxtPtr ctxt;
10000 char *directory = NULL;
10001
Daniel Veillard635ef722001-10-29 11:48:19 +000010002 xmlInitParser();
10003
Owen Taylor3473f882001-02-23 17:55:21 +000010004 ctxt = xmlCreateFileParserCtxt(filename);
10005 if (ctxt == NULL) {
10006 return(NULL);
10007 }
10008 if (sax != NULL) {
10009 if (ctxt->sax != NULL)
10010 xmlFree(ctxt->sax);
10011 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010012 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010013 if (data!=NULL) {
10014 ctxt->_private=data;
10015 }
Owen Taylor3473f882001-02-23 17:55:21 +000010016
10017 if ((ctxt->directory == NULL) && (directory == NULL))
10018 directory = xmlParserGetDirectory(filename);
10019 if ((ctxt->directory == NULL) && (directory != NULL))
10020 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10021
10022 xmlParseDocument(ctxt);
10023
10024 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10025 else {
10026 ret = NULL;
10027 xmlFreeDoc(ctxt->myDoc);
10028 ctxt->myDoc = NULL;
10029 }
10030 if (sax != NULL)
10031 ctxt->sax = NULL;
10032 xmlFreeParserCtxt(ctxt);
10033
10034 return(ret);
10035}
10036
10037/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010038 * xmlSAXParseFile:
10039 * @sax: the SAX handler block
10040 * @filename: the filename
10041 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10042 * documents
10043 *
10044 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10045 * compressed document is provided by default if found at compile-time.
10046 * It use the given SAX function block to handle the parsing callback.
10047 * If sax is NULL, fallback to the default DOM tree building routines.
10048 *
10049 * Returns the resulting document tree
10050 */
10051
10052xmlDocPtr
10053xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10054 int recovery) {
10055 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10056}
10057
10058/**
Owen Taylor3473f882001-02-23 17:55:21 +000010059 * xmlRecoverDoc:
10060 * @cur: a pointer to an array of xmlChar
10061 *
10062 * parse an XML in-memory document and build a tree.
10063 * In the case the document is not Well Formed, a tree is built anyway
10064 *
10065 * Returns the resulting document tree
10066 */
10067
10068xmlDocPtr
10069xmlRecoverDoc(xmlChar *cur) {
10070 return(xmlSAXParseDoc(NULL, cur, 1));
10071}
10072
10073/**
10074 * xmlParseFile:
10075 * @filename: the filename
10076 *
10077 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10078 * compressed document is provided by default if found at compile-time.
10079 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010080 * Returns the resulting document tree if the file was wellformed,
10081 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010082 */
10083
10084xmlDocPtr
10085xmlParseFile(const char *filename) {
10086 return(xmlSAXParseFile(NULL, filename, 0));
10087}
10088
10089/**
10090 * xmlRecoverFile:
10091 * @filename: the filename
10092 *
10093 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10094 * compressed document is provided by default if found at compile-time.
10095 * In the case the document is not Well Formed, a tree is built anyway
10096 *
10097 * Returns the resulting document tree
10098 */
10099
10100xmlDocPtr
10101xmlRecoverFile(const char *filename) {
10102 return(xmlSAXParseFile(NULL, filename, 1));
10103}
10104
10105
10106/**
10107 * xmlSetupParserForBuffer:
10108 * @ctxt: an XML parser context
10109 * @buffer: a xmlChar * buffer
10110 * @filename: a file name
10111 *
10112 * Setup the parser context to parse a new buffer; Clears any prior
10113 * contents from the parser context. The buffer parameter must not be
10114 * NULL, but the filename parameter can be
10115 */
10116void
10117xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10118 const char* filename)
10119{
10120 xmlParserInputPtr input;
10121
10122 input = xmlNewInputStream(ctxt);
10123 if (input == NULL) {
10124 perror("malloc");
10125 xmlFree(ctxt);
10126 return;
10127 }
10128
10129 xmlClearParserCtxt(ctxt);
10130 if (filename != NULL)
10131 input->filename = xmlMemStrdup(filename);
10132 input->base = buffer;
10133 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010134 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010135 inputPush(ctxt, input);
10136}
10137
10138/**
10139 * xmlSAXUserParseFile:
10140 * @sax: a SAX handler
10141 * @user_data: The user data returned on SAX callbacks
10142 * @filename: a file name
10143 *
10144 * parse an XML file and call the given SAX handler routines.
10145 * Automatic support for ZLIB/Compress compressed document is provided
10146 *
10147 * Returns 0 in case of success or a error number otherwise
10148 */
10149int
10150xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10151 const char *filename) {
10152 int ret = 0;
10153 xmlParserCtxtPtr ctxt;
10154
10155 ctxt = xmlCreateFileParserCtxt(filename);
10156 if (ctxt == NULL) return -1;
10157 if (ctxt->sax != &xmlDefaultSAXHandler)
10158 xmlFree(ctxt->sax);
10159 ctxt->sax = sax;
10160 if (user_data != NULL)
10161 ctxt->userData = user_data;
10162
10163 xmlParseDocument(ctxt);
10164
10165 if (ctxt->wellFormed)
10166 ret = 0;
10167 else {
10168 if (ctxt->errNo != 0)
10169 ret = ctxt->errNo;
10170 else
10171 ret = -1;
10172 }
10173 if (sax != NULL)
10174 ctxt->sax = NULL;
10175 xmlFreeParserCtxt(ctxt);
10176
10177 return ret;
10178}
10179
10180/************************************************************************
10181 * *
10182 * Front ends when parsing from memory *
10183 * *
10184 ************************************************************************/
10185
10186/**
10187 * xmlCreateMemoryParserCtxt:
10188 * @buffer: a pointer to a char array
10189 * @size: the size of the array
10190 *
10191 * Create a parser context for an XML in-memory document.
10192 *
10193 * Returns the new parser context or NULL
10194 */
10195xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010196xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010197 xmlParserCtxtPtr ctxt;
10198 xmlParserInputPtr input;
10199 xmlParserInputBufferPtr buf;
10200
10201 if (buffer == NULL)
10202 return(NULL);
10203 if (size <= 0)
10204 return(NULL);
10205
10206 ctxt = xmlNewParserCtxt();
10207 if (ctxt == NULL)
10208 return(NULL);
10209
10210 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10211 if (buf == NULL) return(NULL);
10212
10213 input = xmlNewInputStream(ctxt);
10214 if (input == NULL) {
10215 xmlFreeParserCtxt(ctxt);
10216 return(NULL);
10217 }
10218
10219 input->filename = NULL;
10220 input->buf = buf;
10221 input->base = input->buf->buffer->content;
10222 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010223 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010224
10225 inputPush(ctxt, input);
10226 return(ctxt);
10227}
10228
10229/**
10230 * xmlSAXParseMemory:
10231 * @sax: the SAX handler block
10232 * @buffer: an pointer to a char array
10233 * @size: the size of the array
10234 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10235 * documents
10236 *
10237 * parse an XML in-memory block and use the given SAX function block
10238 * to handle the parsing callback. If sax is NULL, fallback to the default
10239 * DOM tree building routines.
10240 *
10241 * Returns the resulting document tree
10242 */
10243xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010244xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10245 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010246 xmlDocPtr ret;
10247 xmlParserCtxtPtr ctxt;
10248
10249 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10250 if (ctxt == NULL) return(NULL);
10251 if (sax != NULL) {
10252 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010253 }
10254
10255 xmlParseDocument(ctxt);
10256
10257 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10258 else {
10259 ret = NULL;
10260 xmlFreeDoc(ctxt->myDoc);
10261 ctxt->myDoc = NULL;
10262 }
10263 if (sax != NULL)
10264 ctxt->sax = NULL;
10265 xmlFreeParserCtxt(ctxt);
10266
10267 return(ret);
10268}
10269
10270/**
10271 * xmlParseMemory:
10272 * @buffer: an pointer to a char array
10273 * @size: the size of the array
10274 *
10275 * parse an XML in-memory block and build a tree.
10276 *
10277 * Returns the resulting document tree
10278 */
10279
Daniel Veillard50822cb2001-07-26 20:05:51 +000010280xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010281 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10282}
10283
10284/**
10285 * xmlRecoverMemory:
10286 * @buffer: an pointer to a char array
10287 * @size: the size of the array
10288 *
10289 * parse an XML in-memory block and build a tree.
10290 * In the case the document is not Well Formed, a tree is built anyway
10291 *
10292 * Returns the resulting document tree
10293 */
10294
Daniel Veillard50822cb2001-07-26 20:05:51 +000010295xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010296 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10297}
10298
10299/**
10300 * xmlSAXUserParseMemory:
10301 * @sax: a SAX handler
10302 * @user_data: The user data returned on SAX callbacks
10303 * @buffer: an in-memory XML document input
10304 * @size: the length of the XML document in bytes
10305 *
10306 * A better SAX parsing routine.
10307 * parse an XML in-memory buffer and call the given SAX handler routines.
10308 *
10309 * Returns 0 in case of success or a error number otherwise
10310 */
10311int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010312 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010313 int ret = 0;
10314 xmlParserCtxtPtr ctxt;
10315 xmlSAXHandlerPtr oldsax = NULL;
10316
10317 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10318 if (ctxt == NULL) return -1;
10319 if (sax != NULL) {
10320 oldsax = ctxt->sax;
10321 ctxt->sax = sax;
10322 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010323 if (user_data != NULL)
10324 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010325
10326 xmlParseDocument(ctxt);
10327
10328 if (ctxt->wellFormed)
10329 ret = 0;
10330 else {
10331 if (ctxt->errNo != 0)
10332 ret = ctxt->errNo;
10333 else
10334 ret = -1;
10335 }
10336 if (sax != NULL) {
10337 ctxt->sax = oldsax;
10338 }
10339 xmlFreeParserCtxt(ctxt);
10340
10341 return ret;
10342}
10343
10344/**
10345 * xmlCreateDocParserCtxt:
10346 * @cur: a pointer to an array of xmlChar
10347 *
10348 * Creates a parser context for an XML in-memory document.
10349 *
10350 * Returns the new parser context or NULL
10351 */
10352xmlParserCtxtPtr
10353xmlCreateDocParserCtxt(xmlChar *cur) {
10354 int len;
10355
10356 if (cur == NULL)
10357 return(NULL);
10358 len = xmlStrlen(cur);
10359 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10360}
10361
10362/**
10363 * xmlSAXParseDoc:
10364 * @sax: the SAX handler block
10365 * @cur: a pointer to an array of xmlChar
10366 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10367 * documents
10368 *
10369 * parse an XML in-memory document and build a tree.
10370 * It use the given SAX function block to handle the parsing callback.
10371 * If sax is NULL, fallback to the default DOM tree building routines.
10372 *
10373 * Returns the resulting document tree
10374 */
10375
10376xmlDocPtr
10377xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10378 xmlDocPtr ret;
10379 xmlParserCtxtPtr ctxt;
10380
10381 if (cur == NULL) return(NULL);
10382
10383
10384 ctxt = xmlCreateDocParserCtxt(cur);
10385 if (ctxt == NULL) return(NULL);
10386 if (sax != NULL) {
10387 ctxt->sax = sax;
10388 ctxt->userData = NULL;
10389 }
10390
10391 xmlParseDocument(ctxt);
10392 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10393 else {
10394 ret = NULL;
10395 xmlFreeDoc(ctxt->myDoc);
10396 ctxt->myDoc = NULL;
10397 }
10398 if (sax != NULL)
10399 ctxt->sax = NULL;
10400 xmlFreeParserCtxt(ctxt);
10401
10402 return(ret);
10403}
10404
10405/**
10406 * xmlParseDoc:
10407 * @cur: a pointer to an array of xmlChar
10408 *
10409 * parse an XML in-memory document and build a tree.
10410 *
10411 * Returns the resulting document tree
10412 */
10413
10414xmlDocPtr
10415xmlParseDoc(xmlChar *cur) {
10416 return(xmlSAXParseDoc(NULL, cur, 0));
10417}
10418
Daniel Veillard8107a222002-01-13 14:10:10 +000010419/************************************************************************
10420 * *
10421 * Specific function to keep track of entities references *
10422 * and used by the XSLT debugger *
10423 * *
10424 ************************************************************************/
10425
10426static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10427
10428/**
10429 * xmlAddEntityReference:
10430 * @ent : A valid entity
10431 * @firstNode : A valid first node for children of entity
10432 * @lastNode : A valid last node of children entity
10433 *
10434 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10435 */
10436static void
10437xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10438 xmlNodePtr lastNode)
10439{
10440 if (xmlEntityRefFunc != NULL) {
10441 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10442 }
10443}
10444
10445
10446/**
10447 * xmlSetEntityReferenceFunc:
10448 * @func : A valid function
10449 *
10450 * Set the function to call call back when a xml reference has been made
10451 */
10452void
10453xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10454{
10455 xmlEntityRefFunc = func;
10456}
Owen Taylor3473f882001-02-23 17:55:21 +000010457
10458/************************************************************************
10459 * *
10460 * Miscellaneous *
10461 * *
10462 ************************************************************************/
10463
10464#ifdef LIBXML_XPATH_ENABLED
10465#include <libxml/xpath.h>
10466#endif
10467
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010468extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010469static int xmlParserInitialized = 0;
10470
10471/**
10472 * xmlInitParser:
10473 *
10474 * Initialization function for the XML parser.
10475 * This is not reentrant. Call once before processing in case of
10476 * use in multithreaded programs.
10477 */
10478
10479void
10480xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010481 if (xmlParserInitialized != 0)
10482 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010483
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010484 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10485 (xmlGenericError == NULL))
10486 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010487 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010488 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010489 xmlInitCharEncodingHandlers();
10490 xmlInitializePredefinedEntities();
10491 xmlDefaultSAXHandlerInit();
10492 xmlRegisterDefaultInputCallbacks();
10493 xmlRegisterDefaultOutputCallbacks();
10494#ifdef LIBXML_HTML_ENABLED
10495 htmlInitAutoClose();
10496 htmlDefaultSAXHandlerInit();
10497#endif
10498#ifdef LIBXML_XPATH_ENABLED
10499 xmlXPathInit();
10500#endif
10501 xmlParserInitialized = 1;
10502}
10503
10504/**
10505 * xmlCleanupParser:
10506 *
10507 * Cleanup function for the XML parser. It tries to reclaim all
10508 * parsing related global memory allocated for the parser processing.
10509 * It doesn't deallocate any document related memory. Calling this
10510 * function should not prevent reusing the parser.
10511 */
10512
10513void
10514xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010515 xmlCleanupCharEncodingHandlers();
10516 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010517#ifdef LIBXML_CATALOG_ENABLED
10518 xmlCatalogCleanup();
10519#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010520 xmlCleanupThreads();
10521 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010522}