/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Neither the strings nor the
 * table slots are written to, so both levels are declared const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Owen Taylor3473f882001-02-23 17:55:21 +0000108/************************************************************************
109 * *
110 * Parser stacks related functions and macros *
111 * *
112 ************************************************************************/
113
114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
115 const xmlChar ** str);
116
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to two functions working on the
 * ctxt->name##Tab array (ctxt->name##Nr used slots, ctxt->name##Max
 * allocated slots) and on ctxt->name, which mirrors the top of the stack:
 *
 *   scope int  name##Push(ctxt, value)
 *       Doubles the table when it is full, stores value on top and
 *       returns the index it was stored at. Returns 0 when the table
 *       cannot be grown; in that case the existing table, its recorded
 *       capacity and the stack content are left untouched (the result of
 *       xmlRealloc is only committed once it is known to be non NULL).
 *
 *   scope type name##Pop(ctxt)
 *       Removes and returns the top value, clears the freed slot and
 *       refreshes ctxt->name (NULL once the stack is empty). Returns 0
 *       when the stack is already empty.
 */

#define PUSH_AND_POP(scope, type, name) \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
    if (ctxt->name##Nr >= ctxt->name##Max) { \
        type *newTab; \
        newTab = (type *) xmlRealloc(ctxt->name##Tab, \
                 ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0])); \
        if (newTab == NULL) { \
            xmlGenericError(xmlGenericErrorContext, \
                            "realloc failed !\n"); \
            return(0); \
        } \
        ctxt->name##Tab = newTab; \
        ctxt->name##Max *= 2; \
    } \
    ctxt->name##Tab[ctxt->name##Nr] = value; \
    ctxt->name = value; \
    return(ctxt->name##Nr++); \
} \
scope type name##Pop(xmlParserCtxtPtr ctxt) { \
    type ret; \
    if (ctxt->name##Nr <= 0) return(0); \
    ctxt->name##Nr--; \
    if (ctxt->name##Nr > 0) \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
    else \
        ctxt->name = NULL; \
    ret = ctxt->name##Tab[ctxt->name##Nr]; \
    ctxt->name##Tab[ctxt->name##Nr] = 0; \
    return(ret); \
}
149
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000150/**
151 * inputPop:
152 * @ctxt: an XML parser context
153 *
154 * Pops the top parser input from the input stack
155 *
156 * Returns the input just removed
157 */
158/**
159 * inputPush:
160 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 *
163 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 *
165 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166 */
167/**
168 * namePop:
169 * @ctxt: an XML parser context
170 *
171 * Pops the top element name from the name stack
172 *
173 * Returns the name just removed
174 */
175/**
176 * namePush:
177 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 *
180 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 *
182 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 */
184/**
185 * nodePop:
186 * @ctxt: an XML parser context
187 *
188 * Pops the top element node from the node stack
189 *
190 * Returns the node just removed
191 */
192/**
193 * nodePush:
194 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000196 *
197 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 *
199 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000200 */
Owen Taylor3473f882001-02-23 17:55:21 +0000201/*
202 * Those macros actually generate the functions
203 */
204PUSH_AND_POP(extern, xmlParserInputPtr, input)
205PUSH_AND_POP(extern, xmlNodePtr, node)
206PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
270
/*
 * All of these expect a variable named ctxt (the parser context) to be in
 * scope at the point of use.
 *
 * RAW      -1 while a token is pending in ctxt->token, otherwise the
 *          xmlChar at the current input position.
 * CUR      the pending token if there is one, otherwise the xmlChar at the
 *          current input position.
 * NXT(val) the xmlChar located val positions after the current one.
 * CUR_PTR  the current input pointer itself.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val) advances the input pointer and the character counter by val,
 * hands over to xmlParserHandlePEReference() if the new current char is
 * '%', and pops the current input when it is exhausted and cannot be
 * refilled. Note that val is evaluated twice.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val),ctxt->input->cur += (val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
283
Daniel Veillard46de64e2002-05-29 08:21:33 +0000284#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
285 xmlSHRINK (ctxt);
286
287static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
288 xmlParserInputShrink(ctxt->input);
289 if ((*ctxt->input->cur == 0) &&
290 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
291 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000292 }
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard46de64e2002-05-29 08:21:33 +0000294#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
295 xmlGROW (ctxt);
296
297static void xmlGROW (xmlParserCtxtPtr ctxt) {
298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
299 if ((*ctxt->input->cur == 0) &&
300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
301 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303
/* Skip the blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 steps over exactly one xmlChar and asks for more input when the
 * end of the buffered data is reached. Unlike SKIP it neither handles a
 * '%' nor pops an exhausted input.
 */
#define NEXT1 { \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l) steps over l xmlChars: updates line/col from the char being
 * left, drops any pending token, and hands over to
 * xmlParserHandlePEReference() if the new current char is '%'.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->token = 0; ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* l must be an int lvalue, it receives the length of the char read. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

/*
 * COPY_BUF(l,b,i,v) appends char v to buffer b at index i and advances i:
 * a direct store when l is 1, xmlCopyCharMultiByte() otherwise.
 * Expands to a bare if/else without trailing ';', so it must not be used
 * as the body of an outer if that carries its own else.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000329
330/**
331 * xmlSkipBlankChars:
332 * @ctxt: the XML parser context
333 *
334 * skip all blanks character found at that point in the input streams.
335 * It pops up finished entities in the process if allowable at that point.
336 *
337 * Returns the number of space chars skipped
338 */
339
340int
341xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000343
Daniel Veillard02141ea2001-04-30 11:46:40 +0000344 if (ctxt->token != 0) {
345 if (!IS_BLANK(ctxt->token))
346 return(0);
347 ctxt->token = 0;
348 res++;
349 }
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
351 * It's Okay to use CUR/NEXT here since all the blanks are on
352 * the ASCII range.
353 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000354 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
355 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000356 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000357 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000358 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000359 cur = ctxt->input->cur;
360 while (IS_BLANK(*cur)) {
361 if (*cur == '\n') {
362 ctxt->input->line++; ctxt->input->col = 1;
363 }
364 cur++;
365 res++;
366 if (*cur == 0) {
367 ctxt->input->cur = cur;
368 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
369 cur = ctxt->input->cur;
370 }
371 }
372 ctxt->input->cur = cur;
373 } else {
374 int cur;
375 do {
376 cur = CUR;
377 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
378 NEXT;
379 cur = CUR;
380 res++;
381 }
382 while ((cur == 0) && (ctxt->inputNr > 1) &&
383 (ctxt->instate != XML_PARSER_COMMENT)) {
384 xmlPopInput(ctxt);
385 cur = CUR;
386 }
387 /*
388 * Need to handle support of entities branching here
389 */
390 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
391 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
392 }
Owen Taylor3473f882001-02-23 17:55:21 +0000393 return(res);
394}
395
396/************************************************************************
397 * *
398 * Commodity functions to handle entities *
399 * *
400 ************************************************************************/
401
402/**
403 * xmlPopInput:
404 * @ctxt: an XML parser context
405 *
406 * xmlPopInput: the current input pointed by ctxt->input came to an end
407 * pop it and return the next char.
408 *
409 * Returns the current xmlChar in the parser context
410 */
411xmlChar
412xmlPopInput(xmlParserCtxtPtr ctxt) {
413 if (ctxt->inputNr == 1) return(0); /* End of main Input */
414 if (xmlParserDebugEntities)
415 xmlGenericError(xmlGenericErrorContext,
416 "Popping input %d\n", ctxt->inputNr);
417 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000418 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000419 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
420 return(xmlPopInput(ctxt));
421 return(CUR);
422}
423
424/**
425 * xmlPushInput:
426 * @ctxt: an XML parser context
427 * @input: an XML parser input fragment (entity, XML fragment ...).
428 *
429 * xmlPushInput: switch to a new input stream which is stacked on top
430 * of the previous one(s).
431 */
432void
433xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
434 if (input == NULL) return;
435
436 if (xmlParserDebugEntities) {
437 if ((ctxt->input != NULL) && (ctxt->input->filename))
438 xmlGenericError(xmlGenericErrorContext,
439 "%s(%d): ", ctxt->input->filename,
440 ctxt->input->line);
441 xmlGenericError(xmlGenericErrorContext,
442 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
443 }
444 inputPush(ctxt, input);
445 GROW;
446}
447
448/**
449 * xmlParseCharRef:
450 * @ctxt: an XML parser context
451 *
452 * parse Reference declarations
453 *
454 * [66] CharRef ::= '&#' [0-9]+ ';' |
455 * '&#x' [0-9a-fA-F]+ ';'
456 *
457 * [ WFC: Legal Character ]
458 * Characters referred to using character references must match the
459 * production for Char.
460 *
461 * Returns the value parsed (as an int), 0 in case of error
462 */
463int
464xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000465 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000466 int count = 0;
467
468 if (ctxt->token != 0) {
469 val = ctxt->token;
470 ctxt->token = 0;
471 return(val);
472 }
473 /*
474 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
475 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000476 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000477 (NXT(2) == 'x')) {
478 SKIP(3);
479 GROW;
480 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000481 if (count++ > 20) {
482 count = 0;
483 GROW;
484 }
485 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000486 val = val * 16 + (CUR - '0');
487 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
488 val = val * 16 + (CUR - 'a') + 10;
489 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
490 val = val * 16 + (CUR - 'A') + 10;
491 else {
492 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
494 ctxt->sax->error(ctxt->userData,
495 "xmlParseCharRef: invalid hexadecimal value\n");
496 ctxt->wellFormed = 0;
497 ctxt->disableSAX = 1;
498 val = 0;
499 break;
500 }
501 NEXT;
502 count++;
503 }
504 if (RAW == ';') {
505 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
506 ctxt->nbChars ++;
507 ctxt->input->cur++;
508 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000509 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000510 SKIP(2);
511 GROW;
512 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000513 if (count++ > 20) {
514 count = 0;
515 GROW;
516 }
517 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000518 val = val * 10 + (CUR - '0');
519 else {
520 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
522 ctxt->sax->error(ctxt->userData,
523 "xmlParseCharRef: invalid decimal value\n");
524 ctxt->wellFormed = 0;
525 ctxt->disableSAX = 1;
526 val = 0;
527 break;
528 }
529 NEXT;
530 count++;
531 }
532 if (RAW == ';') {
533 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
534 ctxt->nbChars ++;
535 ctxt->input->cur++;
536 }
537 } else {
538 ctxt->errNo = XML_ERR_INVALID_CHARREF;
539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
540 ctxt->sax->error(ctxt->userData,
541 "xmlParseCharRef: invalid value\n");
542 ctxt->wellFormed = 0;
543 ctxt->disableSAX = 1;
544 }
545
546 /*
547 * [ WFC: Legal Character ]
548 * Characters referred to using character references must match the
549 * production for Char.
550 */
551 if (IS_CHAR(val)) {
552 return(val);
553 } else {
554 ctxt->errNo = XML_ERR_INVALID_CHAR;
555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000556 ctxt->sax->error(ctxt->userData,
557 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000558 val);
559 ctxt->wellFormed = 0;
560 ctxt->disableSAX = 1;
561 }
562 return(0);
563}
564
565/**
566 * xmlParseStringCharRef:
567 * @ctxt: an XML parser context
568 * @str: a pointer to an index in the string
569 *
570 * parse Reference declarations, variant parsing from a string rather
571 * than an an input flow.
572 *
573 * [66] CharRef ::= '&#' [0-9]+ ';' |
574 * '&#x' [0-9a-fA-F]+ ';'
575 *
576 * [ WFC: Legal Character ]
577 * Characters referred to using character references must match the
578 * production for Char.
579 *
580 * Returns the value parsed (as an int), 0 in case of error, str will be
581 * updated to the current value of the index
582 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000583static int
Owen Taylor3473f882001-02-23 17:55:21 +0000584xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
585 const xmlChar *ptr;
586 xmlChar cur;
587 int val = 0;
588
589 if ((str == NULL) || (*str == NULL)) return(0);
590 ptr = *str;
591 cur = *ptr;
592 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
593 ptr += 3;
594 cur = *ptr;
595 while (cur != ';') { /* Non input consuming loop */
596 if ((cur >= '0') && (cur <= '9'))
597 val = val * 16 + (cur - '0');
598 else if ((cur >= 'a') && (cur <= 'f'))
599 val = val * 16 + (cur - 'a') + 10;
600 else if ((cur >= 'A') && (cur <= 'F'))
601 val = val * 16 + (cur - 'A') + 10;
602 else {
603 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
605 ctxt->sax->error(ctxt->userData,
606 "xmlParseStringCharRef: invalid hexadecimal value\n");
607 ctxt->wellFormed = 0;
608 ctxt->disableSAX = 1;
609 val = 0;
610 break;
611 }
612 ptr++;
613 cur = *ptr;
614 }
615 if (cur == ';')
616 ptr++;
617 } else if ((cur == '&') && (ptr[1] == '#')){
618 ptr += 2;
619 cur = *ptr;
620 while (cur != ';') { /* Non input consuming loops */
621 if ((cur >= '0') && (cur <= '9'))
622 val = val * 10 + (cur - '0');
623 else {
624 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
626 ctxt->sax->error(ctxt->userData,
627 "xmlParseStringCharRef: invalid decimal value\n");
628 ctxt->wellFormed = 0;
629 ctxt->disableSAX = 1;
630 val = 0;
631 break;
632 }
633 ptr++;
634 cur = *ptr;
635 }
636 if (cur == ';')
637 ptr++;
638 } else {
639 ctxt->errNo = XML_ERR_INVALID_CHARREF;
640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
641 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000642 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000643 ctxt->wellFormed = 0;
644 ctxt->disableSAX = 1;
645 return(0);
646 }
647 *str = ptr;
648
649 /*
650 * [ WFC: Legal Character ]
651 * Characters referred to using character references must match the
652 * production for Char.
653 */
654 if (IS_CHAR(val)) {
655 return(val);
656 } else {
657 ctxt->errNo = XML_ERR_INVALID_CHAR;
658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
659 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000660 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000661 ctxt->wellFormed = 0;
662 ctxt->disableSAX = 1;
663 }
664 return(0);
665}
666
667/**
668 * xmlParserHandlePEReference:
669 * @ctxt: the parser context
670 *
671 * [69] PEReference ::= '%' Name ';'
672 *
673 * [ WFC: No Recursion ]
674 * A parsed entity must not contain a recursive
675 * reference to itself, either directly or indirectly.
676 *
677 * [ WFC: Entity Declared ]
678 * In a document without any DTD, a document with only an internal DTD
679 * subset which contains no parameter entity references, or a document
680 * with "standalone='yes'", ... ... The declaration of a parameter
681 * entity must precede any reference to it...
682 *
683 * [ VC: Entity Declared ]
684 * In a document with an external subset or external parameter entities
685 * with "standalone='no'", ... ... The declaration of a parameter entity
686 * must precede any reference to it...
687 *
688 * [ WFC: In DTD ]
689 * Parameter-entity references may only appear in the DTD.
690 * NOTE: misleading but this is handled.
691 *
692 * A PEReference may have been detected in the current input stream
693 * the handling is done accordingly to
694 * http://www.w3.org/TR/REC-xml#entproc
695 * i.e.
696 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000698 */
699void
700xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
701 xmlChar *name;
702 xmlEntityPtr entity = NULL;
703 xmlParserInputPtr input;
704
705 if (ctxt->token != 0) {
706 return;
707 }
708 if (RAW != '%') return;
709 switch(ctxt->instate) {
710 case XML_PARSER_CDATA_SECTION:
711 return;
712 case XML_PARSER_COMMENT:
713 return;
714 case XML_PARSER_START_TAG:
715 return;
716 case XML_PARSER_END_TAG:
717 return;
718 case XML_PARSER_EOF:
719 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
721 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
722 ctxt->wellFormed = 0;
723 ctxt->disableSAX = 1;
724 return;
725 case XML_PARSER_PROLOG:
726 case XML_PARSER_START:
727 case XML_PARSER_MISC:
728 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
730 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
731 ctxt->wellFormed = 0;
732 ctxt->disableSAX = 1;
733 return;
734 case XML_PARSER_ENTITY_DECL:
735 case XML_PARSER_CONTENT:
736 case XML_PARSER_ATTRIBUTE_VALUE:
737 case XML_PARSER_PI:
738 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000739 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000740 /* we just ignore it there */
741 return;
742 case XML_PARSER_EPILOG:
743 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
745 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
746 ctxt->wellFormed = 0;
747 ctxt->disableSAX = 1;
748 return;
749 case XML_PARSER_ENTITY_VALUE:
750 /*
751 * NOTE: in the case of entity values, we don't do the
752 * substitution here since we need the literal
753 * entity value to be able to save the internal
754 * subset of the document.
755 * This will be handled by xmlStringDecodeEntities
756 */
757 return;
758 case XML_PARSER_DTD:
759 /*
760 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
761 * In the internal DTD subset, parameter-entity references
762 * can occur only where markup declarations can occur, not
763 * within markup declarations.
764 * In that case this is handled in xmlParseMarkupDecl
765 */
766 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
767 return;
768 break;
769 case XML_PARSER_IGNORE:
770 return;
771 }
772
773 NEXT;
774 name = xmlParseName(ctxt);
775 if (xmlParserDebugEntities)
776 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000777 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000778 if (name == NULL) {
779 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000781 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000782 ctxt->wellFormed = 0;
783 ctxt->disableSAX = 1;
784 } else {
785 if (RAW == ';') {
786 NEXT;
787 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
788 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
789 if (entity == NULL) {
790
791 /*
792 * [ WFC: Entity Declared ]
793 * In a document without any DTD, a document with only an
794 * internal DTD subset which contains no parameter entity
795 * references, or a document with "standalone='yes'", ...
796 * ... The declaration of a parameter entity must precede
797 * any reference to it...
798 */
799 if ((ctxt->standalone == 1) ||
800 ((ctxt->hasExternalSubset == 0) &&
801 (ctxt->hasPErefs == 0))) {
802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
803 ctxt->sax->error(ctxt->userData,
804 "PEReference: %%%s; not found\n", name);
805 ctxt->wellFormed = 0;
806 ctxt->disableSAX = 1;
807 } else {
808 /*
809 * [ VC: Entity Declared ]
810 * In a document with an external subset or external
811 * parameter entities with "standalone='no'", ...
812 * ... The declaration of a parameter entity must precede
813 * any reference to it...
814 */
815 if ((!ctxt->disableSAX) &&
816 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
817 ctxt->vctxt.error(ctxt->vctxt.userData,
818 "PEReference: %%%s; not found\n", name);
819 } else if ((!ctxt->disableSAX) &&
820 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
821 ctxt->sax->warning(ctxt->userData,
822 "PEReference: %%%s; not found\n", name);
823 ctxt->valid = 0;
824 }
825 } else {
826 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
827 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000828 xmlChar start[4];
829 xmlCharEncoding enc;
830
Owen Taylor3473f882001-02-23 17:55:21 +0000831 /*
832 * handle the extra spaces added before and after
833 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000834 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000835 */
836 input = xmlNewEntityInputStream(ctxt, entity);
837 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000838
839 /*
840 * Get the 4 first bytes and decode the charset
841 * if enc != XML_CHAR_ENCODING_NONE
842 * plug some encoding conversion routines.
843 */
844 GROW
Daniel Veillard561b7f82002-03-20 21:55:57 +0000845 start[0] = RAW;
846 start[1] = NXT(1);
847 start[2] = NXT(2);
848 start[3] = NXT(3);
849 enc = xmlDetectCharEncoding(start, 4);
850 if (enc != XML_CHAR_ENCODING_NONE) {
851 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000852 }
853
Owen Taylor3473f882001-02-23 17:55:21 +0000854 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
855 (RAW == '<') && (NXT(1) == '?') &&
856 (NXT(2) == 'x') && (NXT(3) == 'm') &&
857 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
858 xmlParseTextDecl(ctxt);
859 }
860 if (ctxt->token == 0)
861 ctxt->token = ' ';
862 } else {
863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
864 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000865 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000866 name);
867 ctxt->wellFormed = 0;
868 ctxt->disableSAX = 1;
869 }
870 }
871 } else {
872 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
874 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000875 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000876 ctxt->wellFormed = 0;
877 ctxt->disableSAX = 1;
878 }
879 xmlFree(name);
880 }
881}
882
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles buffer##_size and reallocates buffer to that
 * many xmlChars; both variables must be in scope at the point of use.
 *
 * On allocation failure the macro makes the ENCLOSING function return
 * NULL. The old buffer is released first, since the result of xmlRealloc
 * is held in a temporary and the caller has no chance to free it after
 * the return. It can therefore only be used in functions returning a
 * pointer, on a buffer that nothing else still references.
 */
#define growBuffer(buffer) { \
    xmlChar *growBuffer_tmp; \
    buffer##_size *= 2; \
    growBuffer_tmp = (xmlChar *) \
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (growBuffer_tmp == NULL) { \
        perror("realloc failed"); \
        xmlFree(buffer); \
        return(NULL); \
    } \
    buffer = growBuffer_tmp; \
}
895
896/**
897 * xmlStringDecodeEntities:
898 * @ctxt: the parser context
899 * @str: the input string
900 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
901 * @end: an end marker xmlChar, 0 if none
902 * @end2: an end marker xmlChar, 0 if none
903 * @end3: an end marker xmlChar, 0 if none
904 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000905 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000906 *
907 * [67] Reference ::= EntityRef | CharRef
908 *
909 * [69] PEReference ::= '%' Name ';'
910 *
911 * Returns A newly allocated string with the substitution done. The caller
912 * must deallocate it !
913 */
914xmlChar *
915xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
916 xmlChar end, xmlChar end2, xmlChar end3) {
917 xmlChar *buffer = NULL;
918 int buffer_size = 0;
919
920 xmlChar *current = NULL;
921 xmlEntityPtr ent;
922 int c,l;
923 int nbchars = 0;
924
925 if (str == NULL)
926 return(NULL);
927
928 if (ctxt->depth > 40) {
929 ctxt->errNo = XML_ERR_ENTITY_LOOP;
930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
931 ctxt->sax->error(ctxt->userData,
932 "Detected entity reference loop\n");
933 ctxt->wellFormed = 0;
934 ctxt->disableSAX = 1;
935 return(NULL);
936 }
937
938 /*
939 * allocate a translation buffer.
940 */
941 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
942 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
943 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000944 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000945 return(NULL);
946 }
947
948 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000949 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000950 * we are operating on already parsed values.
951 */
952 c = CUR_SCHAR(str, l);
953 while ((c != 0) && (c != end) && /* non input consuming loop */
954 (c != end2) && (c != end3)) {
955
956 if (c == 0) break;
957 if ((c == '&') && (str[1] == '#')) {
958 int val = xmlParseStringCharRef(ctxt, &str);
959 if (val != 0) {
960 COPY_BUF(0,buffer,nbchars,val);
961 }
962 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
963 if (xmlParserDebugEntities)
964 xmlGenericError(xmlGenericErrorContext,
965 "String decoding Entity Reference: %.30s\n",
966 str);
967 ent = xmlParseStringEntityRef(ctxt, &str);
968 if ((ent != NULL) &&
969 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
970 if (ent->content != NULL) {
971 COPY_BUF(0,buffer,nbchars,ent->content[0]);
972 } else {
973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
974 ctxt->sax->error(ctxt->userData,
975 "internal error entity has no content\n");
976 }
977 } else if ((ent != NULL) && (ent->content != NULL)) {
978 xmlChar *rep;
979
980 ctxt->depth++;
981 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
982 0, 0, 0);
983 ctxt->depth--;
984 if (rep != NULL) {
985 current = rep;
986 while (*current != 0) { /* non input consuming loop */
987 buffer[nbchars++] = *current++;
988 if (nbchars >
989 buffer_size - XML_PARSER_BUFFER_SIZE) {
990 growBuffer(buffer);
991 }
992 }
993 xmlFree(rep);
994 }
995 } else if (ent != NULL) {
996 int i = xmlStrlen(ent->name);
997 const xmlChar *cur = ent->name;
998
999 buffer[nbchars++] = '&';
1000 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1001 growBuffer(buffer);
1002 }
1003 for (;i > 0;i--)
1004 buffer[nbchars++] = *cur++;
1005 buffer[nbchars++] = ';';
1006 }
1007 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1008 if (xmlParserDebugEntities)
1009 xmlGenericError(xmlGenericErrorContext,
1010 "String decoding PE Reference: %.30s\n", str);
1011 ent = xmlParseStringPEReference(ctxt, &str);
1012 if (ent != NULL) {
1013 xmlChar *rep;
1014
1015 ctxt->depth++;
1016 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1017 0, 0, 0);
1018 ctxt->depth--;
1019 if (rep != NULL) {
1020 current = rep;
1021 while (*current != 0) { /* non input consuming loop */
1022 buffer[nbchars++] = *current++;
1023 if (nbchars >
1024 buffer_size - XML_PARSER_BUFFER_SIZE) {
1025 growBuffer(buffer);
1026 }
1027 }
1028 xmlFree(rep);
1029 }
1030 }
1031 } else {
1032 COPY_BUF(l,buffer,nbchars,c);
1033 str += l;
1034 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1035 growBuffer(buffer);
1036 }
1037 }
1038 c = CUR_SCHAR(str, l);
1039 }
1040 buffer[nbchars++] = 0;
1041 return(buffer);
1042}
1043
1044
1045/************************************************************************
1046 * *
1047 * Commodity functions to handle xmlChars *
1048 * *
1049 ************************************************************************/
1050
1051/**
1052 * xmlStrndup:
1053 * @cur: the input xmlChar *
1054 * @len: the len of @cur
1055 *
1056 * a strndup for array of xmlChar's
1057 *
1058 * Returns a new xmlChar * or NULL
1059 */
1060xmlChar *
1061xmlStrndup(const xmlChar *cur, int len) {
1062 xmlChar *ret;
1063
1064 if ((cur == NULL) || (len < 0)) return(NULL);
1065 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1066 if (ret == NULL) {
1067 xmlGenericError(xmlGenericErrorContext,
1068 "malloc of %ld byte failed\n",
1069 (len + 1) * (long)sizeof(xmlChar));
1070 return(NULL);
1071 }
1072 memcpy(ret, cur, len * sizeof(xmlChar));
1073 ret[len] = 0;
1074 return(ret);
1075}
1076
1077/**
1078 * xmlStrdup:
1079 * @cur: the input xmlChar *
1080 *
1081 * a strdup for array of xmlChar's. Since they are supposed to be
1082 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1083 * a termination mark of '0'.
1084 *
1085 * Returns a new xmlChar * or NULL
1086 */
1087xmlChar *
1088xmlStrdup(const xmlChar *cur) {
1089 const xmlChar *p = cur;
1090
1091 if (cur == NULL) return(NULL);
1092 while (*p != 0) p++; /* non input consuming */
1093 return(xmlStrndup(cur, p - cur));
1094}
1095
1096/**
1097 * xmlCharStrndup:
1098 * @cur: the input char *
1099 * @len: the len of @cur
1100 *
1101 * a strndup for char's to xmlChar's
1102 *
1103 * Returns a new xmlChar * or NULL
1104 */
1105
1106xmlChar *
1107xmlCharStrndup(const char *cur, int len) {
1108 int i;
1109 xmlChar *ret;
1110
1111 if ((cur == NULL) || (len < 0)) return(NULL);
1112 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1113 if (ret == NULL) {
1114 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1115 (len + 1) * (long)sizeof(xmlChar));
1116 return(NULL);
1117 }
1118 for (i = 0;i < len;i++)
1119 ret[i] = (xmlChar) cur[i];
1120 ret[len] = 0;
1121 return(ret);
1122}
1123
1124/**
1125 * xmlCharStrdup:
1126 * @cur: the input char *
1127 * @len: the len of @cur
1128 *
1129 * a strdup for char's to xmlChar's
1130 *
1131 * Returns a new xmlChar * or NULL
1132 */
1133
1134xmlChar *
1135xmlCharStrdup(const char *cur) {
1136 const char *p = cur;
1137
1138 if (cur == NULL) return(NULL);
1139 while (*p != '\0') p++; /* non input consuming */
1140 return(xmlCharStrndup(cur, p - cur));
1141}
1142
1143/**
1144 * xmlStrcmp:
1145 * @str1: the first xmlChar *
1146 * @str2: the second xmlChar *
1147 *
1148 * a strcmp for xmlChar's
1149 *
1150 * Returns the integer result of the comparison
1151 */
1152
1153int
1154xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1155 register int tmp;
1156
1157 if (str1 == str2) return(0);
1158 if (str1 == NULL) return(-1);
1159 if (str2 == NULL) return(1);
1160 do {
1161 tmp = *str1++ - *str2;
1162 if (tmp != 0) return(tmp);
1163 } while (*str2++ != 0);
1164 return 0;
1165}
1166
1167/**
1168 * xmlStrEqual:
1169 * @str1: the first xmlChar *
1170 * @str2: the second xmlChar *
1171 *
1172 * Check if both string are equal of have same content
1173 * Should be a bit more readable and faster than xmlStrEqual()
1174 *
1175 * Returns 1 if they are equal, 0 if they are different
1176 */
1177
1178int
1179xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1180 if (str1 == str2) return(1);
1181 if (str1 == NULL) return(0);
1182 if (str2 == NULL) return(0);
1183 do {
1184 if (*str1++ != *str2) return(0);
1185 } while (*str2++);
1186 return(1);
1187}
1188
1189/**
1190 * xmlStrncmp:
1191 * @str1: the first xmlChar *
1192 * @str2: the second xmlChar *
1193 * @len: the max comparison length
1194 *
1195 * a strncmp for xmlChar's
1196 *
1197 * Returns the integer result of the comparison
1198 */
1199
1200int
1201xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1202 register int tmp;
1203
1204 if (len <= 0) return(0);
1205 if (str1 == str2) return(0);
1206 if (str1 == NULL) return(-1);
1207 if (str2 == NULL) return(1);
1208 do {
1209 tmp = *str1++ - *str2;
1210 if (tmp != 0 || --len == 0) return(tmp);
1211 } while (*str2++ != 0);
1212 return 0;
1213}
1214
Daniel Veillardb44025c2001-10-11 22:55:55 +00001215static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001216 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1217 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1218 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1219 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1220 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1221 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1222 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1223 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1224 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1225 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1226 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1227 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1228 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1229 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1230 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1231 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1232 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1233 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1234 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1235 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1236 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1237 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1238 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1239 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1240 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1241 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1242 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1243 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1244 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1245 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1246 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1247 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1248};
1249
1250/**
1251 * xmlStrcasecmp:
1252 * @str1: the first xmlChar *
1253 * @str2: the second xmlChar *
1254 *
1255 * a strcasecmp for xmlChar's
1256 *
1257 * Returns the integer result of the comparison
1258 */
1259
1260int
1261xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1262 register int tmp;
1263
1264 if (str1 == str2) return(0);
1265 if (str1 == NULL) return(-1);
1266 if (str2 == NULL) return(1);
1267 do {
1268 tmp = casemap[*str1++] - casemap[*str2];
1269 if (tmp != 0) return(tmp);
1270 } while (*str2++ != 0);
1271 return 0;
1272}
1273
1274/**
1275 * xmlStrncasecmp:
1276 * @str1: the first xmlChar *
1277 * @str2: the second xmlChar *
1278 * @len: the max comparison length
1279 *
1280 * a strncasecmp for xmlChar's
1281 *
1282 * Returns the integer result of the comparison
1283 */
1284
1285int
1286xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1287 register int tmp;
1288
1289 if (len <= 0) return(0);
1290 if (str1 == str2) return(0);
1291 if (str1 == NULL) return(-1);
1292 if (str2 == NULL) return(1);
1293 do {
1294 tmp = casemap[*str1++] - casemap[*str2];
1295 if (tmp != 0 || --len == 0) return(tmp);
1296 } while (*str2++ != 0);
1297 return 0;
1298}
1299
1300/**
1301 * xmlStrchr:
1302 * @str: the xmlChar * array
1303 * @val: the xmlChar to search
1304 *
1305 * a strchr for xmlChar's
1306 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001307 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001308 */
1309
1310const xmlChar *
1311xmlStrchr(const xmlChar *str, xmlChar val) {
1312 if (str == NULL) return(NULL);
1313 while (*str != 0) { /* non input consuming */
1314 if (*str == val) return((xmlChar *) str);
1315 str++;
1316 }
1317 return(NULL);
1318}
1319
1320/**
1321 * xmlStrstr:
1322 * @str: the xmlChar * array (haystack)
1323 * @val: the xmlChar to search (needle)
1324 *
1325 * a strstr for xmlChar's
1326 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001327 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001328 */
1329
1330const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001331xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001332 int n;
1333
1334 if (str == NULL) return(NULL);
1335 if (val == NULL) return(NULL);
1336 n = xmlStrlen(val);
1337
1338 if (n == 0) return(str);
1339 while (*str != 0) { /* non input consuming */
1340 if (*str == *val) {
1341 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1342 }
1343 str++;
1344 }
1345 return(NULL);
1346}
1347
1348/**
1349 * xmlStrcasestr:
1350 * @str: the xmlChar * array (haystack)
1351 * @val: the xmlChar to search (needle)
1352 *
1353 * a case-ignoring strstr for xmlChar's
1354 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001355 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001356 */
1357
1358const xmlChar *
1359xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1360 int n;
1361
1362 if (str == NULL) return(NULL);
1363 if (val == NULL) return(NULL);
1364 n = xmlStrlen(val);
1365
1366 if (n == 0) return(str);
1367 while (*str != 0) { /* non input consuming */
1368 if (casemap[*str] == casemap[*val])
1369 if (!xmlStrncasecmp(str, val, n)) return(str);
1370 str++;
1371 }
1372 return(NULL);
1373}
1374
1375/**
1376 * xmlStrsub:
1377 * @str: the xmlChar * array (haystack)
1378 * @start: the index of the first char (zero based)
1379 * @len: the length of the substring
1380 *
1381 * Extract a substring of a given string
1382 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001383 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001384 */
1385
1386xmlChar *
1387xmlStrsub(const xmlChar *str, int start, int len) {
1388 int i;
1389
1390 if (str == NULL) return(NULL);
1391 if (start < 0) return(NULL);
1392 if (len < 0) return(NULL);
1393
1394 for (i = 0;i < start;i++) {
1395 if (*str == 0) return(NULL);
1396 str++;
1397 }
1398 if (*str == 0) return(NULL);
1399 return(xmlStrndup(str, len));
1400}
1401
1402/**
1403 * xmlStrlen:
1404 * @str: the xmlChar * array
1405 *
1406 * length of a xmlChar's string
1407 *
1408 * Returns the number of xmlChar contained in the ARRAY.
1409 */
1410
1411int
1412xmlStrlen(const xmlChar *str) {
1413 int len = 0;
1414
1415 if (str == NULL) return(0);
1416 while (*str != 0) { /* non input consuming */
1417 str++;
1418 len++;
1419 }
1420 return(len);
1421}
1422
1423/**
1424 * xmlStrncat:
1425 * @cur: the original xmlChar * array
1426 * @add: the xmlChar * array added
1427 * @len: the length of @add
1428 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001429 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001430 * first bytes of @add.
1431 *
1432 * Returns a new xmlChar *, the original @cur is reallocated if needed
1433 * and should not be freed
1434 */
1435
1436xmlChar *
1437xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1438 int size;
1439 xmlChar *ret;
1440
1441 if ((add == NULL) || (len == 0))
1442 return(cur);
1443 if (cur == NULL)
1444 return(xmlStrndup(add, len));
1445
1446 size = xmlStrlen(cur);
1447 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1448 if (ret == NULL) {
1449 xmlGenericError(xmlGenericErrorContext,
1450 "xmlStrncat: realloc of %ld byte failed\n",
1451 (size + len + 1) * (long)sizeof(xmlChar));
1452 return(cur);
1453 }
1454 memcpy(&ret[size], add, len * sizeof(xmlChar));
1455 ret[size + len] = 0;
1456 return(ret);
1457}
1458
1459/**
1460 * xmlStrcat:
1461 * @cur: the original xmlChar * array
1462 * @add: the xmlChar * array added
1463 *
1464 * a strcat for array of xmlChar's. Since they are supposed to be
1465 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1466 * a termination mark of '0'.
1467 *
1468 * Returns a new xmlChar * containing the concatenated string.
1469 */
1470xmlChar *
1471xmlStrcat(xmlChar *cur, const xmlChar *add) {
1472 const xmlChar *p = add;
1473
1474 if (add == NULL) return(cur);
1475 if (cur == NULL)
1476 return(xmlStrdup(add));
1477
1478 while (*p != 0) p++; /* non input consuming */
1479 return(xmlStrncat(cur, add, p - add));
1480}
1481
1482/************************************************************************
1483 * *
1484 * Commodity functions, cleanup needed ? *
1485 * *
1486 ************************************************************************/
1487
1488/**
1489 * areBlanks:
1490 * @ctxt: an XML parser context
1491 * @str: a xmlChar *
1492 * @len: the size of @str
1493 *
1494 * Is this a sequence of blank chars that one can ignore ?
1495 *
1496 * Returns 1 if ignorable 0 otherwise.
1497 */
1498
1499static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1500 int i, ret;
1501 xmlNodePtr lastChild;
1502
Daniel Veillard05c13a22001-09-09 08:38:09 +00001503 /*
1504 * Don't spend time trying to differentiate them, the same callback is
1505 * used !
1506 */
1507 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001508 return(0);
1509
Owen Taylor3473f882001-02-23 17:55:21 +00001510 /*
1511 * Check for xml:space value.
1512 */
1513 if (*(ctxt->space) == 1)
1514 return(0);
1515
1516 /*
1517 * Check that the string is made of blanks
1518 */
1519 for (i = 0;i < len;i++)
1520 if (!(IS_BLANK(str[i]))) return(0);
1521
1522 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001523 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001524 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001525 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001526 if (ctxt->myDoc != NULL) {
1527 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1528 if (ret == 0) return(1);
1529 if (ret == 1) return(0);
1530 }
1531
1532 /*
1533 * Otherwise, heuristic :-\
1534 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001535 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001536 if ((ctxt->node->children == NULL) &&
1537 (RAW == '<') && (NXT(1) == '/')) return(0);
1538
1539 lastChild = xmlGetLastChild(ctxt->node);
1540 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001541 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1542 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001543 } else if (xmlNodeIsText(lastChild))
1544 return(0);
1545 else if ((ctxt->node->children != NULL) &&
1546 (xmlNodeIsText(ctxt->node->children)))
1547 return(0);
1548 return(1);
1549}
1550
Owen Taylor3473f882001-02-23 17:55:21 +00001551/************************************************************************
1552 * *
1553 * Extra stuff for namespace support *
1554 * Relates to http://www.w3.org/TR/WD-xml-names *
1555 * *
1556 ************************************************************************/
1557
1558/**
1559 * xmlSplitQName:
1560 * @ctxt: an XML parser context
1561 * @name: an XML parser context
1562 * @prefix: a xmlChar **
1563 *
1564 * parse an UTF8 encoded XML qualified name string
1565 *
1566 * [NS 5] QName ::= (Prefix ':')? LocalPart
1567 *
1568 * [NS 6] Prefix ::= NCName
1569 *
1570 * [NS 7] LocalPart ::= NCName
1571 *
1572 * Returns the local part, and prefix is updated
1573 * to get the Prefix if any.
1574 */
1575
1576xmlChar *
1577xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1578 xmlChar buf[XML_MAX_NAMELEN + 5];
1579 xmlChar *buffer = NULL;
1580 int len = 0;
1581 int max = XML_MAX_NAMELEN;
1582 xmlChar *ret = NULL;
1583 const xmlChar *cur = name;
1584 int c;
1585
1586 *prefix = NULL;
1587
1588#ifndef XML_XML_NAMESPACE
1589 /* xml: prefix is not really a namespace */
1590 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1591 (cur[2] == 'l') && (cur[3] == ':'))
1592 return(xmlStrdup(name));
1593#endif
1594
1595 /* nasty but valid */
1596 if (cur[0] == ':')
1597 return(xmlStrdup(name));
1598
1599 c = *cur++;
1600 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1601 buf[len++] = c;
1602 c = *cur++;
1603 }
1604 if (len >= max) {
1605 /*
1606 * Okay someone managed to make a huge name, so he's ready to pay
1607 * for the processing speed.
1608 */
1609 max = len * 2;
1610
1611 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1612 if (buffer == NULL) {
1613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1614 ctxt->sax->error(ctxt->userData,
1615 "xmlSplitQName: out of memory\n");
1616 return(NULL);
1617 }
1618 memcpy(buffer, buf, len);
1619 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1620 if (len + 10 > max) {
1621 max *= 2;
1622 buffer = (xmlChar *) xmlRealloc(buffer,
1623 max * sizeof(xmlChar));
1624 if (buffer == NULL) {
1625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1626 ctxt->sax->error(ctxt->userData,
1627 "xmlSplitQName: out of memory\n");
1628 return(NULL);
1629 }
1630 }
1631 buffer[len++] = c;
1632 c = *cur++;
1633 }
1634 buffer[len] = 0;
1635 }
1636
1637 if (buffer == NULL)
1638 ret = xmlStrndup(buf, len);
1639 else {
1640 ret = buffer;
1641 buffer = NULL;
1642 max = XML_MAX_NAMELEN;
1643 }
1644
1645
1646 if (c == ':') {
1647 c = *cur++;
1648 if (c == 0) return(ret);
1649 *prefix = ret;
1650 len = 0;
1651
1652 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1653 buf[len++] = c;
1654 c = *cur++;
1655 }
1656 if (len >= max) {
1657 /*
1658 * Okay someone managed to make a huge name, so he's ready to pay
1659 * for the processing speed.
1660 */
1661 max = len * 2;
1662
1663 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1664 if (buffer == NULL) {
1665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1666 ctxt->sax->error(ctxt->userData,
1667 "xmlSplitQName: out of memory\n");
1668 return(NULL);
1669 }
1670 memcpy(buffer, buf, len);
1671 while (c != 0) { /* tested bigname2.xml */
1672 if (len + 10 > max) {
1673 max *= 2;
1674 buffer = (xmlChar *) xmlRealloc(buffer,
1675 max * sizeof(xmlChar));
1676 if (buffer == NULL) {
1677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1678 ctxt->sax->error(ctxt->userData,
1679 "xmlSplitQName: out of memory\n");
1680 return(NULL);
1681 }
1682 }
1683 buffer[len++] = c;
1684 c = *cur++;
1685 }
1686 buffer[len] = 0;
1687 }
1688
1689 if (buffer == NULL)
1690 ret = xmlStrndup(buf, len);
1691 else {
1692 ret = buffer;
1693 }
1694 }
1695
1696 return(ret);
1697}
1698
1699/************************************************************************
1700 * *
1701 * The parser itself *
1702 * Relates to http://www.w3.org/TR/REC-xml *
1703 * *
1704 ************************************************************************/
1705
Daniel Veillard76d66f42001-05-16 21:05:17 +00001706static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001707/**
1708 * xmlParseName:
1709 * @ctxt: an XML parser context
1710 *
1711 * parse an XML name.
1712 *
1713 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1714 * CombiningChar | Extender
1715 *
1716 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1717 *
1718 * [6] Names ::= Name (S Name)*
1719 *
1720 * Returns the Name parsed or NULL
1721 */
1722
1723xmlChar *
1724xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001725 const xmlChar *in;
1726 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001727 int count = 0;
1728
1729 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001730
1731 /*
1732 * Accelerator for simple ASCII names
1733 */
1734 in = ctxt->input->cur;
1735 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1736 ((*in >= 0x41) && (*in <= 0x5A)) ||
1737 (*in == '_') || (*in == ':')) {
1738 in++;
1739 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1740 ((*in >= 0x41) && (*in <= 0x5A)) ||
1741 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001742 (*in == '_') || (*in == '-') ||
1743 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001744 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001745 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001746 count = in - ctxt->input->cur;
1747 ret = xmlStrndup(ctxt->input->cur, count);
1748 ctxt->input->cur = in;
1749 return(ret);
1750 }
1751 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001752 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001753}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001754
Daniel Veillard46de64e2002-05-29 08:21:33 +00001755/**
1756 * xmlParseNameAndCompare:
1757 * @ctxt: an XML parser context
1758 *
1759 * parse an XML name and compares for match
1760 * (specialized for endtag parsing)
1761 *
1762 *
1763 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1764 * and the name for mismatch
1765 */
1766
1767xmlChar *
1768xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1769 const xmlChar *cmp = other;
1770 const xmlChar *in;
1771 xmlChar *ret;
1772 int count = 0;
1773
1774 GROW;
1775
1776 in = ctxt->input->cur;
1777 while (*in != 0 && *in == *cmp) {
1778 ++in;
1779 ++cmp;
1780 }
1781 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1782 /* success */
1783 ctxt->input->cur = in;
1784 return (xmlChar*) 1;
1785 }
1786 /* failure (or end of input buffer), check with full function */
1787 ret = xmlParseName (ctxt);
1788 if (ret != 0 && xmlStrEqual (ret, other)) {
1789 xmlFree (ret);
1790 return (xmlChar*) 1;
1791 }
1792 return ret;
1793}
1794
Daniel Veillard76d66f42001-05-16 21:05:17 +00001795static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001796xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1797 xmlChar buf[XML_MAX_NAMELEN + 5];
1798 int len = 0, l;
1799 int c;
1800 int count = 0;
1801
1802 /*
1803 * Handler for more complex cases
1804 */
1805 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001806 c = CUR_CHAR(l);
1807 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1808 (!IS_LETTER(c) && (c != '_') &&
1809 (c != ':'))) {
1810 return(NULL);
1811 }
1812
1813 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1814 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1815 (c == '.') || (c == '-') ||
1816 (c == '_') || (c == ':') ||
1817 (IS_COMBINING(c)) ||
1818 (IS_EXTENDER(c)))) {
1819 if (count++ > 100) {
1820 count = 0;
1821 GROW;
1822 }
1823 COPY_BUF(l,buf,len,c);
1824 NEXTL(l);
1825 c = CUR_CHAR(l);
1826 if (len >= XML_MAX_NAMELEN) {
1827 /*
1828 * Okay someone managed to make a huge name, so he's ready to pay
1829 * for the processing speed.
1830 */
1831 xmlChar *buffer;
1832 int max = len * 2;
1833
1834 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1835 if (buffer == NULL) {
1836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1837 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001838 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001839 return(NULL);
1840 }
1841 memcpy(buffer, buf, len);
1842 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1843 (c == '.') || (c == '-') ||
1844 (c == '_') || (c == ':') ||
1845 (IS_COMBINING(c)) ||
1846 (IS_EXTENDER(c))) {
1847 if (count++ > 100) {
1848 count = 0;
1849 GROW;
1850 }
1851 if (len + 10 > max) {
1852 max *= 2;
1853 buffer = (xmlChar *) xmlRealloc(buffer,
1854 max * sizeof(xmlChar));
1855 if (buffer == NULL) {
1856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1857 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001858 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001859 return(NULL);
1860 }
1861 }
1862 COPY_BUF(l,buffer,len,c);
1863 NEXTL(l);
1864 c = CUR_CHAR(l);
1865 }
1866 buffer[len] = 0;
1867 return(buffer);
1868 }
1869 }
1870 return(xmlStrndup(buf, len));
1871}
1872
1873/**
1874 * xmlParseStringName:
1875 * @ctxt: an XML parser context
1876 * @str: a pointer to the string pointer (IN/OUT)
1877 *
1878 * parse an XML name.
1879 *
1880 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1881 * CombiningChar | Extender
1882 *
1883 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1884 *
1885 * [6] Names ::= Name (S Name)*
1886 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001887 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001888 * is updated to the current location in the string.
1889 */
1890
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001891static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001892xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1893 xmlChar buf[XML_MAX_NAMELEN + 5];
1894 const xmlChar *cur = *str;
1895 int len = 0, l;
1896 int c;
1897
1898 c = CUR_SCHAR(cur, l);
1899 if (!IS_LETTER(c) && (c != '_') &&
1900 (c != ':')) {
1901 return(NULL);
1902 }
1903
1904 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1905 (c == '.') || (c == '-') ||
1906 (c == '_') || (c == ':') ||
1907 (IS_COMBINING(c)) ||
1908 (IS_EXTENDER(c))) {
1909 COPY_BUF(l,buf,len,c);
1910 cur += l;
1911 c = CUR_SCHAR(cur, l);
1912 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1913 /*
1914 * Okay someone managed to make a huge name, so he's ready to pay
1915 * for the processing speed.
1916 */
1917 xmlChar *buffer;
1918 int max = len * 2;
1919
1920 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1921 if (buffer == NULL) {
1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1923 ctxt->sax->error(ctxt->userData,
1924 "xmlParseStringName: out of memory\n");
1925 return(NULL);
1926 }
1927 memcpy(buffer, buf, len);
1928 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1929 (c == '.') || (c == '-') ||
1930 (c == '_') || (c == ':') ||
1931 (IS_COMBINING(c)) ||
1932 (IS_EXTENDER(c))) {
1933 if (len + 10 > max) {
1934 max *= 2;
1935 buffer = (xmlChar *) xmlRealloc(buffer,
1936 max * sizeof(xmlChar));
1937 if (buffer == NULL) {
1938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1939 ctxt->sax->error(ctxt->userData,
1940 "xmlParseStringName: out of memory\n");
1941 return(NULL);
1942 }
1943 }
1944 COPY_BUF(l,buffer,len,c);
1945 cur += l;
1946 c = CUR_SCHAR(cur, l);
1947 }
1948 buffer[len] = 0;
1949 *str = cur;
1950 return(buffer);
1951 }
1952 }
1953 *str = cur;
1954 return(xmlStrndup(buf, len));
1955}
1956
1957/**
1958 * xmlParseNmtoken:
1959 * @ctxt: an XML parser context
1960 *
1961 * parse an XML Nmtoken.
1962 *
1963 * [7] Nmtoken ::= (NameChar)+
1964 *
1965 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1966 *
1967 * Returns the Nmtoken parsed or NULL
1968 */
1969
1970xmlChar *
1971xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1972 xmlChar buf[XML_MAX_NAMELEN + 5];
1973 int len = 0, l;
1974 int c;
1975 int count = 0;
1976
1977 GROW;
1978 c = CUR_CHAR(l);
1979
1980 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1981 (c == '.') || (c == '-') ||
1982 (c == '_') || (c == ':') ||
1983 (IS_COMBINING(c)) ||
1984 (IS_EXTENDER(c))) {
1985 if (count++ > 100) {
1986 count = 0;
1987 GROW;
1988 }
1989 COPY_BUF(l,buf,len,c);
1990 NEXTL(l);
1991 c = CUR_CHAR(l);
1992 if (len >= XML_MAX_NAMELEN) {
1993 /*
1994 * Okay someone managed to make a huge token, so he's ready to pay
1995 * for the processing speed.
1996 */
1997 xmlChar *buffer;
1998 int max = len * 2;
1999
2000 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2001 if (buffer == NULL) {
2002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2003 ctxt->sax->error(ctxt->userData,
2004 "xmlParseNmtoken: out of memory\n");
2005 return(NULL);
2006 }
2007 memcpy(buffer, buf, len);
2008 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2009 (c == '.') || (c == '-') ||
2010 (c == '_') || (c == ':') ||
2011 (IS_COMBINING(c)) ||
2012 (IS_EXTENDER(c))) {
2013 if (count++ > 100) {
2014 count = 0;
2015 GROW;
2016 }
2017 if (len + 10 > max) {
2018 max *= 2;
2019 buffer = (xmlChar *) xmlRealloc(buffer,
2020 max * sizeof(xmlChar));
2021 if (buffer == NULL) {
2022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2023 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002024 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002025 return(NULL);
2026 }
2027 }
2028 COPY_BUF(l,buffer,len,c);
2029 NEXTL(l);
2030 c = CUR_CHAR(l);
2031 }
2032 buffer[len] = 0;
2033 return(buffer);
2034 }
2035 }
2036 if (len == 0)
2037 return(NULL);
2038 return(xmlStrndup(buf, len));
2039}
2040
2041/**
2042 * xmlParseEntityValue:
2043 * @ctxt: an XML parser context
2044 * @orig: if non-NULL store a copy of the original entity value
2045 *
2046 * parse a value for ENTITY declarations
2047 *
2048 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2049 * "'" ([^%&'] | PEReference | Reference)* "'"
2050 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002051 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002052 */
2053
2054xmlChar *
2055xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2056 xmlChar *buf = NULL;
2057 int len = 0;
2058 int size = XML_PARSER_BUFFER_SIZE;
2059 int c, l;
2060 xmlChar stop;
2061 xmlChar *ret = NULL;
2062 const xmlChar *cur = NULL;
2063 xmlParserInputPtr input;
2064
2065 if (RAW == '"') stop = '"';
2066 else if (RAW == '\'') stop = '\'';
2067 else {
2068 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2070 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2071 ctxt->wellFormed = 0;
2072 ctxt->disableSAX = 1;
2073 return(NULL);
2074 }
2075 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2076 if (buf == NULL) {
2077 xmlGenericError(xmlGenericErrorContext,
2078 "malloc of %d byte failed\n", size);
2079 return(NULL);
2080 }
2081
2082 /*
2083 * The content of the entity definition is copied in a buffer.
2084 */
2085
2086 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2087 input = ctxt->input;
2088 GROW;
2089 NEXT;
2090 c = CUR_CHAR(l);
2091 /*
2092 * NOTE: 4.4.5 Included in Literal
2093 * When a parameter entity reference appears in a literal entity
2094 * value, ... a single or double quote character in the replacement
2095 * text is always treated as a normal data character and will not
2096 * terminate the literal.
2097 * In practice it means we stop the loop only when back at parsing
2098 * the initial entity and the quote is found
2099 */
2100 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2101 (ctxt->input != input))) {
2102 if (len + 5 >= size) {
2103 size *= 2;
2104 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2105 if (buf == NULL) {
2106 xmlGenericError(xmlGenericErrorContext,
2107 "realloc of %d byte failed\n", size);
2108 return(NULL);
2109 }
2110 }
2111 COPY_BUF(l,buf,len,c);
2112 NEXTL(l);
2113 /*
2114 * Pop-up of finished entities.
2115 */
2116 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2117 xmlPopInput(ctxt);
2118
2119 GROW;
2120 c = CUR_CHAR(l);
2121 if (c == 0) {
2122 GROW;
2123 c = CUR_CHAR(l);
2124 }
2125 }
2126 buf[len] = 0;
2127
2128 /*
2129 * Raise problem w.r.t. '&' and '%' being used in non-entities
2130 * reference constructs. Note Charref will be handled in
2131 * xmlStringDecodeEntities()
2132 */
2133 cur = buf;
2134 while (*cur != 0) { /* non input consuming */
2135 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2136 xmlChar *name;
2137 xmlChar tmp = *cur;
2138
2139 cur++;
2140 name = xmlParseStringName(ctxt, &cur);
2141 if ((name == NULL) || (*cur != ';')) {
2142 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2144 ctxt->sax->error(ctxt->userData,
2145 "EntityValue: '%c' forbidden except for entities references\n",
2146 tmp);
2147 ctxt->wellFormed = 0;
2148 ctxt->disableSAX = 1;
2149 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002150 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2151 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002152 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2154 ctxt->sax->error(ctxt->userData,
2155 "EntityValue: PEReferences forbidden in internal subset\n",
2156 tmp);
2157 ctxt->wellFormed = 0;
2158 ctxt->disableSAX = 1;
2159 }
2160 if (name != NULL)
2161 xmlFree(name);
2162 }
2163 cur++;
2164 }
2165
2166 /*
2167 * Then PEReference entities are substituted.
2168 */
2169 if (c != stop) {
2170 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2172 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2173 ctxt->wellFormed = 0;
2174 ctxt->disableSAX = 1;
2175 xmlFree(buf);
2176 } else {
2177 NEXT;
2178 /*
2179 * NOTE: 4.4.7 Bypassed
2180 * When a general entity reference appears in the EntityValue in
2181 * an entity declaration, it is bypassed and left as is.
2182 * so XML_SUBSTITUTE_REF is not set here.
2183 */
2184 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2185 0, 0, 0);
2186 if (orig != NULL)
2187 *orig = buf;
2188 else
2189 xmlFree(buf);
2190 }
2191
2192 return(ret);
2193}
2194
2195/**
2196 * xmlParseAttValue:
2197 * @ctxt: an XML parser context
2198 *
2199 * parse a value for an attribute
2200 * Note: the parser won't do substitution of entities here, this
2201 * will be handled later in xmlStringGetNodeList
2202 *
2203 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2204 * "'" ([^<&'] | Reference)* "'"
2205 *
2206 * 3.3.3 Attribute-Value Normalization:
2207 * Before the value of an attribute is passed to the application or
2208 * checked for validity, the XML processor must normalize it as follows:
2209 * - a character reference is processed by appending the referenced
2210 * character to the attribute value
2211 * - an entity reference is processed by recursively processing the
2212 * replacement text of the entity
2213 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2214 * appending #x20 to the normalized value, except that only a single
2215 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2216 * parsed entity or the literal entity value of an internal parsed entity
2217 * - other characters are processed by appending them to the normalized value
2218 * If the declared value is not CDATA, then the XML processor must further
2219 * process the normalized attribute value by discarding any leading and
2220 * trailing space (#x20) characters, and by replacing sequences of space
2221 * (#x20) characters by a single space (#x20) character.
2222 * All attributes for which no declaration has been read should be treated
2223 * by a non-validating parser as if declared CDATA.
2224 *
2225 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2226 */
2227
2228xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002229xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2230
2231xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002232xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2233 xmlChar limit = 0;
2234 xmlChar *buf = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002235 xmlChar *in = NULL;
2236 xmlChar *ret = NULL;
2237 SHRINK;
2238 GROW;
2239 in = CUR_PTR;
2240 if (*in != '"' && *in != '\'') {
2241 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2243 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2244 ctxt->wellFormed = 0;
2245 ctxt->disableSAX = 1;
2246 return(NULL);
2247 }
2248 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2249 limit = *in;
2250 ++in;
2251
2252 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2253 *in != '&' && *in != '<'
2254 ) {
2255 ++in;
2256 }
2257 if (*in != limit) {
2258 return xmlParseAttValueComplex(ctxt);
2259 }
2260 ++in;
2261 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2262 CUR_PTR = in;
2263 return ret;
2264}
2265
2266xmlChar *
2267xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2268 xmlChar limit = 0;
2269 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002270 int len = 0;
2271 int buf_size = 0;
2272 int c, l;
2273 xmlChar *current = NULL;
2274 xmlEntityPtr ent;
2275
2276
2277 SHRINK;
2278 if (NXT(0) == '"') {
2279 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2280 limit = '"';
2281 NEXT;
2282 } else if (NXT(0) == '\'') {
2283 limit = '\'';
2284 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2285 NEXT;
2286 } else {
2287 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2289 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2290 ctxt->wellFormed = 0;
2291 ctxt->disableSAX = 1;
2292 return(NULL);
2293 }
2294
2295 /*
2296 * allocate a translation buffer.
2297 */
2298 buf_size = XML_PARSER_BUFFER_SIZE;
2299 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2300 if (buf == NULL) {
2301 perror("xmlParseAttValue: malloc failed");
2302 return(NULL);
2303 }
2304
2305 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002306 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002307 */
2308 c = CUR_CHAR(l);
2309 while (((NXT(0) != limit) && /* checked */
2310 (c != '<')) || (ctxt->token != 0)) {
2311 if (c == 0) break;
2312 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002313 if (ctxt->replaceEntities) {
2314 if (len > buf_size - 10) {
2315 growBuffer(buf);
2316 }
2317 buf[len++] = '&';
2318 } else {
2319 /*
2320 * The reparsing will be done in xmlStringGetNodeList()
2321 * called by the attribute() function in SAX.c
2322 */
2323 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002324
Daniel Veillard319a7422001-09-11 09:27:09 +00002325 if (len > buf_size - 10) {
2326 growBuffer(buf);
2327 }
2328 current = &buffer[0];
2329 while (*current != 0) { /* non input consuming */
2330 buf[len++] = *current++;
2331 }
2332 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002333 }
Owen Taylor3473f882001-02-23 17:55:21 +00002334 } else if (c == '&') {
2335 if (NXT(1) == '#') {
2336 int val = xmlParseCharRef(ctxt);
2337 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002338 if (ctxt->replaceEntities) {
2339 if (len > buf_size - 10) {
2340 growBuffer(buf);
2341 }
2342 buf[len++] = '&';
2343 } else {
2344 /*
2345 * The reparsing will be done in xmlStringGetNodeList()
2346 * called by the attribute() function in SAX.c
2347 */
2348 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002349
Daniel Veillard319a7422001-09-11 09:27:09 +00002350 if (len > buf_size - 10) {
2351 growBuffer(buf);
2352 }
2353 current = &buffer[0];
2354 while (*current != 0) { /* non input consuming */
2355 buf[len++] = *current++;
2356 }
Owen Taylor3473f882001-02-23 17:55:21 +00002357 }
2358 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002359 if (len > buf_size - 10) {
2360 growBuffer(buf);
2361 }
Owen Taylor3473f882001-02-23 17:55:21 +00002362 len += xmlCopyChar(0, &buf[len], val);
2363 }
2364 } else {
2365 ent = xmlParseEntityRef(ctxt);
2366 if ((ent != NULL) &&
2367 (ctxt->replaceEntities != 0)) {
2368 xmlChar *rep;
2369
2370 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2371 rep = xmlStringDecodeEntities(ctxt, ent->content,
2372 XML_SUBSTITUTE_REF, 0, 0, 0);
2373 if (rep != NULL) {
2374 current = rep;
2375 while (*current != 0) { /* non input consuming */
2376 buf[len++] = *current++;
2377 if (len > buf_size - 10) {
2378 growBuffer(buf);
2379 }
2380 }
2381 xmlFree(rep);
2382 }
2383 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002384 if (len > buf_size - 10) {
2385 growBuffer(buf);
2386 }
Owen Taylor3473f882001-02-23 17:55:21 +00002387 if (ent->content != NULL)
2388 buf[len++] = ent->content[0];
2389 }
2390 } else if (ent != NULL) {
2391 int i = xmlStrlen(ent->name);
2392 const xmlChar *cur = ent->name;
2393
2394 /*
2395 * This may look absurd but is needed to detect
2396 * entities problems
2397 */
2398 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2399 (ent->content != NULL)) {
2400 xmlChar *rep;
2401 rep = xmlStringDecodeEntities(ctxt, ent->content,
2402 XML_SUBSTITUTE_REF, 0, 0, 0);
2403 if (rep != NULL)
2404 xmlFree(rep);
2405 }
2406
2407 /*
2408 * Just output the reference
2409 */
2410 buf[len++] = '&';
2411 if (len > buf_size - i - 10) {
2412 growBuffer(buf);
2413 }
2414 for (;i > 0;i--)
2415 buf[len++] = *cur++;
2416 buf[len++] = ';';
2417 }
2418 }
2419 } else {
2420 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2421 COPY_BUF(l,buf,len,0x20);
2422 if (len > buf_size - 10) {
2423 growBuffer(buf);
2424 }
2425 } else {
2426 COPY_BUF(l,buf,len,c);
2427 if (len > buf_size - 10) {
2428 growBuffer(buf);
2429 }
2430 }
2431 NEXTL(l);
2432 }
2433 GROW;
2434 c = CUR_CHAR(l);
2435 }
2436 buf[len++] = 0;
2437 if (RAW == '<') {
2438 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2440 ctxt->sax->error(ctxt->userData,
2441 "Unescaped '<' not allowed in attributes values\n");
2442 ctxt->wellFormed = 0;
2443 ctxt->disableSAX = 1;
2444 } else if (RAW != limit) {
2445 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2447 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2448 ctxt->wellFormed = 0;
2449 ctxt->disableSAX = 1;
2450 } else
2451 NEXT;
2452 return(buf);
2453}
2454
2455/**
2456 * xmlParseSystemLiteral:
2457 * @ctxt: an XML parser context
2458 *
2459 * parse an XML Literal
2460 *
2461 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2462 *
2463 * Returns the SystemLiteral parsed or NULL
2464 */
2465
2466xmlChar *
2467xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2468 xmlChar *buf = NULL;
2469 int len = 0;
2470 int size = XML_PARSER_BUFFER_SIZE;
2471 int cur, l;
2472 xmlChar stop;
2473 int state = ctxt->instate;
2474 int count = 0;
2475
2476 SHRINK;
2477 if (RAW == '"') {
2478 NEXT;
2479 stop = '"';
2480 } else if (RAW == '\'') {
2481 NEXT;
2482 stop = '\'';
2483 } else {
2484 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2486 ctxt->sax->error(ctxt->userData,
2487 "SystemLiteral \" or ' expected\n");
2488 ctxt->wellFormed = 0;
2489 ctxt->disableSAX = 1;
2490 return(NULL);
2491 }
2492
2493 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2494 if (buf == NULL) {
2495 xmlGenericError(xmlGenericErrorContext,
2496 "malloc of %d byte failed\n", size);
2497 return(NULL);
2498 }
2499 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2500 cur = CUR_CHAR(l);
2501 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2502 if (len + 5 >= size) {
2503 size *= 2;
2504 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2505 if (buf == NULL) {
2506 xmlGenericError(xmlGenericErrorContext,
2507 "realloc of %d byte failed\n", size);
2508 ctxt->instate = (xmlParserInputState) state;
2509 return(NULL);
2510 }
2511 }
2512 count++;
2513 if (count > 50) {
2514 GROW;
2515 count = 0;
2516 }
2517 COPY_BUF(l,buf,len,cur);
2518 NEXTL(l);
2519 cur = CUR_CHAR(l);
2520 if (cur == 0) {
2521 GROW;
2522 SHRINK;
2523 cur = CUR_CHAR(l);
2524 }
2525 }
2526 buf[len] = 0;
2527 ctxt->instate = (xmlParserInputState) state;
2528 if (!IS_CHAR(cur)) {
2529 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2531 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2532 ctxt->wellFormed = 0;
2533 ctxt->disableSAX = 1;
2534 } else {
2535 NEXT;
2536 }
2537 return(buf);
2538}
2539
2540/**
2541 * xmlParsePubidLiteral:
2542 * @ctxt: an XML parser context
2543 *
2544 * parse an XML public literal
2545 *
2546 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2547 *
2548 * Returns the PubidLiteral parsed or NULL.
2549 */
2550
2551xmlChar *
2552xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2553 xmlChar *buf = NULL;
2554 int len = 0;
2555 int size = XML_PARSER_BUFFER_SIZE;
2556 xmlChar cur;
2557 xmlChar stop;
2558 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002559 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002560
2561 SHRINK;
2562 if (RAW == '"') {
2563 NEXT;
2564 stop = '"';
2565 } else if (RAW == '\'') {
2566 NEXT;
2567 stop = '\'';
2568 } else {
2569 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2571 ctxt->sax->error(ctxt->userData,
2572 "SystemLiteral \" or ' expected\n");
2573 ctxt->wellFormed = 0;
2574 ctxt->disableSAX = 1;
2575 return(NULL);
2576 }
2577 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2578 if (buf == NULL) {
2579 xmlGenericError(xmlGenericErrorContext,
2580 "malloc of %d byte failed\n", size);
2581 return(NULL);
2582 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002583 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002584 cur = CUR;
2585 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2586 if (len + 1 >= size) {
2587 size *= 2;
2588 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2589 if (buf == NULL) {
2590 xmlGenericError(xmlGenericErrorContext,
2591 "realloc of %d byte failed\n", size);
2592 return(NULL);
2593 }
2594 }
2595 buf[len++] = cur;
2596 count++;
2597 if (count > 50) {
2598 GROW;
2599 count = 0;
2600 }
2601 NEXT;
2602 cur = CUR;
2603 if (cur == 0) {
2604 GROW;
2605 SHRINK;
2606 cur = CUR;
2607 }
2608 }
2609 buf[len] = 0;
2610 if (cur != stop) {
2611 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2613 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2614 ctxt->wellFormed = 0;
2615 ctxt->disableSAX = 1;
2616 } else {
2617 NEXT;
2618 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002619 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002620 return(buf);
2621}
2622
Daniel Veillard48b2f892001-02-25 16:11:03 +00002623void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002624/**
2625 * xmlParseCharData:
2626 * @ctxt: an XML parser context
2627 * @cdata: int indicating whether we are within a CDATA section
2628 *
2629 * parse a CharData section.
2630 * if we are within a CDATA section ']]>' marks an end of section.
2631 *
2632 * The right angle bracket (>) may be represented using the string "&gt;",
2633 * and must, for compatibility, be escaped using "&gt;" or a character
2634 * reference when it appears in the string "]]>" in content, when that
2635 * string is not marking the end of a CDATA section.
2636 *
2637 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2638 */
2639
2640void
2641xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002642 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002643 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002644 int line = ctxt->input->line;
2645 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002646
2647 SHRINK;
2648 GROW;
2649 /*
2650 * Accelerated common case where input don't need to be
2651 * modified before passing it to the handler.
2652 */
2653 if ((ctxt->token == 0) && (!cdata)) {
2654 in = ctxt->input->cur;
2655 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002656get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002657 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2658 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002659 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002660 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002661 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002662 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002663 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002664 ctxt->input->line++;
2665 in++;
2666 }
2667 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002668 }
2669 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002670 if ((in[1] == ']') && (in[2] == '>')) {
2671 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2673 ctxt->sax->error(ctxt->userData,
2674 "Sequence ']]>' not allowed in content\n");
2675 ctxt->input->cur = in;
2676 ctxt->wellFormed = 0;
2677 ctxt->disableSAX = 1;
2678 return;
2679 }
2680 in++;
2681 goto get_more;
2682 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002683 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002684 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002685 if (IS_BLANK(*ctxt->input->cur)) {
2686 const xmlChar *tmp = ctxt->input->cur;
2687 ctxt->input->cur = in;
2688 if (areBlanks(ctxt, tmp, nbchar)) {
2689 if (ctxt->sax->ignorableWhitespace != NULL)
2690 ctxt->sax->ignorableWhitespace(ctxt->userData,
2691 tmp, nbchar);
2692 } else {
2693 if (ctxt->sax->characters != NULL)
2694 ctxt->sax->characters(ctxt->userData,
2695 tmp, nbchar);
2696 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002697 line = ctxt->input->line;
2698 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002699 } else {
2700 if (ctxt->sax->characters != NULL)
2701 ctxt->sax->characters(ctxt->userData,
2702 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002703 line = ctxt->input->line;
2704 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002705 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002706 }
2707 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002708 if (*in == 0xD) {
2709 in++;
2710 if (*in == 0xA) {
2711 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002712 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002713 ctxt->input->line++;
2714 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002715 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002716 in--;
2717 }
2718 if (*in == '<') {
2719 return;
2720 }
2721 if (*in == '&') {
2722 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002723 }
2724 SHRINK;
2725 GROW;
2726 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002727 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002728 nbchar = 0;
2729 }
Daniel Veillard50582112001-03-26 22:52:16 +00002730 ctxt->input->line = line;
2731 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002732 xmlParseCharDataComplex(ctxt, cdata);
2733}
2734
2735void
2736xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002737 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2738 int nbchar = 0;
2739 int cur, l;
2740 int count = 0;
2741
2742 SHRINK;
2743 GROW;
2744 cur = CUR_CHAR(l);
2745 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2746 ((cur != '&') || (ctxt->token == '&')) &&
2747 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2748 if ((cur == ']') && (NXT(1) == ']') &&
2749 (NXT(2) == '>')) {
2750 if (cdata) break;
2751 else {
2752 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754 ctxt->sax->error(ctxt->userData,
2755 "Sequence ']]>' not allowed in content\n");
2756 /* Should this be relaxed ??? I see a "must here */
2757 ctxt->wellFormed = 0;
2758 ctxt->disableSAX = 1;
2759 }
2760 }
2761 COPY_BUF(l,buf,nbchar,cur);
2762 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2763 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002764 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002765 */
2766 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2767 if (areBlanks(ctxt, buf, nbchar)) {
2768 if (ctxt->sax->ignorableWhitespace != NULL)
2769 ctxt->sax->ignorableWhitespace(ctxt->userData,
2770 buf, nbchar);
2771 } else {
2772 if (ctxt->sax->characters != NULL)
2773 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2774 }
2775 }
2776 nbchar = 0;
2777 }
2778 count++;
2779 if (count > 50) {
2780 GROW;
2781 count = 0;
2782 }
2783 NEXTL(l);
2784 cur = CUR_CHAR(l);
2785 }
2786 if (nbchar != 0) {
2787 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002788 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002789 */
2790 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2791 if (areBlanks(ctxt, buf, nbchar)) {
2792 if (ctxt->sax->ignorableWhitespace != NULL)
2793 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2794 } else {
2795 if (ctxt->sax->characters != NULL)
2796 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2797 }
2798 }
2799 }
2800}
2801
2802/**
2803 * xmlParseExternalID:
2804 * @ctxt: an XML parser context
2805 * @publicID: a xmlChar** receiving PubidLiteral
2806 * @strict: indicate whether we should restrict parsing to only
2807 * production [75], see NOTE below
2808 *
2809 * Parse an External ID or a Public ID
2810 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002811 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002812 * 'PUBLIC' S PubidLiteral S SystemLiteral
2813 *
2814 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2815 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2816 *
2817 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2818 *
2819 * Returns the function returns SystemLiteral and in the second
2820 * case publicID receives PubidLiteral, is strict is off
2821 * it is possible to return NULL and have publicID set.
2822 */
2823
2824xmlChar *
2825xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2826 xmlChar *URI = NULL;
2827
2828 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002829
2830 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002831 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2832 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2833 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2834 SKIP(6);
2835 if (!IS_BLANK(CUR)) {
2836 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2838 ctxt->sax->error(ctxt->userData,
2839 "Space required after 'SYSTEM'\n");
2840 ctxt->wellFormed = 0;
2841 ctxt->disableSAX = 1;
2842 }
2843 SKIP_BLANKS;
2844 URI = xmlParseSystemLiteral(ctxt);
2845 if (URI == NULL) {
2846 ctxt->errNo = XML_ERR_URI_REQUIRED;
2847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2848 ctxt->sax->error(ctxt->userData,
2849 "xmlParseExternalID: SYSTEM, no URI\n");
2850 ctxt->wellFormed = 0;
2851 ctxt->disableSAX = 1;
2852 }
2853 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2854 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2855 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2856 SKIP(6);
2857 if (!IS_BLANK(CUR)) {
2858 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2860 ctxt->sax->error(ctxt->userData,
2861 "Space required after 'PUBLIC'\n");
2862 ctxt->wellFormed = 0;
2863 ctxt->disableSAX = 1;
2864 }
2865 SKIP_BLANKS;
2866 *publicID = xmlParsePubidLiteral(ctxt);
2867 if (*publicID == NULL) {
2868 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2870 ctxt->sax->error(ctxt->userData,
2871 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2872 ctxt->wellFormed = 0;
2873 ctxt->disableSAX = 1;
2874 }
2875 if (strict) {
2876 /*
2877 * We don't handle [83] so "S SystemLiteral" is required.
2878 */
2879 if (!IS_BLANK(CUR)) {
2880 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2882 ctxt->sax->error(ctxt->userData,
2883 "Space required after the Public Identifier\n");
2884 ctxt->wellFormed = 0;
2885 ctxt->disableSAX = 1;
2886 }
2887 } else {
2888 /*
2889 * We handle [83] so we return immediately, if
2890 * "S SystemLiteral" is not detected. From a purely parsing
2891 * point of view that's a nice mess.
2892 */
2893 const xmlChar *ptr;
2894 GROW;
2895
2896 ptr = CUR_PTR;
2897 if (!IS_BLANK(*ptr)) return(NULL);
2898
2899 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2900 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2901 }
2902 SKIP_BLANKS;
2903 URI = xmlParseSystemLiteral(ctxt);
2904 if (URI == NULL) {
2905 ctxt->errNo = XML_ERR_URI_REQUIRED;
2906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2907 ctxt->sax->error(ctxt->userData,
2908 "xmlParseExternalID: PUBLIC, no URI\n");
2909 ctxt->wellFormed = 0;
2910 ctxt->disableSAX = 1;
2911 }
2912 }
2913 return(URI);
2914}
2915
2916/**
2917 * xmlParseComment:
2918 * @ctxt: an XML parser context
2919 *
2920 * Skip an XML (SGML) comment <!-- .... -->
2921 * The spec says that "For compatibility, the string "--" (double-hyphen)
2922 * must not occur within comments. "
2923 *
2924 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2925 */
2926void
2927xmlParseComment(xmlParserCtxtPtr ctxt) {
2928 xmlChar *buf = NULL;
2929 int len;
2930 int size = XML_PARSER_BUFFER_SIZE;
2931 int q, ql;
2932 int r, rl;
2933 int cur, l;
2934 xmlParserInputState state;
2935 xmlParserInputPtr input = ctxt->input;
2936 int count = 0;
2937
2938 /*
2939 * Check that there is a comment right here.
2940 */
2941 if ((RAW != '<') || (NXT(1) != '!') ||
2942 (NXT(2) != '-') || (NXT(3) != '-')) return;
2943
2944 state = ctxt->instate;
2945 ctxt->instate = XML_PARSER_COMMENT;
2946 SHRINK;
2947 SKIP(4);
2948 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2949 if (buf == NULL) {
2950 xmlGenericError(xmlGenericErrorContext,
2951 "malloc of %d byte failed\n", size);
2952 ctxt->instate = state;
2953 return;
2954 }
2955 q = CUR_CHAR(ql);
2956 NEXTL(ql);
2957 r = CUR_CHAR(rl);
2958 NEXTL(rl);
2959 cur = CUR_CHAR(l);
2960 len = 0;
2961 while (IS_CHAR(cur) && /* checked */
2962 ((cur != '>') ||
2963 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002964 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002965 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2967 ctxt->sax->error(ctxt->userData,
2968 "Comment must not contain '--' (double-hyphen)`\n");
2969 ctxt->wellFormed = 0;
2970 ctxt->disableSAX = 1;
2971 }
2972 if (len + 5 >= size) {
2973 size *= 2;
2974 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2975 if (buf == NULL) {
2976 xmlGenericError(xmlGenericErrorContext,
2977 "realloc of %d byte failed\n", size);
2978 ctxt->instate = state;
2979 return;
2980 }
2981 }
2982 COPY_BUF(ql,buf,len,q);
2983 q = r;
2984 ql = rl;
2985 r = cur;
2986 rl = l;
2987
2988 count++;
2989 if (count > 50) {
2990 GROW;
2991 count = 0;
2992 }
2993 NEXTL(l);
2994 cur = CUR_CHAR(l);
2995 if (cur == 0) {
2996 SHRINK;
2997 GROW;
2998 cur = CUR_CHAR(l);
2999 }
3000 }
3001 buf[len] = 0;
3002 if (!IS_CHAR(cur)) {
3003 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3005 ctxt->sax->error(ctxt->userData,
3006 "Comment not terminated \n<!--%.50s\n", buf);
3007 ctxt->wellFormed = 0;
3008 ctxt->disableSAX = 1;
3009 xmlFree(buf);
3010 } else {
3011 if (input != ctxt->input) {
3012 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3014 ctxt->sax->error(ctxt->userData,
3015"Comment doesn't start and stop in the same entity\n");
3016 ctxt->wellFormed = 0;
3017 ctxt->disableSAX = 1;
3018 }
3019 NEXT;
3020 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3021 (!ctxt->disableSAX))
3022 ctxt->sax->comment(ctxt->userData, buf);
3023 xmlFree(buf);
3024 }
3025 ctxt->instate = state;
3026}
3027
3028/**
3029 * xmlParsePITarget:
3030 * @ctxt: an XML parser context
3031 *
3032 * parse the name of a PI
3033 *
3034 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3035 *
3036 * Returns the PITarget name or NULL
3037 */
3038
3039xmlChar *
3040xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3041 xmlChar *name;
3042
3043 name = xmlParseName(ctxt);
3044 if ((name != NULL) &&
3045 ((name[0] == 'x') || (name[0] == 'X')) &&
3046 ((name[1] == 'm') || (name[1] == 'M')) &&
3047 ((name[2] == 'l') || (name[2] == 'L'))) {
3048 int i;
3049 if ((name[0] == 'x') && (name[1] == 'm') &&
3050 (name[2] == 'l') && (name[3] == 0)) {
3051 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3053 ctxt->sax->error(ctxt->userData,
3054 "XML declaration allowed only at the start of the document\n");
3055 ctxt->wellFormed = 0;
3056 ctxt->disableSAX = 1;
3057 return(name);
3058 } else if (name[3] == 0) {
3059 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3061 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3062 ctxt->wellFormed = 0;
3063 ctxt->disableSAX = 1;
3064 return(name);
3065 }
3066 for (i = 0;;i++) {
3067 if (xmlW3CPIs[i] == NULL) break;
3068 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3069 return(name);
3070 }
3071 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3072 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3073 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003074 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003075 }
3076 }
3077 return(name);
3078}
3079
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, e.g.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted URL, optional blanks.  On success
 * the URL is added to the catalogs of the parsing context so it can be used
 * for resolutions needed further down in the document.
 *
 * A syntax error raises an XML_WAR_CATALOG_PI warning, except for a missing
 * '=' after "catalog", which is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign; its absence is not reported */
    while (IS_BLANK(*p))
        p++;
    if (*p != '=') {
        return;
    }
    p++;

    /* quoted value, delimited by matching ' or " */
    while (IS_BLANK(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3142
Owen Taylor3473f882001-02-23 17:55:21 +00003143/**
3144 * xmlParsePI:
3145 * @ctxt: an XML parser context
3146 *
3147 * parse an XML Processing Instruction.
3148 *
3149 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3150 *
3151 * The processing is transfered to SAX once parsed.
3152 */
3153
3154void
3155xmlParsePI(xmlParserCtxtPtr ctxt) {
3156 xmlChar *buf = NULL;
3157 int len = 0;
3158 int size = XML_PARSER_BUFFER_SIZE;
3159 int cur, l;
3160 xmlChar *target;
3161 xmlParserInputState state;
3162 int count = 0;
3163
3164 if ((RAW == '<') && (NXT(1) == '?')) {
3165 xmlParserInputPtr input = ctxt->input;
3166 state = ctxt->instate;
3167 ctxt->instate = XML_PARSER_PI;
3168 /*
3169 * this is a Processing Instruction.
3170 */
3171 SKIP(2);
3172 SHRINK;
3173
3174 /*
3175 * Parse the target name and check for special support like
3176 * namespace.
3177 */
3178 target = xmlParsePITarget(ctxt);
3179 if (target != NULL) {
3180 if ((RAW == '?') && (NXT(1) == '>')) {
3181 if (input != ctxt->input) {
3182 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3184 ctxt->sax->error(ctxt->userData,
3185 "PI declaration doesn't start and stop in the same entity\n");
3186 ctxt->wellFormed = 0;
3187 ctxt->disableSAX = 1;
3188 }
3189 SKIP(2);
3190
3191 /*
3192 * SAX: PI detected.
3193 */
3194 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3195 (ctxt->sax->processingInstruction != NULL))
3196 ctxt->sax->processingInstruction(ctxt->userData,
3197 target, NULL);
3198 ctxt->instate = state;
3199 xmlFree(target);
3200 return;
3201 }
3202 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3203 if (buf == NULL) {
3204 xmlGenericError(xmlGenericErrorContext,
3205 "malloc of %d byte failed\n", size);
3206 ctxt->instate = state;
3207 return;
3208 }
3209 cur = CUR;
3210 if (!IS_BLANK(cur)) {
3211 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3213 ctxt->sax->error(ctxt->userData,
3214 "xmlParsePI: PI %s space expected\n", target);
3215 ctxt->wellFormed = 0;
3216 ctxt->disableSAX = 1;
3217 }
3218 SKIP_BLANKS;
3219 cur = CUR_CHAR(l);
3220 while (IS_CHAR(cur) && /* checked */
3221 ((cur != '?') || (NXT(1) != '>'))) {
3222 if (len + 5 >= size) {
3223 size *= 2;
3224 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3225 if (buf == NULL) {
3226 xmlGenericError(xmlGenericErrorContext,
3227 "realloc of %d byte failed\n", size);
3228 ctxt->instate = state;
3229 return;
3230 }
3231 }
3232 count++;
3233 if (count > 50) {
3234 GROW;
3235 count = 0;
3236 }
3237 COPY_BUF(l,buf,len,cur);
3238 NEXTL(l);
3239 cur = CUR_CHAR(l);
3240 if (cur == 0) {
3241 SHRINK;
3242 GROW;
3243 cur = CUR_CHAR(l);
3244 }
3245 }
3246 buf[len] = 0;
3247 if (cur != '?') {
3248 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3250 ctxt->sax->error(ctxt->userData,
3251 "xmlParsePI: PI %s never end ...\n", target);
3252 ctxt->wellFormed = 0;
3253 ctxt->disableSAX = 1;
3254 } else {
3255 if (input != ctxt->input) {
3256 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3258 ctxt->sax->error(ctxt->userData,
3259 "PI declaration doesn't start and stop in the same entity\n");
3260 ctxt->wellFormed = 0;
3261 ctxt->disableSAX = 1;
3262 }
3263 SKIP(2);
3264
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003265#ifdef LIBXML_CATALOG_ENABLED
3266 if (((state == XML_PARSER_MISC) ||
3267 (state == XML_PARSER_START)) &&
3268 (xmlStrEqual(target, XML_CATALOG_PI))) {
3269 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3270 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3271 (allow == XML_CATA_ALLOW_ALL))
3272 xmlParseCatalogPI(ctxt, buf);
3273 }
3274#endif
3275
3276
Owen Taylor3473f882001-02-23 17:55:21 +00003277 /*
3278 * SAX: PI detected.
3279 */
3280 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3281 (ctxt->sax->processingInstruction != NULL))
3282 ctxt->sax->processingInstruction(ctxt->userData,
3283 target, buf);
3284 }
3285 xmlFree(buf);
3286 xmlFree(target);
3287 } else {
3288 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3290 ctxt->sax->error(ctxt->userData,
3291 "xmlParsePI : no target name\n");
3292 ctxt->wellFormed = 0;
3293 ctxt->disableSAX = 1;
3294 }
3295 ctxt->instate = state;
3296 }
3297}
3298
3299/**
3300 * xmlParseNotationDecl:
3301 * @ctxt: an XML parser context
3302 *
3303 * parse a notation declaration
3304 *
3305 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3306 *
3307 * Hence there is actually 3 choices:
3308 * 'PUBLIC' S PubidLiteral
3309 * 'PUBLIC' S PubidLiteral S SystemLiteral
3310 * and 'SYSTEM' S SystemLiteral
3311 *
3312 * See the NOTE on xmlParseExternalID().
3313 */
3314
3315void
3316xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3317 xmlChar *name;
3318 xmlChar *Pubid;
3319 xmlChar *Systemid;
3320
3321 if ((RAW == '<') && (NXT(1) == '!') &&
3322 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3323 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3324 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3325 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3326 xmlParserInputPtr input = ctxt->input;
3327 SHRINK;
3328 SKIP(10);
3329 if (!IS_BLANK(CUR)) {
3330 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3332 ctxt->sax->error(ctxt->userData,
3333 "Space required after '<!NOTATION'\n");
3334 ctxt->wellFormed = 0;
3335 ctxt->disableSAX = 1;
3336 return;
3337 }
3338 SKIP_BLANKS;
3339
Daniel Veillard76d66f42001-05-16 21:05:17 +00003340 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003341 if (name == NULL) {
3342 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3344 ctxt->sax->error(ctxt->userData,
3345 "NOTATION: Name expected here\n");
3346 ctxt->wellFormed = 0;
3347 ctxt->disableSAX = 1;
3348 return;
3349 }
3350 if (!IS_BLANK(CUR)) {
3351 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3353 ctxt->sax->error(ctxt->userData,
3354 "Space required after the NOTATION name'\n");
3355 ctxt->wellFormed = 0;
3356 ctxt->disableSAX = 1;
3357 return;
3358 }
3359 SKIP_BLANKS;
3360
3361 /*
3362 * Parse the IDs.
3363 */
3364 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3365 SKIP_BLANKS;
3366
3367 if (RAW == '>') {
3368 if (input != ctxt->input) {
3369 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3371 ctxt->sax->error(ctxt->userData,
3372"Notation declaration doesn't start and stop in the same entity\n");
3373 ctxt->wellFormed = 0;
3374 ctxt->disableSAX = 1;
3375 }
3376 NEXT;
3377 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3378 (ctxt->sax->notationDecl != NULL))
3379 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3380 } else {
3381 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384 "'>' required to close NOTATION declaration\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388 xmlFree(name);
3389 if (Systemid != NULL) xmlFree(Systemid);
3390 if (Pubid != NULL) xmlFree(Pubid);
3391 }
3392}
3393
3394/**
3395 * xmlParseEntityDecl:
3396 * @ctxt: an XML parser context
3397 *
3398 * parse <!ENTITY declarations
3399 *
3400 * [70] EntityDecl ::= GEDecl | PEDecl
3401 *
3402 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3403 *
3404 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3405 *
3406 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3407 *
3408 * [74] PEDef ::= EntityValue | ExternalID
3409 *
3410 * [76] NDataDecl ::= S 'NDATA' S Name
3411 *
3412 * [ VC: Notation Declared ]
3413 * The Name must match the declared name of a notation.
3414 */
3415
3416void
3417xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3418 xmlChar *name = NULL;
3419 xmlChar *value = NULL;
3420 xmlChar *URI = NULL, *literal = NULL;
3421 xmlChar *ndata = NULL;
3422 int isParameter = 0;
3423 xmlChar *orig = NULL;
3424
3425 GROW;
3426 if ((RAW == '<') && (NXT(1) == '!') &&
3427 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3428 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3429 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3430 xmlParserInputPtr input = ctxt->input;
3431 ctxt->instate = XML_PARSER_ENTITY_DECL;
3432 SHRINK;
3433 SKIP(8);
3434 if (!IS_BLANK(CUR)) {
3435 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3437 ctxt->sax->error(ctxt->userData,
3438 "Space required after '<!ENTITY'\n");
3439 ctxt->wellFormed = 0;
3440 ctxt->disableSAX = 1;
3441 }
3442 SKIP_BLANKS;
3443
3444 if (RAW == '%') {
3445 NEXT;
3446 if (!IS_BLANK(CUR)) {
3447 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3449 ctxt->sax->error(ctxt->userData,
3450 "Space required after '%'\n");
3451 ctxt->wellFormed = 0;
3452 ctxt->disableSAX = 1;
3453 }
3454 SKIP_BLANKS;
3455 isParameter = 1;
3456 }
3457
Daniel Veillard76d66f42001-05-16 21:05:17 +00003458 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003459 if (name == NULL) {
3460 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3462 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3463 ctxt->wellFormed = 0;
3464 ctxt->disableSAX = 1;
3465 return;
3466 }
3467 if (!IS_BLANK(CUR)) {
3468 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3470 ctxt->sax->error(ctxt->userData,
3471 "Space required after the entity name\n");
3472 ctxt->wellFormed = 0;
3473 ctxt->disableSAX = 1;
3474 }
3475 SKIP_BLANKS;
3476
3477 /*
3478 * handle the various case of definitions...
3479 */
3480 if (isParameter) {
3481 if ((RAW == '"') || (RAW == '\'')) {
3482 value = xmlParseEntityValue(ctxt, &orig);
3483 if (value) {
3484 if ((ctxt->sax != NULL) &&
3485 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3486 ctxt->sax->entityDecl(ctxt->userData, name,
3487 XML_INTERNAL_PARAMETER_ENTITY,
3488 NULL, NULL, value);
3489 }
3490 } else {
3491 URI = xmlParseExternalID(ctxt, &literal, 1);
3492 if ((URI == NULL) && (literal == NULL)) {
3493 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3495 ctxt->sax->error(ctxt->userData,
3496 "Entity value required\n");
3497 ctxt->wellFormed = 0;
3498 ctxt->disableSAX = 1;
3499 }
3500 if (URI) {
3501 xmlURIPtr uri;
3502
3503 uri = xmlParseURI((const char *) URI);
3504 if (uri == NULL) {
3505 ctxt->errNo = XML_ERR_INVALID_URI;
3506 if ((ctxt->sax != NULL) &&
3507 (!ctxt->disableSAX) &&
3508 (ctxt->sax->error != NULL))
3509 ctxt->sax->error(ctxt->userData,
3510 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003511 /*
3512 * This really ought to be a well formedness error
3513 * but the XML Core WG decided otherwise c.f. issue
3514 * E26 of the XML erratas.
3515 */
Owen Taylor3473f882001-02-23 17:55:21 +00003516 } else {
3517 if (uri->fragment != NULL) {
3518 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3519 if ((ctxt->sax != NULL) &&
3520 (!ctxt->disableSAX) &&
3521 (ctxt->sax->error != NULL))
3522 ctxt->sax->error(ctxt->userData,
3523 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003524 /*
3525 * Okay this is foolish to block those but not
3526 * invalid URIs.
3527 */
Owen Taylor3473f882001-02-23 17:55:21 +00003528 ctxt->wellFormed = 0;
3529 } else {
3530 if ((ctxt->sax != NULL) &&
3531 (!ctxt->disableSAX) &&
3532 (ctxt->sax->entityDecl != NULL))
3533 ctxt->sax->entityDecl(ctxt->userData, name,
3534 XML_EXTERNAL_PARAMETER_ENTITY,
3535 literal, URI, NULL);
3536 }
3537 xmlFreeURI(uri);
3538 }
3539 }
3540 }
3541 } else {
3542 if ((RAW == '"') || (RAW == '\'')) {
3543 value = xmlParseEntityValue(ctxt, &orig);
3544 if ((ctxt->sax != NULL) &&
3545 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3546 ctxt->sax->entityDecl(ctxt->userData, name,
3547 XML_INTERNAL_GENERAL_ENTITY,
3548 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003549 /*
3550 * For expat compatibility in SAX mode.
3551 */
3552 if ((ctxt->myDoc == NULL) ||
3553 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3554 if (ctxt->myDoc == NULL) {
3555 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3556 }
3557 if (ctxt->myDoc->intSubset == NULL)
3558 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3559 BAD_CAST "fake", NULL, NULL);
3560
3561 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3562 NULL, NULL, value);
3563 }
Owen Taylor3473f882001-02-23 17:55:21 +00003564 } else {
3565 URI = xmlParseExternalID(ctxt, &literal, 1);
3566 if ((URI == NULL) && (literal == NULL)) {
3567 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3569 ctxt->sax->error(ctxt->userData,
3570 "Entity value required\n");
3571 ctxt->wellFormed = 0;
3572 ctxt->disableSAX = 1;
3573 }
3574 if (URI) {
3575 xmlURIPtr uri;
3576
3577 uri = xmlParseURI((const char *)URI);
3578 if (uri == NULL) {
3579 ctxt->errNo = XML_ERR_INVALID_URI;
3580 if ((ctxt->sax != NULL) &&
3581 (!ctxt->disableSAX) &&
3582 (ctxt->sax->error != NULL))
3583 ctxt->sax->error(ctxt->userData,
3584 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003585 /*
3586 * This really ought to be a well formedness error
3587 * but the XML Core WG decided otherwise c.f. issue
3588 * E26 of the XML erratas.
3589 */
Owen Taylor3473f882001-02-23 17:55:21 +00003590 } else {
3591 if (uri->fragment != NULL) {
3592 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3593 if ((ctxt->sax != NULL) &&
3594 (!ctxt->disableSAX) &&
3595 (ctxt->sax->error != NULL))
3596 ctxt->sax->error(ctxt->userData,
3597 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003598 /*
3599 * Okay this is foolish to block those but not
3600 * invalid URIs.
3601 */
Owen Taylor3473f882001-02-23 17:55:21 +00003602 ctxt->wellFormed = 0;
3603 }
3604 xmlFreeURI(uri);
3605 }
3606 }
3607 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3608 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3610 ctxt->sax->error(ctxt->userData,
3611 "Space required before 'NDATA'\n");
3612 ctxt->wellFormed = 0;
3613 ctxt->disableSAX = 1;
3614 }
3615 SKIP_BLANKS;
3616 if ((RAW == 'N') && (NXT(1) == 'D') &&
3617 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3618 (NXT(4) == 'A')) {
3619 SKIP(5);
3620 if (!IS_BLANK(CUR)) {
3621 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3623 ctxt->sax->error(ctxt->userData,
3624 "Space required after 'NDATA'\n");
3625 ctxt->wellFormed = 0;
3626 ctxt->disableSAX = 1;
3627 }
3628 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003629 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3631 (ctxt->sax->unparsedEntityDecl != NULL))
3632 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3633 literal, URI, ndata);
3634 } else {
3635 if ((ctxt->sax != NULL) &&
3636 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3637 ctxt->sax->entityDecl(ctxt->userData, name,
3638 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3639 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003640 /*
3641 * For expat compatibility in SAX mode.
3642 * assuming the entity repalcement was asked for
3643 */
3644 if ((ctxt->replaceEntities != 0) &&
3645 ((ctxt->myDoc == NULL) ||
3646 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3647 if (ctxt->myDoc == NULL) {
3648 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3649 }
3650
3651 if (ctxt->myDoc->intSubset == NULL)
3652 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3653 BAD_CAST "fake", NULL, NULL);
3654 entityDecl(ctxt, name,
3655 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3656 literal, URI, NULL);
3657 }
Owen Taylor3473f882001-02-23 17:55:21 +00003658 }
3659 }
3660 }
3661 SKIP_BLANKS;
3662 if (RAW != '>') {
3663 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3665 ctxt->sax->error(ctxt->userData,
3666 "xmlParseEntityDecl: entity %s not terminated\n", name);
3667 ctxt->wellFormed = 0;
3668 ctxt->disableSAX = 1;
3669 } else {
3670 if (input != ctxt->input) {
3671 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3673 ctxt->sax->error(ctxt->userData,
3674"Entity declaration doesn't start and stop in the same entity\n");
3675 ctxt->wellFormed = 0;
3676 ctxt->disableSAX = 1;
3677 }
3678 NEXT;
3679 }
3680 if (orig != NULL) {
3681 /*
3682 * Ugly mechanism to save the raw entity value.
3683 */
3684 xmlEntityPtr cur = NULL;
3685
3686 if (isParameter) {
3687 if ((ctxt->sax != NULL) &&
3688 (ctxt->sax->getParameterEntity != NULL))
3689 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3690 } else {
3691 if ((ctxt->sax != NULL) &&
3692 (ctxt->sax->getEntity != NULL))
3693 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003694 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3695 cur = getEntity(ctxt, name);
3696 }
Owen Taylor3473f882001-02-23 17:55:21 +00003697 }
3698 if (cur != NULL) {
3699 if (cur->orig != NULL)
3700 xmlFree(orig);
3701 else
3702 cur->orig = orig;
3703 } else
3704 xmlFree(orig);
3705 }
3706 if (name != NULL) xmlFree(name);
3707 if (value != NULL) xmlFree(value);
3708 if (URI != NULL) xmlFree(URI);
3709 if (literal != NULL) xmlFree(literal);
3710 if (ndata != NULL) xmlFree(ndata);
3711 }
3712}
3713
3714/**
3715 * xmlParseDefaultDecl:
3716 * @ctxt: an XML parser context
3717 * @value: Receive a possible fixed default value for the attribute
3718 *
3719 * Parse an attribute default declaration
3720 *
3721 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3722 *
3723 * [ VC: Required Attribute ]
3724 * if the default declaration is the keyword #REQUIRED, then the
3725 * attribute must be specified for all elements of the type in the
3726 * attribute-list declaration.
3727 *
3728 * [ VC: Attribute Default Legal ]
3729 * The declared default value must meet the lexical constraints of
3730 * the declared attribute type c.f. xmlValidateAttributeDecl()
3731 *
3732 * [ VC: Fixed Attribute Default ]
3733 * if an attribute has a default value declared with the #FIXED
3734 * keyword, instances of that attribute must match the default value.
3735 *
3736 * [ WFC: No < in Attribute Values ]
3737 * handled in xmlParseAttValue()
3738 *
3739 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3740 * or XML_ATTRIBUTE_FIXED.
3741 */
3742
3743int
3744xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3745 int val;
3746 xmlChar *ret;
3747
3748 *value = NULL;
3749 if ((RAW == '#') && (NXT(1) == 'R') &&
3750 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3751 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3752 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3753 (NXT(8) == 'D')) {
3754 SKIP(9);
3755 return(XML_ATTRIBUTE_REQUIRED);
3756 }
3757 if ((RAW == '#') && (NXT(1) == 'I') &&
3758 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3759 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3760 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3761 SKIP(8);
3762 return(XML_ATTRIBUTE_IMPLIED);
3763 }
3764 val = XML_ATTRIBUTE_NONE;
3765 if ((RAW == '#') && (NXT(1) == 'F') &&
3766 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3767 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3768 SKIP(6);
3769 val = XML_ATTRIBUTE_FIXED;
3770 if (!IS_BLANK(CUR)) {
3771 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3773 ctxt->sax->error(ctxt->userData,
3774 "Space required after '#FIXED'\n");
3775 ctxt->wellFormed = 0;
3776 ctxt->disableSAX = 1;
3777 }
3778 SKIP_BLANKS;
3779 }
3780 ret = xmlParseAttValue(ctxt);
3781 ctxt->instate = XML_PARSER_DTD;
3782 if (ret == NULL) {
3783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3784 ctxt->sax->error(ctxt->userData,
3785 "Attribute default value declaration error\n");
3786 ctxt->wellFormed = 0;
3787 ctxt->disableSAX = 1;
3788 } else
3789 *value = ret;
3790 return(val);
3791}
3792
3793/**
3794 * xmlParseNotationType:
3795 * @ctxt: an XML parser context
3796 *
3797 * parse an Notation attribute type.
3798 *
3799 * Note: the leading 'NOTATION' S part has already being parsed...
3800 *
3801 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3802 *
3803 * [ VC: Notation Attributes ]
3804 * Values of this type must match one of the notation names included
3805 * in the declaration; all notation names in the declaration must be declared.
3806 *
3807 * Returns: the notation attribute tree built while parsing
3808 */
3809
3810xmlEnumerationPtr
3811xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3812 xmlChar *name;
3813 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3814
3815 if (RAW != '(') {
3816 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3818 ctxt->sax->error(ctxt->userData,
3819 "'(' required to start 'NOTATION'\n");
3820 ctxt->wellFormed = 0;
3821 ctxt->disableSAX = 1;
3822 return(NULL);
3823 }
3824 SHRINK;
3825 do {
3826 NEXT;
3827 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003828 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003829 if (name == NULL) {
3830 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3832 ctxt->sax->error(ctxt->userData,
3833 "Name expected in NOTATION declaration\n");
3834 ctxt->wellFormed = 0;
3835 ctxt->disableSAX = 1;
3836 return(ret);
3837 }
3838 cur = xmlCreateEnumeration(name);
3839 xmlFree(name);
3840 if (cur == NULL) return(ret);
3841 if (last == NULL) ret = last = cur;
3842 else {
3843 last->next = cur;
3844 last = cur;
3845 }
3846 SKIP_BLANKS;
3847 } while (RAW == '|');
3848 if (RAW != ')') {
3849 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3851 ctxt->sax->error(ctxt->userData,
3852 "')' required to finish NOTATION declaration\n");
3853 ctxt->wellFormed = 0;
3854 ctxt->disableSAX = 1;
3855 if ((last != NULL) && (last != ret))
3856 xmlFreeEnumeration(last);
3857 return(ret);
3858 }
3859 NEXT;
3860 return(ret);
3861}
3862
3863/**
3864 * xmlParseEnumerationType:
3865 * @ctxt: an XML parser context
3866 *
3867 * parse an Enumeration attribute type.
3868 *
3869 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3870 *
3871 * [ VC: Enumeration ]
3872 * Values of this type must match one of the Nmtoken tokens in
3873 * the declaration
3874 *
3875 * Returns: the enumeration attribute tree built while parsing
3876 */
3877
3878xmlEnumerationPtr
3879xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3880 xmlChar *name;
3881 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3882
3883 if (RAW != '(') {
3884 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3886 ctxt->sax->error(ctxt->userData,
3887 "'(' required to start ATTLIST enumeration\n");
3888 ctxt->wellFormed = 0;
3889 ctxt->disableSAX = 1;
3890 return(NULL);
3891 }
3892 SHRINK;
3893 do {
3894 NEXT;
3895 SKIP_BLANKS;
3896 name = xmlParseNmtoken(ctxt);
3897 if (name == NULL) {
3898 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3900 ctxt->sax->error(ctxt->userData,
3901 "NmToken expected in ATTLIST enumeration\n");
3902 ctxt->wellFormed = 0;
3903 ctxt->disableSAX = 1;
3904 return(ret);
3905 }
3906 cur = xmlCreateEnumeration(name);
3907 xmlFree(name);
3908 if (cur == NULL) return(ret);
3909 if (last == NULL) ret = last = cur;
3910 else {
3911 last->next = cur;
3912 last = cur;
3913 }
3914 SKIP_BLANKS;
3915 } while (RAW == '|');
3916 if (RAW != ')') {
3917 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3919 ctxt->sax->error(ctxt->userData,
3920 "')' required to finish ATTLIST enumeration\n");
3921 ctxt->wellFormed = 0;
3922 ctxt->disableSAX = 1;
3923 return(ret);
3924 }
3925 NEXT;
3926 return(ret);
3927}
3928
3929/**
3930 * xmlParseEnumeratedType:
3931 * @ctxt: an XML parser context
3932 * @tree: the enumeration tree built while parsing
3933 *
3934 * parse an Enumerated attribute type.
3935 *
3936 * [57] EnumeratedType ::= NotationType | Enumeration
3937 *
3938 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3939 *
3940 *
3941 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3942 */
3943
3944int
3945xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3946 if ((RAW == 'N') && (NXT(1) == 'O') &&
3947 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3948 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3949 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3950 SKIP(8);
3951 if (!IS_BLANK(CUR)) {
3952 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3954 ctxt->sax->error(ctxt->userData,
3955 "Space required after 'NOTATION'\n");
3956 ctxt->wellFormed = 0;
3957 ctxt->disableSAX = 1;
3958 return(0);
3959 }
3960 SKIP_BLANKS;
3961 *tree = xmlParseNotationType(ctxt);
3962 if (*tree == NULL) return(0);
3963 return(XML_ATTRIBUTE_NOTATION);
3964 }
3965 *tree = xmlParseEnumerationType(ctxt);
3966 if (*tree == NULL) return(0);
3967 return(XML_ATTRIBUTE_ENUMERATION);
3968}
3969
3970/**
3971 * xmlParseAttributeType:
3972 * @ctxt: an XML parser context
3973 * @tree: the enumeration tree built while parsing
3974 *
3975 * parse the Attribute list def for an element
3976 *
3977 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3978 *
3979 * [55] StringType ::= 'CDATA'
3980 *
3981 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3982 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3983 *
3984 * Validity constraints for attribute values syntax are checked in
3985 * xmlValidateAttributeValue()
3986 *
3987 * [ VC: ID ]
3988 * Values of type ID must match the Name production. A name must not
3989 * appear more than once in an XML document as a value of this type;
3990 * i.e., ID values must uniquely identify the elements which bear them.
3991 *
3992 * [ VC: One ID per Element Type ]
3993 * No element type may have more than one ID attribute specified.
3994 *
3995 * [ VC: ID Attribute Default ]
3996 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3997 *
3998 * [ VC: IDREF ]
3999 * Values of type IDREF must match the Name production, and values
4000 * of type IDREFS must match Names; each IDREF Name must match the value
4001 * of an ID attribute on some element in the XML document; i.e. IDREF
4002 * values must match the value of some ID attribute.
4003 *
4004 * [ VC: Entity Name ]
4005 * Values of type ENTITY must match the Name production, values
4006 * of type ENTITIES must match Names; each Entity Name must match the
4007 * name of an unparsed entity declared in the DTD.
4008 *
4009 * [ VC: Name Token ]
4010 * Values of type NMTOKEN must match the Nmtoken production; values
4011 * of type NMTOKENS must match Nmtokens.
4012 *
4013 * Returns the attribute type
4014 */
4015int
4016xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4017 SHRINK;
4018 if ((RAW == 'C') && (NXT(1) == 'D') &&
4019 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4020 (NXT(4) == 'A')) {
4021 SKIP(5);
4022 return(XML_ATTRIBUTE_CDATA);
4023 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4024 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4025 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4026 SKIP(6);
4027 return(XML_ATTRIBUTE_IDREFS);
4028 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4029 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4030 (NXT(4) == 'F')) {
4031 SKIP(5);
4032 return(XML_ATTRIBUTE_IDREF);
4033 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4034 SKIP(2);
4035 return(XML_ATTRIBUTE_ID);
4036 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4037 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4038 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4039 SKIP(6);
4040 return(XML_ATTRIBUTE_ENTITY);
4041 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4042 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4043 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4044 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4045 SKIP(8);
4046 return(XML_ATTRIBUTE_ENTITIES);
4047 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4048 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4049 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4050 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4051 SKIP(8);
4052 return(XML_ATTRIBUTE_NMTOKENS);
4053 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4054 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4055 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4056 (NXT(6) == 'N')) {
4057 SKIP(7);
4058 return(XML_ATTRIBUTE_NMTOKEN);
4059 }
4060 return(xmlParseEnumeratedType(ctxt, tree));
4061}
4062
4063/**
4064 * xmlParseAttributeListDecl:
4065 * @ctxt: an XML parser context
4066 *
4067 * : parse the Attribute list def for an element
4068 *
4069 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4070 *
4071 * [53] AttDef ::= S Name S AttType S DefaultDecl
4072 *
4073 */
4074void
4075xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4076 xmlChar *elemName;
4077 xmlChar *attrName;
4078 xmlEnumerationPtr tree;
4079
4080 if ((RAW == '<') && (NXT(1) == '!') &&
4081 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4082 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4083 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4084 (NXT(8) == 'T')) {
4085 xmlParserInputPtr input = ctxt->input;
4086
4087 SKIP(9);
4088 if (!IS_BLANK(CUR)) {
4089 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4091 ctxt->sax->error(ctxt->userData,
4092 "Space required after '<!ATTLIST'\n");
4093 ctxt->wellFormed = 0;
4094 ctxt->disableSAX = 1;
4095 }
4096 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004097 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004098 if (elemName == NULL) {
4099 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4101 ctxt->sax->error(ctxt->userData,
4102 "ATTLIST: no name for Element\n");
4103 ctxt->wellFormed = 0;
4104 ctxt->disableSAX = 1;
4105 return;
4106 }
4107 SKIP_BLANKS;
4108 GROW;
4109 while (RAW != '>') {
4110 const xmlChar *check = CUR_PTR;
4111 int type;
4112 int def;
4113 xmlChar *defaultValue = NULL;
4114
4115 GROW;
4116 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004117 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004118 if (attrName == NULL) {
4119 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4121 ctxt->sax->error(ctxt->userData,
4122 "ATTLIST: no name for Attribute\n");
4123 ctxt->wellFormed = 0;
4124 ctxt->disableSAX = 1;
4125 break;
4126 }
4127 GROW;
4128 if (!IS_BLANK(CUR)) {
4129 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4131 ctxt->sax->error(ctxt->userData,
4132 "Space required after the attribute name\n");
4133 ctxt->wellFormed = 0;
4134 ctxt->disableSAX = 1;
4135 if (attrName != NULL)
4136 xmlFree(attrName);
4137 if (defaultValue != NULL)
4138 xmlFree(defaultValue);
4139 break;
4140 }
4141 SKIP_BLANKS;
4142
4143 type = xmlParseAttributeType(ctxt, &tree);
4144 if (type <= 0) {
4145 if (attrName != NULL)
4146 xmlFree(attrName);
4147 if (defaultValue != NULL)
4148 xmlFree(defaultValue);
4149 break;
4150 }
4151
4152 GROW;
4153 if (!IS_BLANK(CUR)) {
4154 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4155 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4156 ctxt->sax->error(ctxt->userData,
4157 "Space required after the attribute type\n");
4158 ctxt->wellFormed = 0;
4159 ctxt->disableSAX = 1;
4160 if (attrName != NULL)
4161 xmlFree(attrName);
4162 if (defaultValue != NULL)
4163 xmlFree(defaultValue);
4164 if (tree != NULL)
4165 xmlFreeEnumeration(tree);
4166 break;
4167 }
4168 SKIP_BLANKS;
4169
4170 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4171 if (def <= 0) {
4172 if (attrName != NULL)
4173 xmlFree(attrName);
4174 if (defaultValue != NULL)
4175 xmlFree(defaultValue);
4176 if (tree != NULL)
4177 xmlFreeEnumeration(tree);
4178 break;
4179 }
4180
4181 GROW;
4182 if (RAW != '>') {
4183 if (!IS_BLANK(CUR)) {
4184 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4186 ctxt->sax->error(ctxt->userData,
4187 "Space required after the attribute default value\n");
4188 ctxt->wellFormed = 0;
4189 ctxt->disableSAX = 1;
4190 if (attrName != NULL)
4191 xmlFree(attrName);
4192 if (defaultValue != NULL)
4193 xmlFree(defaultValue);
4194 if (tree != NULL)
4195 xmlFreeEnumeration(tree);
4196 break;
4197 }
4198 SKIP_BLANKS;
4199 }
4200 if (check == CUR_PTR) {
4201 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4203 ctxt->sax->error(ctxt->userData,
4204 "xmlParseAttributeListDecl: detected internal error\n");
4205 if (attrName != NULL)
4206 xmlFree(attrName);
4207 if (defaultValue != NULL)
4208 xmlFree(defaultValue);
4209 if (tree != NULL)
4210 xmlFreeEnumeration(tree);
4211 break;
4212 }
4213 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4214 (ctxt->sax->attributeDecl != NULL))
4215 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4216 type, def, defaultValue, tree);
4217 if (attrName != NULL)
4218 xmlFree(attrName);
4219 if (defaultValue != NULL)
4220 xmlFree(defaultValue);
4221 GROW;
4222 }
4223 if (RAW == '>') {
4224 if (input != ctxt->input) {
4225 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4227 ctxt->sax->error(ctxt->userData,
4228"Attribute list declaration doesn't start and stop in the same entity\n");
4229 ctxt->wellFormed = 0;
4230 ctxt->disableSAX = 1;
4231 }
4232 NEXT;
4233 }
4234
4235 xmlFree(elemName);
4236 }
4237}
4238
4239/**
4240 * xmlParseElementMixedContentDecl:
4241 * @ctxt: an XML parser context
4242 *
4243 * parse the declaration for a Mixed Element content
4244 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4245 *
4246 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4247 * '(' S? '#PCDATA' S? ')'
4248 *
4249 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4250 *
4251 * [ VC: No Duplicate Types ]
4252 * The same name must not appear more than once in a single
4253 * mixed-content declaration.
4254 *
4255 * returns: the list of the xmlElementContentPtr describing the element choices
4256 */
4257xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004258xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004259 xmlElementContentPtr ret = NULL, cur = NULL, n;
4260 xmlChar *elem = NULL;
4261
4262 GROW;
4263 if ((RAW == '#') && (NXT(1) == 'P') &&
4264 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4265 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4266 (NXT(6) == 'A')) {
4267 SKIP(7);
4268 SKIP_BLANKS;
4269 SHRINK;
4270 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004271 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4272 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4273 if (ctxt->vctxt.error != NULL)
4274 ctxt->vctxt.error(ctxt->vctxt.userData,
4275"Element content declaration doesn't start and stop in the same entity\n");
4276 ctxt->valid = 0;
4277 }
Owen Taylor3473f882001-02-23 17:55:21 +00004278 NEXT;
4279 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4280 if (RAW == '*') {
4281 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4282 NEXT;
4283 }
4284 return(ret);
4285 }
4286 if ((RAW == '(') || (RAW == '|')) {
4287 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4288 if (ret == NULL) return(NULL);
4289 }
4290 while (RAW == '|') {
4291 NEXT;
4292 if (elem == NULL) {
4293 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4294 if (ret == NULL) return(NULL);
4295 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004296 if (cur != NULL)
4297 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004298 cur = ret;
4299 } else {
4300 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4301 if (n == NULL) return(NULL);
4302 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004303 if (n->c1 != NULL)
4304 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004305 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004306 if (n != NULL)
4307 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004308 cur = n;
4309 xmlFree(elem);
4310 }
4311 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004312 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004313 if (elem == NULL) {
4314 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4316 ctxt->sax->error(ctxt->userData,
4317 "xmlParseElementMixedContentDecl : Name expected\n");
4318 ctxt->wellFormed = 0;
4319 ctxt->disableSAX = 1;
4320 xmlFreeElementContent(cur);
4321 return(NULL);
4322 }
4323 SKIP_BLANKS;
4324 GROW;
4325 }
4326 if ((RAW == ')') && (NXT(1) == '*')) {
4327 if (elem != NULL) {
4328 cur->c2 = xmlNewElementContent(elem,
4329 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004330 if (cur->c2 != NULL)
4331 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004332 xmlFree(elem);
4333 }
4334 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004335 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4336 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4337 if (ctxt->vctxt.error != NULL)
4338 ctxt->vctxt.error(ctxt->vctxt.userData,
4339"Element content declaration doesn't start and stop in the same entity\n");
4340 ctxt->valid = 0;
4341 }
Owen Taylor3473f882001-02-23 17:55:21 +00004342 SKIP(2);
4343 } else {
4344 if (elem != NULL) xmlFree(elem);
4345 xmlFreeElementContent(ret);
4346 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4348 ctxt->sax->error(ctxt->userData,
4349 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4350 ctxt->wellFormed = 0;
4351 ctxt->disableSAX = 1;
4352 return(NULL);
4353 }
4354
4355 } else {
4356 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4358 ctxt->sax->error(ctxt->userData,
4359 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4360 ctxt->wellFormed = 0;
4361 ctxt->disableSAX = 1;
4362 }
4363 return(ret);
4364}
4365
4366/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004367 * xmlParseElementChildrenContentD:
4368 * @ctxt: an XML parser context
4369 *
4370 * VMS version of xmlParseElementChildrenContentDecl()
4371 *
4372 * Returns the tree of xmlElementContentPtr describing the element
4373 * hierarchy.
4374 */
4375/**
Owen Taylor3473f882001-02-23 17:55:21 +00004376 * xmlParseElementChildrenContentDecl:
4377 * @ctxt: an XML parser context
4378 *
4379 * parse the declaration for a Mixed Element content
4380 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4381 *
4382 *
4383 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4384 *
4385 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4386 *
4387 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4388 *
4389 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4390 *
4391 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4392 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004393 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004394 * opening or closing parentheses in a choice, seq, or Mixed
4395 * construct is contained in the replacement text for a parameter
4396 * entity, both must be contained in the same replacement text. For
4397 * interoperability, if a parameter-entity reference appears in a
4398 * choice, seq, or Mixed construct, its replacement text should not
4399 * be empty, and neither the first nor last non-blank character of
4400 * the replacement text should be a connector (| or ,).
4401 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004402 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004403 * hierarchy.
4404 */
4405xmlElementContentPtr
4406#ifdef VMS
4407xmlParseElementChildrenContentD
4408#else
4409xmlParseElementChildrenContentDecl
4410#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004411(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004412 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4413 xmlChar *elem;
4414 xmlChar type = 0;
4415
4416 SKIP_BLANKS;
4417 GROW;
4418 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004419 xmlParserInputPtr input = ctxt->input;
4420
Owen Taylor3473f882001-02-23 17:55:21 +00004421 /* Recurse on first child */
4422 NEXT;
4423 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004424 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004425 SKIP_BLANKS;
4426 GROW;
4427 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004428 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004429 if (elem == NULL) {
4430 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4432 ctxt->sax->error(ctxt->userData,
4433 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4434 ctxt->wellFormed = 0;
4435 ctxt->disableSAX = 1;
4436 return(NULL);
4437 }
4438 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4439 GROW;
4440 if (RAW == '?') {
4441 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4442 NEXT;
4443 } else if (RAW == '*') {
4444 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4445 NEXT;
4446 } else if (RAW == '+') {
4447 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4448 NEXT;
4449 } else {
4450 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4451 }
4452 xmlFree(elem);
4453 GROW;
4454 }
4455 SKIP_BLANKS;
4456 SHRINK;
4457 while (RAW != ')') {
4458 /*
4459 * Each loop we parse one separator and one element.
4460 */
4461 if (RAW == ',') {
4462 if (type == 0) type = CUR;
4463
4464 /*
4465 * Detect "Name | Name , Name" error
4466 */
4467 else if (type != CUR) {
4468 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4470 ctxt->sax->error(ctxt->userData,
4471 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4472 type);
4473 ctxt->wellFormed = 0;
4474 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004475 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004476 xmlFreeElementContent(last);
4477 if (ret != NULL)
4478 xmlFreeElementContent(ret);
4479 return(NULL);
4480 }
4481 NEXT;
4482
4483 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4484 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004485 if ((last != NULL) && (last != ret))
4486 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004487 xmlFreeElementContent(ret);
4488 return(NULL);
4489 }
4490 if (last == NULL) {
4491 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004492 if (ret != NULL)
4493 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004494 ret = cur = op;
4495 } else {
4496 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004497 if (op != NULL)
4498 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004499 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004500 if (last != NULL)
4501 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004502 cur =op;
4503 last = NULL;
4504 }
4505 } else if (RAW == '|') {
4506 if (type == 0) type = CUR;
4507
4508 /*
4509 * Detect "Name , Name | Name" error
4510 */
4511 else if (type != CUR) {
4512 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4514 ctxt->sax->error(ctxt->userData,
4515 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4516 type);
4517 ctxt->wellFormed = 0;
4518 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004519 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004520 xmlFreeElementContent(last);
4521 if (ret != NULL)
4522 xmlFreeElementContent(ret);
4523 return(NULL);
4524 }
4525 NEXT;
4526
4527 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4528 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004529 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004530 xmlFreeElementContent(last);
4531 if (ret != NULL)
4532 xmlFreeElementContent(ret);
4533 return(NULL);
4534 }
4535 if (last == NULL) {
4536 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004537 if (ret != NULL)
4538 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004539 ret = cur = op;
4540 } else {
4541 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004542 if (op != NULL)
4543 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004544 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004545 if (last != NULL)
4546 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004547 cur =op;
4548 last = NULL;
4549 }
4550 } else {
4551 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4553 ctxt->sax->error(ctxt->userData,
4554 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4555 ctxt->wellFormed = 0;
4556 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004557 if (ret != NULL)
4558 xmlFreeElementContent(ret);
4559 return(NULL);
4560 }
4561 GROW;
4562 SKIP_BLANKS;
4563 GROW;
4564 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004565 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004566 /* Recurse on second child */
4567 NEXT;
4568 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004569 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004570 SKIP_BLANKS;
4571 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004572 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004573 if (elem == NULL) {
4574 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4576 ctxt->sax->error(ctxt->userData,
4577 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4578 ctxt->wellFormed = 0;
4579 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004580 if (ret != NULL)
4581 xmlFreeElementContent(ret);
4582 return(NULL);
4583 }
4584 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4585 xmlFree(elem);
4586 if (RAW == '?') {
4587 last->ocur = XML_ELEMENT_CONTENT_OPT;
4588 NEXT;
4589 } else if (RAW == '*') {
4590 last->ocur = XML_ELEMENT_CONTENT_MULT;
4591 NEXT;
4592 } else if (RAW == '+') {
4593 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4594 NEXT;
4595 } else {
4596 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4597 }
4598 }
4599 SKIP_BLANKS;
4600 GROW;
4601 }
4602 if ((cur != NULL) && (last != NULL)) {
4603 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004604 if (last != NULL)
4605 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004606 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004607 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4608 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4609 if (ctxt->vctxt.error != NULL)
4610 ctxt->vctxt.error(ctxt->vctxt.userData,
4611"Element content declaration doesn't start and stop in the same entity\n");
4612 ctxt->valid = 0;
4613 }
Owen Taylor3473f882001-02-23 17:55:21 +00004614 NEXT;
4615 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004616 if (ret != NULL)
4617 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004618 NEXT;
4619 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004620 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004621 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004622 cur = ret;
4623 /*
4624 * Some normalization:
4625 * (a | b* | c?)* == (a | b | c)*
4626 */
4627 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4628 if ((cur->c1 != NULL) &&
4629 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4630 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4631 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4632 if ((cur->c2 != NULL) &&
4633 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4634 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4635 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4636 cur = cur->c2;
4637 }
4638 }
Owen Taylor3473f882001-02-23 17:55:21 +00004639 NEXT;
4640 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004641 if (ret != NULL) {
4642 int found = 0;
4643
Daniel Veillarde470df72001-04-18 21:41:07 +00004644 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004645 /*
4646 * Some normalization:
4647 * (a | b*)+ == (a | b)*
4648 * (a | b?)+ == (a | b)*
4649 */
4650 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4651 if ((cur->c1 != NULL) &&
4652 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4653 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4654 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4655 found = 1;
4656 }
4657 if ((cur->c2 != NULL) &&
4658 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4659 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4660 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4661 found = 1;
4662 }
4663 cur = cur->c2;
4664 }
4665 if (found)
4666 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4667 }
Owen Taylor3473f882001-02-23 17:55:21 +00004668 NEXT;
4669 }
4670 return(ret);
4671}
4672
4673/**
4674 * xmlParseElementContentDecl:
4675 * @ctxt: an XML parser context
4676 * @name: the name of the element being defined.
4677 * @result: the Element Content pointer will be stored here if any
4678 *
4679 * parse the declaration for an Element content either Mixed or Children,
4680 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4681 *
4682 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4683 *
4684 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4685 */
4686
4687int
4688xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4689 xmlElementContentPtr *result) {
4690
4691 xmlElementContentPtr tree = NULL;
4692 xmlParserInputPtr input = ctxt->input;
4693 int res;
4694
4695 *result = NULL;
4696
4697 if (RAW != '(') {
4698 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4700 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004701 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004702 ctxt->wellFormed = 0;
4703 ctxt->disableSAX = 1;
4704 return(-1);
4705 }
4706 NEXT;
4707 GROW;
4708 SKIP_BLANKS;
4709 if ((RAW == '#') && (NXT(1) == 'P') &&
4710 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4711 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4712 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004713 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004714 res = XML_ELEMENT_TYPE_MIXED;
4715 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004716 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004717 res = XML_ELEMENT_TYPE_ELEMENT;
4718 }
Owen Taylor3473f882001-02-23 17:55:21 +00004719 SKIP_BLANKS;
4720 *result = tree;
4721 return(res);
4722}
4723
4724/**
4725 * xmlParseElementDecl:
4726 * @ctxt: an XML parser context
4727 *
4728 * parse an Element declaration.
4729 *
4730 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4731 *
4732 * [ VC: Unique Element Type Declaration ]
4733 * No element type may be declared more than once
4734 *
4735 * Returns the type of the element, or -1 in case of error
4736 */
4737int
4738xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4739 xmlChar *name;
4740 int ret = -1;
4741 xmlElementContentPtr content = NULL;
4742
4743 GROW;
4744 if ((RAW == '<') && (NXT(1) == '!') &&
4745 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4746 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4747 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4748 (NXT(8) == 'T')) {
4749 xmlParserInputPtr input = ctxt->input;
4750
4751 SKIP(9);
4752 if (!IS_BLANK(CUR)) {
4753 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4755 ctxt->sax->error(ctxt->userData,
4756 "Space required after 'ELEMENT'\n");
4757 ctxt->wellFormed = 0;
4758 ctxt->disableSAX = 1;
4759 }
4760 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004761 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004762 if (name == NULL) {
4763 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4765 ctxt->sax->error(ctxt->userData,
4766 "xmlParseElementDecl: no name for Element\n");
4767 ctxt->wellFormed = 0;
4768 ctxt->disableSAX = 1;
4769 return(-1);
4770 }
4771 while ((RAW == 0) && (ctxt->inputNr > 1))
4772 xmlPopInput(ctxt);
4773 if (!IS_BLANK(CUR)) {
4774 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4776 ctxt->sax->error(ctxt->userData,
4777 "Space required after the element name\n");
4778 ctxt->wellFormed = 0;
4779 ctxt->disableSAX = 1;
4780 }
4781 SKIP_BLANKS;
4782 if ((RAW == 'E') && (NXT(1) == 'M') &&
4783 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4784 (NXT(4) == 'Y')) {
4785 SKIP(5);
4786 /*
4787 * Element must always be empty.
4788 */
4789 ret = XML_ELEMENT_TYPE_EMPTY;
4790 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4791 (NXT(2) == 'Y')) {
4792 SKIP(3);
4793 /*
4794 * Element is a generic container.
4795 */
4796 ret = XML_ELEMENT_TYPE_ANY;
4797 } else if (RAW == '(') {
4798 ret = xmlParseElementContentDecl(ctxt, name, &content);
4799 } else {
4800 /*
4801 * [ WFC: PEs in Internal Subset ] error handling.
4802 */
4803 if ((RAW == '%') && (ctxt->external == 0) &&
4804 (ctxt->inputNr == 1)) {
4805 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807 ctxt->sax->error(ctxt->userData,
4808 "PEReference: forbidden within markup decl in internal subset\n");
4809 } else {
4810 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4812 ctxt->sax->error(ctxt->userData,
4813 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4814 }
4815 ctxt->wellFormed = 0;
4816 ctxt->disableSAX = 1;
4817 if (name != NULL) xmlFree(name);
4818 return(-1);
4819 }
4820
4821 SKIP_BLANKS;
4822 /*
4823 * Pop-up of finished entities.
4824 */
4825 while ((RAW == 0) && (ctxt->inputNr > 1))
4826 xmlPopInput(ctxt);
4827 SKIP_BLANKS;
4828
4829 if (RAW != '>') {
4830 ctxt->errNo = XML_ERR_GT_REQUIRED;
4831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4832 ctxt->sax->error(ctxt->userData,
4833 "xmlParseElementDecl: expected '>' at the end\n");
4834 ctxt->wellFormed = 0;
4835 ctxt->disableSAX = 1;
4836 } else {
4837 if (input != ctxt->input) {
4838 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4839 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4840 ctxt->sax->error(ctxt->userData,
4841"Element declaration doesn't start and stop in the same entity\n");
4842 ctxt->wellFormed = 0;
4843 ctxt->disableSAX = 1;
4844 }
4845
4846 NEXT;
4847 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4848 (ctxt->sax->elementDecl != NULL))
4849 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4850 content);
4851 }
4852 if (content != NULL) {
4853 xmlFreeElementContent(content);
4854 }
4855 if (name != NULL) {
4856 xmlFree(name);
4857 }
4858 }
4859 return(ret);
4860}
4861
4862/**
Owen Taylor3473f882001-02-23 17:55:21 +00004863 * xmlParseConditionalSections
4864 * @ctxt: an XML parser context
4865 *
4866 * [61] conditionalSect ::= includeSect | ignoreSect
4867 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4868 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4869 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4870 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4871 */
4872
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004873static void
Owen Taylor3473f882001-02-23 17:55:21 +00004874xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4875 SKIP(3);
4876 SKIP_BLANKS;
4877 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4878 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4879 (NXT(6) == 'E')) {
4880 SKIP(7);
4881 SKIP_BLANKS;
4882 if (RAW != '[') {
4883 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4885 ctxt->sax->error(ctxt->userData,
4886 "XML conditional section '[' expected\n");
4887 ctxt->wellFormed = 0;
4888 ctxt->disableSAX = 1;
4889 } else {
4890 NEXT;
4891 }
4892 if (xmlParserDebugEntities) {
4893 if ((ctxt->input != NULL) && (ctxt->input->filename))
4894 xmlGenericError(xmlGenericErrorContext,
4895 "%s(%d): ", ctxt->input->filename,
4896 ctxt->input->line);
4897 xmlGenericError(xmlGenericErrorContext,
4898 "Entering INCLUDE Conditional Section\n");
4899 }
4900
4901 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4902 (NXT(2) != '>'))) {
4903 const xmlChar *check = CUR_PTR;
4904 int cons = ctxt->input->consumed;
4905 int tok = ctxt->token;
4906
4907 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4908 xmlParseConditionalSections(ctxt);
4909 } else if (IS_BLANK(CUR)) {
4910 NEXT;
4911 } else if (RAW == '%') {
4912 xmlParsePEReference(ctxt);
4913 } else
4914 xmlParseMarkupDecl(ctxt);
4915
4916 /*
4917 * Pop-up of finished entities.
4918 */
4919 while ((RAW == 0) && (ctxt->inputNr > 1))
4920 xmlPopInput(ctxt);
4921
4922 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4923 (tok == ctxt->token)) {
4924 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4926 ctxt->sax->error(ctxt->userData,
4927 "Content error in the external subset\n");
4928 ctxt->wellFormed = 0;
4929 ctxt->disableSAX = 1;
4930 break;
4931 }
4932 }
4933 if (xmlParserDebugEntities) {
4934 if ((ctxt->input != NULL) && (ctxt->input->filename))
4935 xmlGenericError(xmlGenericErrorContext,
4936 "%s(%d): ", ctxt->input->filename,
4937 ctxt->input->line);
4938 xmlGenericError(xmlGenericErrorContext,
4939 "Leaving INCLUDE Conditional Section\n");
4940 }
4941
4942 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4943 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4944 int state;
4945 int instate;
4946 int depth = 0;
4947
4948 SKIP(6);
4949 SKIP_BLANKS;
4950 if (RAW != '[') {
4951 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4953 ctxt->sax->error(ctxt->userData,
4954 "XML conditional section '[' expected\n");
4955 ctxt->wellFormed = 0;
4956 ctxt->disableSAX = 1;
4957 } else {
4958 NEXT;
4959 }
4960 if (xmlParserDebugEntities) {
4961 if ((ctxt->input != NULL) && (ctxt->input->filename))
4962 xmlGenericError(xmlGenericErrorContext,
4963 "%s(%d): ", ctxt->input->filename,
4964 ctxt->input->line);
4965 xmlGenericError(xmlGenericErrorContext,
4966 "Entering IGNORE Conditional Section\n");
4967 }
4968
4969 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004970 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004971 * But disable SAX event generating DTD building in the meantime
4972 */
4973 state = ctxt->disableSAX;
4974 instate = ctxt->instate;
4975 ctxt->disableSAX = 1;
4976 ctxt->instate = XML_PARSER_IGNORE;
4977
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004978 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004979 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4980 depth++;
4981 SKIP(3);
4982 continue;
4983 }
4984 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4985 if (--depth >= 0) SKIP(3);
4986 continue;
4987 }
4988 NEXT;
4989 continue;
4990 }
4991
4992 ctxt->disableSAX = state;
4993 ctxt->instate = instate;
4994
4995 if (xmlParserDebugEntities) {
4996 if ((ctxt->input != NULL) && (ctxt->input->filename))
4997 xmlGenericError(xmlGenericErrorContext,
4998 "%s(%d): ", ctxt->input->filename,
4999 ctxt->input->line);
5000 xmlGenericError(xmlGenericErrorContext,
5001 "Leaving IGNORE Conditional Section\n");
5002 }
5003
5004 } else {
5005 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5007 ctxt->sax->error(ctxt->userData,
5008 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5009 ctxt->wellFormed = 0;
5010 ctxt->disableSAX = 1;
5011 }
5012
5013 if (RAW == 0)
5014 SHRINK;
5015
5016 if (RAW == 0) {
5017 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5019 ctxt->sax->error(ctxt->userData,
5020 "XML conditional section not closed\n");
5021 ctxt->wellFormed = 0;
5022 ctxt->disableSAX = 1;
5023 } else {
5024 SKIP(3);
5025 }
5026}
5027
5028/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005029 * xmlParseMarkupDecl:
5030 * @ctxt: an XML parser context
5031 *
5032 * parse Markup declarations
5033 *
5034 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5035 * NotationDecl | PI | Comment
5036 *
5037 * [ VC: Proper Declaration/PE Nesting ]
5038 * Parameter-entity replacement text must be properly nested with
5039 * markup declarations. That is to say, if either the first character
5040 * or the last character of a markup declaration (markupdecl above) is
5041 * contained in the replacement text for a parameter-entity reference,
5042 * both must be contained in the same replacement text.
5043 *
5044 * [ WFC: PEs in Internal Subset ]
5045 * In the internal DTD subset, parameter-entity references can occur
5046 * only where markup declarations can occur, not within markup declarations.
5047 * (This does not apply to references that occur in external parameter
5048 * entities or to the external subset.)
5049 */
5050void
5051xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5052 GROW;
5053 xmlParseElementDecl(ctxt);
5054 xmlParseAttributeListDecl(ctxt);
5055 xmlParseEntityDecl(ctxt);
5056 xmlParseNotationDecl(ctxt);
5057 xmlParsePI(ctxt);
5058 xmlParseComment(ctxt);
5059 /*
5060 * This is only for internal subset. On external entities,
5061 * the replacement is done before parsing stage
5062 */
5063 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5064 xmlParsePEReference(ctxt);
5065
5066 /*
5067 * Conditional sections are allowed from entities included
5068 * by PE References in the internal subset.
5069 */
5070 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5071 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5072 xmlParseConditionalSections(ctxt);
5073 }
5074 }
5075
5076 ctxt->instate = XML_PARSER_DTD;
5077}
5078
5079/**
5080 * xmlParseTextDecl:
5081 * @ctxt: an XML parser context
5082 *
5083 * parse an XML declaration header for external entities
5084 *
5085 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5086 *
5087 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5088 */
5089
5090void
5091xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5092 xmlChar *version;
5093
5094 /*
5095 * We know that '<?xml' is here.
5096 */
5097 if ((RAW == '<') && (NXT(1) == '?') &&
5098 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5099 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5100 SKIP(5);
5101 } else {
5102 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5104 ctxt->sax->error(ctxt->userData,
5105 "Text declaration '<?xml' required\n");
5106 ctxt->wellFormed = 0;
5107 ctxt->disableSAX = 1;
5108
5109 return;
5110 }
5111
5112 if (!IS_BLANK(CUR)) {
5113 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5115 ctxt->sax->error(ctxt->userData,
5116 "Space needed after '<?xml'\n");
5117 ctxt->wellFormed = 0;
5118 ctxt->disableSAX = 1;
5119 }
5120 SKIP_BLANKS;
5121
5122 /*
5123 * We may have the VersionInfo here.
5124 */
5125 version = xmlParseVersionInfo(ctxt);
5126 if (version == NULL)
5127 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005128 else {
5129 if (!IS_BLANK(CUR)) {
5130 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5132 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5133 ctxt->wellFormed = 0;
5134 ctxt->disableSAX = 1;
5135 }
5136 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005137 ctxt->input->version = version;
5138
5139 /*
5140 * We must have the encoding declaration
5141 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005142 xmlParseEncodingDecl(ctxt);
5143 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5144 /*
5145 * The XML REC instructs us to stop parsing right here
5146 */
5147 return;
5148 }
5149
5150 SKIP_BLANKS;
5151 if ((RAW == '?') && (NXT(1) == '>')) {
5152 SKIP(2);
5153 } else if (RAW == '>') {
5154 /* Deprecated old WD ... */
5155 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5157 ctxt->sax->error(ctxt->userData,
5158 "XML declaration must end-up with '?>'\n");
5159 ctxt->wellFormed = 0;
5160 ctxt->disableSAX = 1;
5161 NEXT;
5162 } else {
5163 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5165 ctxt->sax->error(ctxt->userData,
5166 "parsing XML declaration: '?>' expected\n");
5167 ctxt->wellFormed = 0;
5168 ctxt->disableSAX = 1;
5169 MOVETO_ENDTAG(CUR_PTR);
5170 NEXT;
5171 }
5172}
5173
5174/**
Owen Taylor3473f882001-02-23 17:55:21 +00005175 * xmlParseExternalSubset:
5176 * @ctxt: an XML parser context
5177 * @ExternalID: the external identifier
5178 * @SystemID: the system identifier (or URL)
5179 *
5180 * parse Markup declarations from an external subset
5181 *
5182 * [30] extSubset ::= textDecl? extSubsetDecl
5183 *
5184 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5185 */
5186void
5187xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5188 const xmlChar *SystemID) {
5189 GROW;
5190 if ((RAW == '<') && (NXT(1) == '?') &&
5191 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5192 (NXT(4) == 'l')) {
5193 xmlParseTextDecl(ctxt);
5194 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5195 /*
5196 * The XML REC instructs us to stop parsing right here
5197 */
5198 ctxt->instate = XML_PARSER_EOF;
5199 return;
5200 }
5201 }
5202 if (ctxt->myDoc == NULL) {
5203 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5204 }
5205 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5206 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5207
5208 ctxt->instate = XML_PARSER_DTD;
5209 ctxt->external = 1;
5210 while (((RAW == '<') && (NXT(1) == '?')) ||
5211 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005212 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005213 const xmlChar *check = CUR_PTR;
5214 int cons = ctxt->input->consumed;
5215 int tok = ctxt->token;
5216
5217 GROW;
5218 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5219 xmlParseConditionalSections(ctxt);
5220 } else if (IS_BLANK(CUR)) {
5221 NEXT;
5222 } else if (RAW == '%') {
5223 xmlParsePEReference(ctxt);
5224 } else
5225 xmlParseMarkupDecl(ctxt);
5226
5227 /*
5228 * Pop-up of finished entities.
5229 */
5230 while ((RAW == 0) && (ctxt->inputNr > 1))
5231 xmlPopInput(ctxt);
5232
5233 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5234 (tok == ctxt->token)) {
5235 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5237 ctxt->sax->error(ctxt->userData,
5238 "Content error in the external subset\n");
5239 ctxt->wellFormed = 0;
5240 ctxt->disableSAX = 1;
5241 break;
5242 }
5243 }
5244
5245 if (RAW != 0) {
5246 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5248 ctxt->sax->error(ctxt->userData,
5249 "Extra content at the end of the document\n");
5250 ctxt->wellFormed = 0;
5251 ctxt->disableSAX = 1;
5252 }
5253
5254}
5255
5256/**
5257 * xmlParseReference:
5258 * @ctxt: an XML parser context
5259 *
5260 * parse and handle entity references in content, depending on the SAX
5261 * interface, this may end-up in a call to character() if this is a
5262 * CharRef, a predefined entity, if there is no reference() callback.
5263 * or if the parser was asked to switch to that mode.
5264 *
5265 * [67] Reference ::= EntityRef | CharRef
5266 */
5267void
5268xmlParseReference(xmlParserCtxtPtr ctxt) {
5269 xmlEntityPtr ent;
5270 xmlChar *val;
5271 if (RAW != '&') return;
5272
5273 if (NXT(1) == '#') {
5274 int i = 0;
5275 xmlChar out[10];
5276 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005277 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005278
5279 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5280 /*
5281 * So we are using non-UTF-8 buffers
5282 * Check that the char fit on 8bits, if not
5283 * generate a CharRef.
5284 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005285 if (value <= 0xFF) {
5286 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005287 out[1] = 0;
5288 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5289 (!ctxt->disableSAX))
5290 ctxt->sax->characters(ctxt->userData, out, 1);
5291 } else {
5292 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005293 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005294 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005295 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005296 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5297 (!ctxt->disableSAX))
5298 ctxt->sax->reference(ctxt->userData, out);
5299 }
5300 } else {
5301 /*
5302 * Just encode the value in UTF-8
5303 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005304 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005305 out[i] = 0;
5306 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5307 (!ctxt->disableSAX))
5308 ctxt->sax->characters(ctxt->userData, out, i);
5309 }
5310 } else {
5311 ent = xmlParseEntityRef(ctxt);
5312 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005313 if (!ctxt->wellFormed)
5314 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005315 if ((ent->name != NULL) &&
5316 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5317 xmlNodePtr list = NULL;
5318 int ret;
5319
5320
5321 /*
5322 * The first reference to the entity trigger a parsing phase
5323 * where the ent->children is filled with the result from
5324 * the parsing.
5325 */
5326 if (ent->children == NULL) {
5327 xmlChar *value;
5328 value = ent->content;
5329
5330 /*
5331 * Check that this entity is well formed
5332 */
5333 if ((value != NULL) &&
5334 (value[1] == 0) && (value[0] == '<') &&
5335 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5336 /*
5337 * DONE: get definite answer on this !!!
5338 * Lots of entity decls are used to declare a single
5339 * char
5340 * <!ENTITY lt "<">
5341 * Which seems to be valid since
5342 * 2.4: The ampersand character (&) and the left angle
5343 * bracket (<) may appear in their literal form only
5344 * when used ... They are also legal within the literal
5345 * entity value of an internal entity declaration;i
5346 * see "4.3.2 Well-Formed Parsed Entities".
5347 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5348 * Looking at the OASIS test suite and James Clark
5349 * tests, this is broken. However the XML REC uses
5350 * it. Is the XML REC not well-formed ????
5351 * This is a hack to avoid this problem
5352 *
5353 * ANSWER: since lt gt amp .. are already defined,
5354 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005355 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005356 * is lousy but acceptable.
5357 */
5358 list = xmlNewDocText(ctxt->myDoc, value);
5359 if (list != NULL) {
5360 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5361 (ent->children == NULL)) {
5362 ent->children = list;
5363 ent->last = list;
5364 list->parent = (xmlNodePtr) ent;
5365 } else {
5366 xmlFreeNodeList(list);
5367 }
5368 } else if (list != NULL) {
5369 xmlFreeNodeList(list);
5370 }
5371 } else {
5372 /*
5373 * 4.3.2: An internal general parsed entity is well-formed
5374 * if its replacement text matches the production labeled
5375 * content.
5376 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005377
5378 void *user_data;
5379 /*
5380 * This is a bit hackish but this seems the best
5381 * way to make sure both SAX and DOM entity support
5382 * behaves okay.
5383 */
5384 if (ctxt->userData == ctxt)
5385 user_data = NULL;
5386 else
5387 user_data = ctxt->userData;
5388
Owen Taylor3473f882001-02-23 17:55:21 +00005389 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5390 ctxt->depth++;
5391 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005392 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005393 value, &list);
5394 ctxt->depth--;
5395 } else if (ent->etype ==
5396 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5397 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005398 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005399 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005400 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005401 ctxt->depth--;
5402 } else {
5403 ret = -1;
5404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5405 ctxt->sax->error(ctxt->userData,
5406 "Internal: invalid entity type\n");
5407 }
5408 if (ret == XML_ERR_ENTITY_LOOP) {
5409 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5411 ctxt->sax->error(ctxt->userData,
5412 "Detected entity reference loop\n");
5413 ctxt->wellFormed = 0;
5414 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005415 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005416 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005417 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5418 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005419 (ent->children == NULL)) {
5420 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005421 if (ctxt->replaceEntities) {
5422 /*
5423 * Prune it directly in the generated document
5424 * except for single text nodes.
5425 */
5426 if ((list->type == XML_TEXT_NODE) &&
5427 (list->next == NULL)) {
5428 list->parent = (xmlNodePtr) ent;
5429 list = NULL;
5430 } else {
5431 while (list != NULL) {
5432 list->parent = (xmlNodePtr) ctxt->node;
5433 if (list->next == NULL)
5434 ent->last = list;
5435 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005436 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005437 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005438 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5439 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005440 }
5441 } else {
5442 while (list != NULL) {
5443 list->parent = (xmlNodePtr) ent;
5444 if (list->next == NULL)
5445 ent->last = list;
5446 list = list->next;
5447 }
Owen Taylor3473f882001-02-23 17:55:21 +00005448 }
5449 } else {
5450 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005451 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005452 }
5453 } else if (ret > 0) {
5454 ctxt->errNo = ret;
5455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5456 ctxt->sax->error(ctxt->userData,
5457 "Entity value required\n");
5458 ctxt->wellFormed = 0;
5459 ctxt->disableSAX = 1;
5460 } else if (list != NULL) {
5461 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005462 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005463 }
5464 }
5465 }
5466 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5467 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5468 /*
5469 * Create a node.
5470 */
5471 ctxt->sax->reference(ctxt->userData, ent->name);
5472 return;
5473 } else if (ctxt->replaceEntities) {
5474 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5475 /*
5476 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005477 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005478 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005479 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005480 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005481 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005482 cur = ent->children;
5483 while (cur != NULL) {
5484 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005485 if (firstChild == NULL){
5486 firstChild = new;
5487 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005488 xmlAddChild(ctxt->node, new);
5489 if (cur == ent->last)
5490 break;
5491 cur = cur->next;
5492 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005493 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5494 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005495 } else {
5496 /*
5497 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005498 * node with a possible previous text one which
5499 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005500 */
5501 if (ent->children->type == XML_TEXT_NODE)
5502 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5503 if ((ent->last != ent->children) &&
5504 (ent->last->type == XML_TEXT_NODE))
5505 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5506 xmlAddChildList(ctxt->node, ent->children);
5507 }
5508
Owen Taylor3473f882001-02-23 17:55:21 +00005509 /*
5510 * This is to avoid a nasty side effect, see
5511 * characters() in SAX.c
5512 */
5513 ctxt->nodemem = 0;
5514 ctxt->nodelen = 0;
5515 return;
5516 } else {
5517 /*
5518 * Probably running in SAX mode
5519 */
5520 xmlParserInputPtr input;
5521
5522 input = xmlNewEntityInputStream(ctxt, ent);
5523 xmlPushInput(ctxt, input);
5524 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5525 (RAW == '<') && (NXT(1) == '?') &&
5526 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5527 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5528 xmlParseTextDecl(ctxt);
5529 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5530 /*
5531 * The XML REC instructs us to stop parsing right here
5532 */
5533 ctxt->instate = XML_PARSER_EOF;
5534 return;
5535 }
5536 if (input->standalone == 1) {
5537 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5539 ctxt->sax->error(ctxt->userData,
5540 "external parsed entities cannot be standalone\n");
5541 ctxt->wellFormed = 0;
5542 ctxt->disableSAX = 1;
5543 }
5544 }
5545 return;
5546 }
5547 }
5548 } else {
5549 val = ent->content;
5550 if (val == NULL) return;
5551 /*
5552 * inline the entity.
5553 */
5554 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5555 (!ctxt->disableSAX))
5556 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5557 }
5558 }
5559}
5560
5561/**
5562 * xmlParseEntityRef:
5563 * @ctxt: an XML parser context
5564 *
5565 * parse ENTITY references declarations
5566 *
5567 * [68] EntityRef ::= '&' Name ';'
5568 *
5569 * [ WFC: Entity Declared ]
5570 * In a document without any DTD, a document with only an internal DTD
5571 * subset which contains no parameter entity references, or a document
5572 * with "standalone='yes'", the Name given in the entity reference
5573 * must match that in an entity declaration, except that well-formed
5574 * documents need not declare any of the following entities: amp, lt,
5575 * gt, apos, quot. The declaration of a parameter entity must precede
5576 * any reference to it. Similarly, the declaration of a general entity
5577 * must precede any reference to it which appears in a default value in an
5578 * attribute-list declaration. Note that if entities are declared in the
5579 * external subset or in external parameter entities, a non-validating
5580 * processor is not obligated to read and process their declarations;
5581 * for such documents, the rule that an entity must be declared is a
5582 * well-formedness constraint only if standalone='yes'.
5583 *
5584 * [ WFC: Parsed Entity ]
5585 * An entity reference must not contain the name of an unparsed entity
5586 *
5587 * Returns the xmlEntityPtr if found, or NULL otherwise.
5588 */
5589xmlEntityPtr
5590xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5591 xmlChar *name;
5592 xmlEntityPtr ent = NULL;
5593
5594 GROW;
5595
5596 if (RAW == '&') {
5597 NEXT;
5598 name = xmlParseName(ctxt);
5599 if (name == NULL) {
5600 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5602 ctxt->sax->error(ctxt->userData,
5603 "xmlParseEntityRef: no name\n");
5604 ctxt->wellFormed = 0;
5605 ctxt->disableSAX = 1;
5606 } else {
5607 if (RAW == ';') {
5608 NEXT;
5609 /*
5610 * Ask first SAX for entity resolution, otherwise try the
5611 * predefined set.
5612 */
5613 if (ctxt->sax != NULL) {
5614 if (ctxt->sax->getEntity != NULL)
5615 ent = ctxt->sax->getEntity(ctxt->userData, name);
5616 if (ent == NULL)
5617 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005618 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5619 ent = getEntity(ctxt, name);
5620 }
Owen Taylor3473f882001-02-23 17:55:21 +00005621 }
5622 /*
5623 * [ WFC: Entity Declared ]
5624 * In a document without any DTD, a document with only an
5625 * internal DTD subset which contains no parameter entity
5626 * references, or a document with "standalone='yes'", the
5627 * Name given in the entity reference must match that in an
5628 * entity declaration, except that well-formed documents
5629 * need not declare any of the following entities: amp, lt,
5630 * gt, apos, quot.
5631 * The declaration of a parameter entity must precede any
5632 * reference to it.
5633 * Similarly, the declaration of a general entity must
5634 * precede any reference to it which appears in a default
5635 * value in an attribute-list declaration. Note that if
5636 * entities are declared in the external subset or in
5637 * external parameter entities, a non-validating processor
5638 * is not obligated to read and process their declarations;
5639 * for such documents, the rule that an entity must be
5640 * declared is a well-formedness constraint only if
5641 * standalone='yes'.
5642 */
5643 if (ent == NULL) {
5644 if ((ctxt->standalone == 1) ||
5645 ((ctxt->hasExternalSubset == 0) &&
5646 (ctxt->hasPErefs == 0))) {
5647 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5649 ctxt->sax->error(ctxt->userData,
5650 "Entity '%s' not defined\n", name);
5651 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005652 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005653 ctxt->disableSAX = 1;
5654 } else {
5655 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005657 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005658 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005659 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005660 }
5661 }
5662
5663 /*
5664 * [ WFC: Parsed Entity ]
5665 * An entity reference must not contain the name of an
5666 * unparsed entity
5667 */
5668 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5669 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5671 ctxt->sax->error(ctxt->userData,
5672 "Entity reference to unparsed entity %s\n", name);
5673 ctxt->wellFormed = 0;
5674 ctxt->disableSAX = 1;
5675 }
5676
5677 /*
5678 * [ WFC: No External Entity References ]
5679 * Attribute values cannot contain direct or indirect
5680 * entity references to external entities.
5681 */
5682 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5683 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5684 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5686 ctxt->sax->error(ctxt->userData,
5687 "Attribute references external entity '%s'\n", name);
5688 ctxt->wellFormed = 0;
5689 ctxt->disableSAX = 1;
5690 }
5691 /*
5692 * [ WFC: No < in Attribute Values ]
5693 * The replacement text of any entity referred to directly or
5694 * indirectly in an attribute value (other than "&lt;") must
5695 * not contain a <.
5696 */
5697 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5698 (ent != NULL) &&
5699 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5700 (ent->content != NULL) &&
5701 (xmlStrchr(ent->content, '<'))) {
5702 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5704 ctxt->sax->error(ctxt->userData,
5705 "'<' in entity '%s' is not allowed in attributes values\n", name);
5706 ctxt->wellFormed = 0;
5707 ctxt->disableSAX = 1;
5708 }
5709
5710 /*
5711 * Internal check, no parameter entities here ...
5712 */
5713 else {
5714 switch (ent->etype) {
5715 case XML_INTERNAL_PARAMETER_ENTITY:
5716 case XML_EXTERNAL_PARAMETER_ENTITY:
5717 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5719 ctxt->sax->error(ctxt->userData,
5720 "Attempt to reference the parameter entity '%s'\n", name);
5721 ctxt->wellFormed = 0;
5722 ctxt->disableSAX = 1;
5723 break;
5724 default:
5725 break;
5726 }
5727 }
5728
5729 /*
5730 * [ WFC: No Recursion ]
5731 * A parsed entity must not contain a recursive reference
5732 * to itself, either directly or indirectly.
5733 * Done somewhere else
5734 */
5735
5736 } else {
5737 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5739 ctxt->sax->error(ctxt->userData,
5740 "xmlParseEntityRef: expecting ';'\n");
5741 ctxt->wellFormed = 0;
5742 ctxt->disableSAX = 1;
5743 }
5744 xmlFree(name);
5745 }
5746 }
5747 return(ent);
5748}
5749
5750/**
5751 * xmlParseStringEntityRef:
5752 * @ctxt: an XML parser context
5753 * @str: a pointer to an index in the string
5754 *
5755 * parse ENTITY references declarations, but this version parses it from
5756 * a string value.
5757 *
5758 * [68] EntityRef ::= '&' Name ';'
5759 *
5760 * [ WFC: Entity Declared ]
5761 * In a document without any DTD, a document with only an internal DTD
5762 * subset which contains no parameter entity references, or a document
5763 * with "standalone='yes'", the Name given in the entity reference
5764 * must match that in an entity declaration, except that well-formed
5765 * documents need not declare any of the following entities: amp, lt,
5766 * gt, apos, quot. The declaration of a parameter entity must precede
5767 * any reference to it. Similarly, the declaration of a general entity
5768 * must precede any reference to it which appears in a default value in an
5769 * attribute-list declaration. Note that if entities are declared in the
5770 * external subset or in external parameter entities, a non-validating
5771 * processor is not obligated to read and process their declarations;
5772 * for such documents, the rule that an entity must be declared is a
5773 * well-formedness constraint only if standalone='yes'.
5774 *
5775 * [ WFC: Parsed Entity ]
5776 * An entity reference must not contain the name of an unparsed entity
5777 *
5778 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5779 * is updated to the current location in the string.
5780 */
5781xmlEntityPtr
5782xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5783 xmlChar *name;
5784 const xmlChar *ptr;
5785 xmlChar cur;
5786 xmlEntityPtr ent = NULL;
5787
5788 if ((str == NULL) || (*str == NULL))
5789 return(NULL);
5790 ptr = *str;
5791 cur = *ptr;
5792 if (cur == '&') {
5793 ptr++;
5794 cur = *ptr;
5795 name = xmlParseStringName(ctxt, &ptr);
5796 if (name == NULL) {
5797 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5799 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005800 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005801 ctxt->wellFormed = 0;
5802 ctxt->disableSAX = 1;
5803 } else {
5804 if (*ptr == ';') {
5805 ptr++;
5806 /*
5807 * Ask first SAX for entity resolution, otherwise try the
5808 * predefined set.
5809 */
5810 if (ctxt->sax != NULL) {
5811 if (ctxt->sax->getEntity != NULL)
5812 ent = ctxt->sax->getEntity(ctxt->userData, name);
5813 if (ent == NULL)
5814 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005815 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5816 ent = getEntity(ctxt, name);
5817 }
Owen Taylor3473f882001-02-23 17:55:21 +00005818 }
5819 /*
5820 * [ WFC: Entity Declared ]
5821 * In a document without any DTD, a document with only an
5822 * internal DTD subset which contains no parameter entity
5823 * references, or a document with "standalone='yes'", the
5824 * Name given in the entity reference must match that in an
5825 * entity declaration, except that well-formed documents
5826 * need not declare any of the following entities: amp, lt,
5827 * gt, apos, quot.
5828 * The declaration of a parameter entity must precede any
5829 * reference to it.
5830 * Similarly, the declaration of a general entity must
5831 * precede any reference to it which appears in a default
5832 * value in an attribute-list declaration. Note that if
5833 * entities are declared in the external subset or in
5834 * external parameter entities, a non-validating processor
5835 * is not obligated to read and process their declarations;
5836 * for such documents, the rule that an entity must be
5837 * declared is a well-formedness constraint only if
5838 * standalone='yes'.
5839 */
5840 if (ent == NULL) {
5841 if ((ctxt->standalone == 1) ||
5842 ((ctxt->hasExternalSubset == 0) &&
5843 (ctxt->hasPErefs == 0))) {
5844 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5846 ctxt->sax->error(ctxt->userData,
5847 "Entity '%s' not defined\n", name);
5848 ctxt->wellFormed = 0;
5849 ctxt->disableSAX = 1;
5850 } else {
5851 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5852 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5853 ctxt->sax->warning(ctxt->userData,
5854 "Entity '%s' not defined\n", name);
5855 }
5856 }
5857
5858 /*
5859 * [ WFC: Parsed Entity ]
5860 * An entity reference must not contain the name of an
5861 * unparsed entity
5862 */
5863 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5864 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5866 ctxt->sax->error(ctxt->userData,
5867 "Entity reference to unparsed entity %s\n", name);
5868 ctxt->wellFormed = 0;
5869 ctxt->disableSAX = 1;
5870 }
5871
5872 /*
5873 * [ WFC: No External Entity References ]
5874 * Attribute values cannot contain direct or indirect
5875 * entity references to external entities.
5876 */
5877 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5878 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5879 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5881 ctxt->sax->error(ctxt->userData,
5882 "Attribute references external entity '%s'\n", name);
5883 ctxt->wellFormed = 0;
5884 ctxt->disableSAX = 1;
5885 }
5886 /*
5887 * [ WFC: No < in Attribute Values ]
5888 * The replacement text of any entity referred to directly or
5889 * indirectly in an attribute value (other than "&lt;") must
5890 * not contain a <.
5891 */
5892 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5893 (ent != NULL) &&
5894 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5895 (ent->content != NULL) &&
5896 (xmlStrchr(ent->content, '<'))) {
5897 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5899 ctxt->sax->error(ctxt->userData,
5900 "'<' in entity '%s' is not allowed in attributes values\n", name);
5901 ctxt->wellFormed = 0;
5902 ctxt->disableSAX = 1;
5903 }
5904
5905 /*
5906 * Internal check, no parameter entities here ...
5907 */
5908 else {
5909 switch (ent->etype) {
5910 case XML_INTERNAL_PARAMETER_ENTITY:
5911 case XML_EXTERNAL_PARAMETER_ENTITY:
5912 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5914 ctxt->sax->error(ctxt->userData,
5915 "Attempt to reference the parameter entity '%s'\n", name);
5916 ctxt->wellFormed = 0;
5917 ctxt->disableSAX = 1;
5918 break;
5919 default:
5920 break;
5921 }
5922 }
5923
5924 /*
5925 * [ WFC: No Recursion ]
5926 * A parsed entity must not contain a recursive reference
5927 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005928 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005929 */
5930
5931 } else {
5932 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5934 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005935 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005936 ctxt->wellFormed = 0;
5937 ctxt->disableSAX = 1;
5938 }
5939 xmlFree(name);
5940 }
5941 }
5942 *str = ptr;
5943 return(ent);
5944}
5945
5946/**
5947 * xmlParsePEReference:
5948 * @ctxt: an XML parser context
5949 *
5950 * parse PEReference declarations
5951 * The entity content is handled directly by pushing it's content as
5952 * a new input stream.
5953 *
5954 * [69] PEReference ::= '%' Name ';'
5955 *
5956 * [ WFC: No Recursion ]
5957 * A parsed entity must not contain a recursive
5958 * reference to itself, either directly or indirectly.
5959 *
5960 * [ WFC: Entity Declared ]
5961 * In a document without any DTD, a document with only an internal DTD
5962 * subset which contains no parameter entity references, or a document
5963 * with "standalone='yes'", ... ... The declaration of a parameter
5964 * entity must precede any reference to it...
5965 *
5966 * [ VC: Entity Declared ]
5967 * In a document with an external subset or external parameter entities
5968 * with "standalone='no'", ... ... The declaration of a parameter entity
5969 * must precede any reference to it...
5970 *
5971 * [ WFC: In DTD ]
5972 * Parameter-entity references may only appear in the DTD.
5973 * NOTE: misleading but this is handled.
5974 */
5975void
5976xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5977 xmlChar *name;
5978 xmlEntityPtr entity = NULL;
5979 xmlParserInputPtr input;
5980
5981 if (RAW == '%') {
5982 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005983 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005984 if (name == NULL) {
5985 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5987 ctxt->sax->error(ctxt->userData,
5988 "xmlParsePEReference: no name\n");
5989 ctxt->wellFormed = 0;
5990 ctxt->disableSAX = 1;
5991 } else {
5992 if (RAW == ';') {
5993 NEXT;
5994 if ((ctxt->sax != NULL) &&
5995 (ctxt->sax->getParameterEntity != NULL))
5996 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5997 name);
5998 if (entity == NULL) {
5999 /*
6000 * [ WFC: Entity Declared ]
6001 * In a document without any DTD, a document with only an
6002 * internal DTD subset which contains no parameter entity
6003 * references, or a document with "standalone='yes'", ...
6004 * ... The declaration of a parameter entity must precede
6005 * any reference to it...
6006 */
6007 if ((ctxt->standalone == 1) ||
6008 ((ctxt->hasExternalSubset == 0) &&
6009 (ctxt->hasPErefs == 0))) {
6010 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6011 if ((!ctxt->disableSAX) &&
6012 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6013 ctxt->sax->error(ctxt->userData,
6014 "PEReference: %%%s; not found\n", name);
6015 ctxt->wellFormed = 0;
6016 ctxt->disableSAX = 1;
6017 } else {
6018 /*
6019 * [ VC: Entity Declared ]
6020 * In a document with an external subset or external
6021 * parameter entities with "standalone='no'", ...
6022 * ... The declaration of a parameter entity must precede
6023 * any reference to it...
6024 */
6025 if ((!ctxt->disableSAX) &&
6026 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6027 ctxt->sax->warning(ctxt->userData,
6028 "PEReference: %%%s; not found\n", name);
6029 ctxt->valid = 0;
6030 }
6031 } else {
6032 /*
6033 * Internal checking in case the entity quest barfed
6034 */
6035 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6036 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6037 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6038 ctxt->sax->warning(ctxt->userData,
6039 "Internal: %%%s; is not a parameter entity\n", name);
6040 } else {
6041 /*
6042 * TODO !!!
6043 * handle the extra spaces added before and after
6044 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6045 */
6046 input = xmlNewEntityInputStream(ctxt, entity);
6047 xmlPushInput(ctxt, input);
6048 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6049 (RAW == '<') && (NXT(1) == '?') &&
6050 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6051 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6052 xmlParseTextDecl(ctxt);
6053 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6054 /*
6055 * The XML REC instructs us to stop parsing
6056 * right here
6057 */
6058 ctxt->instate = XML_PARSER_EOF;
6059 xmlFree(name);
6060 return;
6061 }
6062 }
6063 if (ctxt->token == 0)
6064 ctxt->token = ' ';
6065 }
6066 }
6067 ctxt->hasPErefs = 1;
6068 } else {
6069 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6071 ctxt->sax->error(ctxt->userData,
6072 "xmlParsePEReference: expecting ';'\n");
6073 ctxt->wellFormed = 0;
6074 ctxt->disableSAX = 1;
6075 }
6076 xmlFree(name);
6077 }
6078 }
6079}
6080
6081/**
6082 * xmlParseStringPEReference:
6083 * @ctxt: an XML parser context
6084 * @str: a pointer to an index in the string
6085 *
6086 * parse PEReference declarations
6087 *
6088 * [69] PEReference ::= '%' Name ';'
6089 *
6090 * [ WFC: No Recursion ]
6091 * A parsed entity must not contain a recursive
6092 * reference to itself, either directly or indirectly.
6093 *
6094 * [ WFC: Entity Declared ]
6095 * In a document without any DTD, a document with only an internal DTD
6096 * subset which contains no parameter entity references, or a document
6097 * with "standalone='yes'", ... ... The declaration of a parameter
6098 * entity must precede any reference to it...
6099 *
6100 * [ VC: Entity Declared ]
6101 * In a document with an external subset or external parameter entities
6102 * with "standalone='no'", ... ... The declaration of a parameter entity
6103 * must precede any reference to it...
6104 *
6105 * [ WFC: In DTD ]
6106 * Parameter-entity references may only appear in the DTD.
6107 * NOTE: misleading but this is handled.
6108 *
6109 * Returns the string of the entity content.
6110 * str is updated to the current value of the index
6111 */
6112xmlEntityPtr
6113xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6114 const xmlChar *ptr;
6115 xmlChar cur;
6116 xmlChar *name;
6117 xmlEntityPtr entity = NULL;
6118
6119 if ((str == NULL) || (*str == NULL)) return(NULL);
6120 ptr = *str;
6121 cur = *ptr;
6122 if (cur == '%') {
6123 ptr++;
6124 cur = *ptr;
6125 name = xmlParseStringName(ctxt, &ptr);
6126 if (name == NULL) {
6127 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6129 ctxt->sax->error(ctxt->userData,
6130 "xmlParseStringPEReference: no name\n");
6131 ctxt->wellFormed = 0;
6132 ctxt->disableSAX = 1;
6133 } else {
6134 cur = *ptr;
6135 if (cur == ';') {
6136 ptr++;
6137 cur = *ptr;
6138 if ((ctxt->sax != NULL) &&
6139 (ctxt->sax->getParameterEntity != NULL))
6140 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6141 name);
6142 if (entity == NULL) {
6143 /*
6144 * [ WFC: Entity Declared ]
6145 * In a document without any DTD, a document with only an
6146 * internal DTD subset which contains no parameter entity
6147 * references, or a document with "standalone='yes'", ...
6148 * ... The declaration of a parameter entity must precede
6149 * any reference to it...
6150 */
6151 if ((ctxt->standalone == 1) ||
6152 ((ctxt->hasExternalSubset == 0) &&
6153 (ctxt->hasPErefs == 0))) {
6154 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6155 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6156 ctxt->sax->error(ctxt->userData,
6157 "PEReference: %%%s; not found\n", name);
6158 ctxt->wellFormed = 0;
6159 ctxt->disableSAX = 1;
6160 } else {
6161 /*
6162 * [ VC: Entity Declared ]
6163 * In a document with an external subset or external
6164 * parameter entities with "standalone='no'", ...
6165 * ... The declaration of a parameter entity must
6166 * precede any reference to it...
6167 */
6168 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6169 ctxt->sax->warning(ctxt->userData,
6170 "PEReference: %%%s; not found\n", name);
6171 ctxt->valid = 0;
6172 }
6173 } else {
6174 /*
6175 * Internal checking in case the entity quest barfed
6176 */
6177 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6178 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6179 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6180 ctxt->sax->warning(ctxt->userData,
6181 "Internal: %%%s; is not a parameter entity\n", name);
6182 }
6183 }
6184 ctxt->hasPErefs = 1;
6185 } else {
6186 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6188 ctxt->sax->error(ctxt->userData,
6189 "xmlParseStringPEReference: expecting ';'\n");
6190 ctxt->wellFormed = 0;
6191 ctxt->disableSAX = 1;
6192 }
6193 xmlFree(name);
6194 }
6195 }
6196 *str = ptr;
6197 return(entity);
6198}
6199
6200/**
6201 * xmlParseDocTypeDecl:
6202 * @ctxt: an XML parser context
6203 *
6204 * parse a DOCTYPE declaration
6205 *
6206 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6207 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6208 *
6209 * [ VC: Root Element Type ]
6210 * The Name in the document type declaration must match the element
6211 * type of the root element.
6212 */
6213
6214void
6215xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6216 xmlChar *name = NULL;
6217 xmlChar *ExternalID = NULL;
6218 xmlChar *URI = NULL;
6219
6220 /*
6221 * We know that '<!DOCTYPE' has been detected.
6222 */
6223 SKIP(9);
6224
6225 SKIP_BLANKS;
6226
6227 /*
6228 * Parse the DOCTYPE name.
6229 */
6230 name = xmlParseName(ctxt);
6231 if (name == NULL) {
6232 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6234 ctxt->sax->error(ctxt->userData,
6235 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6236 ctxt->wellFormed = 0;
6237 ctxt->disableSAX = 1;
6238 }
6239 ctxt->intSubName = name;
6240
6241 SKIP_BLANKS;
6242
6243 /*
6244 * Check for SystemID and ExternalID
6245 */
6246 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6247
6248 if ((URI != NULL) || (ExternalID != NULL)) {
6249 ctxt->hasExternalSubset = 1;
6250 }
6251 ctxt->extSubURI = URI;
6252 ctxt->extSubSystem = ExternalID;
6253
6254 SKIP_BLANKS;
6255
6256 /*
6257 * Create and update the internal subset.
6258 */
6259 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6260 (!ctxt->disableSAX))
6261 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6262
6263 /*
6264 * Is there any internal subset declarations ?
6265 * they are handled separately in xmlParseInternalSubset()
6266 */
6267 if (RAW == '[')
6268 return;
6269
6270 /*
6271 * We should be at the end of the DOCTYPE declaration.
6272 */
6273 if (RAW != '>') {
6274 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006276 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006277 ctxt->wellFormed = 0;
6278 ctxt->disableSAX = 1;
6279 }
6280 NEXT;
6281}
6282
6283/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006284 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006285 * @ctxt: an XML parser context
6286 *
6287 * parse the internal subset declaration
6288 *
6289 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6290 */
6291
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006292static void
Owen Taylor3473f882001-02-23 17:55:21 +00006293xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6294 /*
6295 * Is there any DTD definition ?
6296 */
6297 if (RAW == '[') {
6298 ctxt->instate = XML_PARSER_DTD;
6299 NEXT;
6300 /*
6301 * Parse the succession of Markup declarations and
6302 * PEReferences.
6303 * Subsequence (markupdecl | PEReference | S)*
6304 */
6305 while (RAW != ']') {
6306 const xmlChar *check = CUR_PTR;
6307 int cons = ctxt->input->consumed;
6308
6309 SKIP_BLANKS;
6310 xmlParseMarkupDecl(ctxt);
6311 xmlParsePEReference(ctxt);
6312
6313 /*
6314 * Pop-up of finished entities.
6315 */
6316 while ((RAW == 0) && (ctxt->inputNr > 1))
6317 xmlPopInput(ctxt);
6318
6319 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6320 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6322 ctxt->sax->error(ctxt->userData,
6323 "xmlParseInternalSubset: error detected in Markup declaration\n");
6324 ctxt->wellFormed = 0;
6325 ctxt->disableSAX = 1;
6326 break;
6327 }
6328 }
6329 if (RAW == ']') {
6330 NEXT;
6331 SKIP_BLANKS;
6332 }
6333 }
6334
6335 /*
6336 * We should be at the end of the DOCTYPE declaration.
6337 */
6338 if (RAW != '>') {
6339 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006341 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006342 ctxt->wellFormed = 0;
6343 ctxt->disableSAX = 1;
6344 }
6345 NEXT;
6346}
6347
6348/**
6349 * xmlParseAttribute:
6350 * @ctxt: an XML parser context
6351 * @value: a xmlChar ** used to store the value of the attribute
6352 *
6353 * parse an attribute
6354 *
6355 * [41] Attribute ::= Name Eq AttValue
6356 *
6357 * [ WFC: No External Entity References ]
6358 * Attribute values cannot contain direct or indirect entity references
6359 * to external entities.
6360 *
6361 * [ WFC: No < in Attribute Values ]
6362 * The replacement text of any entity referred to directly or indirectly in
6363 * an attribute value (other than "&lt;") must not contain a <.
6364 *
6365 * [ VC: Attribute Value Type ]
6366 * The attribute must have been declared; the value must be of the type
6367 * declared for it.
6368 *
6369 * [25] Eq ::= S? '=' S?
6370 *
6371 * With namespace:
6372 *
6373 * [NS 11] Attribute ::= QName Eq AttValue
6374 *
6375 * Also the case QName == xmlns:??? is handled independently as a namespace
6376 * definition.
6377 *
6378 * Returns the attribute name, and the value in *value.
6379 */
6380
6381xmlChar *
6382xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6383 xmlChar *name, *val;
6384
6385 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006386 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006387 name = xmlParseName(ctxt);
6388 if (name == NULL) {
6389 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6391 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6392 ctxt->wellFormed = 0;
6393 ctxt->disableSAX = 1;
6394 return(NULL);
6395 }
6396
6397 /*
6398 * read the value
6399 */
6400 SKIP_BLANKS;
6401 if (RAW == '=') {
6402 NEXT;
6403 SKIP_BLANKS;
6404 val = xmlParseAttValue(ctxt);
6405 ctxt->instate = XML_PARSER_CONTENT;
6406 } else {
6407 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6409 ctxt->sax->error(ctxt->userData,
6410 "Specification mandate value for attribute %s\n", name);
6411 ctxt->wellFormed = 0;
6412 ctxt->disableSAX = 1;
6413 xmlFree(name);
6414 return(NULL);
6415 }
6416
6417 /*
6418 * Check that xml:lang conforms to the specification
6419 * No more registered as an error, just generate a warning now
6420 * since this was deprecated in XML second edition
6421 */
6422 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6423 if (!xmlCheckLanguageID(val)) {
6424 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6425 ctxt->sax->warning(ctxt->userData,
6426 "Malformed value for xml:lang : %s\n", val);
6427 }
6428 }
6429
6430 /*
6431 * Check that xml:space conforms to the specification
6432 */
6433 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6434 if (xmlStrEqual(val, BAD_CAST "default"))
6435 *(ctxt->space) = 0;
6436 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6437 *(ctxt->space) = 1;
6438 else {
6439 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6441 ctxt->sax->error(ctxt->userData,
6442"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6443 val);
6444 ctxt->wellFormed = 0;
6445 ctxt->disableSAX = 1;
6446 }
6447 }
6448
6449 *value = val;
6450 return(name);
6451}
6452
6453/**
6454 * xmlParseStartTag:
6455 * @ctxt: an XML parser context
6456 *
6457 * parse a start of tag either for rule element or
6458 * EmptyElement. In both case we don't parse the tag closing chars.
6459 *
6460 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6461 *
6462 * [ WFC: Unique Att Spec ]
6463 * No attribute name may appear more than once in the same start-tag or
6464 * empty-element tag.
6465 *
6466 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6467 *
6468 * [ WFC: Unique Att Spec ]
6469 * No attribute name may appear more than once in the same start-tag or
6470 * empty-element tag.
6471 *
6472 * With namespace:
6473 *
6474 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6475 *
6476 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6477 *
6478 * Returns the element name parsed
6479 */
6480
6481xmlChar *
6482xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6483 xmlChar *name;
6484 xmlChar *attname;
6485 xmlChar *attvalue;
6486 const xmlChar **atts = NULL;
6487 int nbatts = 0;
6488 int maxatts = 0;
6489 int i;
6490
6491 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006492 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006493
6494 name = xmlParseName(ctxt);
6495 if (name == NULL) {
6496 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6498 ctxt->sax->error(ctxt->userData,
6499 "xmlParseStartTag: invalid element name\n");
6500 ctxt->wellFormed = 0;
6501 ctxt->disableSAX = 1;
6502 return(NULL);
6503 }
6504
6505 /*
6506 * Now parse the attributes, it ends up with the ending
6507 *
6508 * (S Attribute)* S?
6509 */
6510 SKIP_BLANKS;
6511 GROW;
6512
Daniel Veillard21a0f912001-02-25 19:54:14 +00006513 while ((RAW != '>') &&
6514 ((RAW != '/') || (NXT(1) != '>')) &&
6515 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006516 const xmlChar *q = CUR_PTR;
6517 int cons = ctxt->input->consumed;
6518
6519 attname = xmlParseAttribute(ctxt, &attvalue);
6520 if ((attname != NULL) && (attvalue != NULL)) {
6521 /*
6522 * [ WFC: Unique Att Spec ]
6523 * No attribute name may appear more than once in the same
6524 * start-tag or empty-element tag.
6525 */
6526 for (i = 0; i < nbatts;i += 2) {
6527 if (xmlStrEqual(atts[i], attname)) {
6528 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6530 ctxt->sax->error(ctxt->userData,
6531 "Attribute %s redefined\n",
6532 attname);
6533 ctxt->wellFormed = 0;
6534 ctxt->disableSAX = 1;
6535 xmlFree(attname);
6536 xmlFree(attvalue);
6537 goto failed;
6538 }
6539 }
6540
6541 /*
6542 * Add the pair to atts
6543 */
6544 if (atts == NULL) {
6545 maxatts = 10;
6546 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6547 if (atts == NULL) {
6548 xmlGenericError(xmlGenericErrorContext,
6549 "malloc of %ld byte failed\n",
6550 maxatts * (long)sizeof(xmlChar *));
6551 return(NULL);
6552 }
6553 } else if (nbatts + 4 > maxatts) {
6554 maxatts *= 2;
6555 atts = (const xmlChar **) xmlRealloc((void *) atts,
6556 maxatts * sizeof(xmlChar *));
6557 if (atts == NULL) {
6558 xmlGenericError(xmlGenericErrorContext,
6559 "realloc of %ld byte failed\n",
6560 maxatts * (long)sizeof(xmlChar *));
6561 return(NULL);
6562 }
6563 }
6564 atts[nbatts++] = attname;
6565 atts[nbatts++] = attvalue;
6566 atts[nbatts] = NULL;
6567 atts[nbatts + 1] = NULL;
6568 } else {
6569 if (attname != NULL)
6570 xmlFree(attname);
6571 if (attvalue != NULL)
6572 xmlFree(attvalue);
6573 }
6574
6575failed:
6576
6577 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6578 break;
6579 if (!IS_BLANK(RAW)) {
6580 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6582 ctxt->sax->error(ctxt->userData,
6583 "attributes construct error\n");
6584 ctxt->wellFormed = 0;
6585 ctxt->disableSAX = 1;
6586 }
6587 SKIP_BLANKS;
6588 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6589 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6591 ctxt->sax->error(ctxt->userData,
6592 "xmlParseStartTag: problem parsing attributes\n");
6593 ctxt->wellFormed = 0;
6594 ctxt->disableSAX = 1;
6595 break;
6596 }
6597 GROW;
6598 }
6599
6600 /*
6601 * SAX: Start of Element !
6602 */
6603 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6604 (!ctxt->disableSAX))
6605 ctxt->sax->startElement(ctxt->userData, name, atts);
6606
6607 if (atts != NULL) {
6608 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6609 xmlFree((void *) atts);
6610 }
6611 return(name);
6612}
6613
6614/**
6615 * xmlParseEndTag:
6616 * @ctxt: an XML parser context
6617 *
6618 * parse an end of tag
6619 *
6620 * [42] ETag ::= '</' Name S? '>'
6621 *
6622 * With namespace
6623 *
6624 * [NS 9] ETag ::= '</' QName S? '>'
6625 */
6626
6627void
6628xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6629 xmlChar *name;
6630 xmlChar *oldname;
6631
6632 GROW;
6633 if ((RAW != '<') || (NXT(1) != '/')) {
6634 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6636 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6637 ctxt->wellFormed = 0;
6638 ctxt->disableSAX = 1;
6639 return;
6640 }
6641 SKIP(2);
6642
Daniel Veillard46de64e2002-05-29 08:21:33 +00006643 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006644
6645 /*
6646 * We should definitely be at the ending "S? '>'" part
6647 */
6648 GROW;
6649 SKIP_BLANKS;
6650 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6651 ctxt->errNo = XML_ERR_GT_REQUIRED;
6652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6653 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6654 ctxt->wellFormed = 0;
6655 ctxt->disableSAX = 1;
6656 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006657 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006658
6659 /*
6660 * [ WFC: Element Type Match ]
6661 * The Name in an element's end-tag must match the element type in the
6662 * start-tag.
6663 *
6664 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006665 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006666 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006668 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006669 ctxt->sax->error(ctxt->userData,
6670 "Opening and ending tag mismatch: %s and %s\n",
6671 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006672 xmlFree(name);
6673 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006674 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006675 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006676 }
6677
6678 }
6679 ctxt->wellFormed = 0;
6680 ctxt->disableSAX = 1;
6681 }
6682
6683 /*
6684 * SAX: End of Tag
6685 */
6686 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6687 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006688 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006689
Owen Taylor3473f882001-02-23 17:55:21 +00006690 oldname = namePop(ctxt);
6691 spacePop(ctxt);
6692 if (oldname != NULL) {
6693#ifdef DEBUG_STACK
6694 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6695#endif
6696 xmlFree(oldname);
6697 }
6698 return;
6699}
6700
6701/**
6702 * xmlParseCDSect:
6703 * @ctxt: an XML parser context
6704 *
6705 * Parse escaped pure raw content.
6706 *
6707 * [18] CDSect ::= CDStart CData CDEnd
6708 *
6709 * [19] CDStart ::= '<![CDATA['
6710 *
6711 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6712 *
6713 * [21] CDEnd ::= ']]>'
6714 */
6715void
6716xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6717 xmlChar *buf = NULL;
6718 int len = 0;
6719 int size = XML_PARSER_BUFFER_SIZE;
6720 int r, rl;
6721 int s, sl;
6722 int cur, l;
6723 int count = 0;
6724
6725 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6726 (NXT(2) == '[') && (NXT(3) == 'C') &&
6727 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6728 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6729 (NXT(8) == '[')) {
6730 SKIP(9);
6731 } else
6732 return;
6733
6734 ctxt->instate = XML_PARSER_CDATA_SECTION;
6735 r = CUR_CHAR(rl);
6736 if (!IS_CHAR(r)) {
6737 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6739 ctxt->sax->error(ctxt->userData,
6740 "CData section not finished\n");
6741 ctxt->wellFormed = 0;
6742 ctxt->disableSAX = 1;
6743 ctxt->instate = XML_PARSER_CONTENT;
6744 return;
6745 }
6746 NEXTL(rl);
6747 s = CUR_CHAR(sl);
6748 if (!IS_CHAR(s)) {
6749 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6751 ctxt->sax->error(ctxt->userData,
6752 "CData section not finished\n");
6753 ctxt->wellFormed = 0;
6754 ctxt->disableSAX = 1;
6755 ctxt->instate = XML_PARSER_CONTENT;
6756 return;
6757 }
6758 NEXTL(sl);
6759 cur = CUR_CHAR(l);
6760 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6761 if (buf == NULL) {
6762 xmlGenericError(xmlGenericErrorContext,
6763 "malloc of %d byte failed\n", size);
6764 return;
6765 }
6766 while (IS_CHAR(cur) &&
6767 ((r != ']') || (s != ']') || (cur != '>'))) {
6768 if (len + 5 >= size) {
6769 size *= 2;
6770 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6771 if (buf == NULL) {
6772 xmlGenericError(xmlGenericErrorContext,
6773 "realloc of %d byte failed\n", size);
6774 return;
6775 }
6776 }
6777 COPY_BUF(rl,buf,len,r);
6778 r = s;
6779 rl = sl;
6780 s = cur;
6781 sl = l;
6782 count++;
6783 if (count > 50) {
6784 GROW;
6785 count = 0;
6786 }
6787 NEXTL(l);
6788 cur = CUR_CHAR(l);
6789 }
6790 buf[len] = 0;
6791 ctxt->instate = XML_PARSER_CONTENT;
6792 if (cur != '>') {
6793 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6795 ctxt->sax->error(ctxt->userData,
6796 "CData section not finished\n%.50s\n", buf);
6797 ctxt->wellFormed = 0;
6798 ctxt->disableSAX = 1;
6799 xmlFree(buf);
6800 return;
6801 }
6802 NEXTL(l);
6803
6804 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006805 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006806 */
6807 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6808 if (ctxt->sax->cdataBlock != NULL)
6809 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006810 else if (ctxt->sax->characters != NULL)
6811 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006812 }
6813 xmlFree(buf);
6814}
6815
6816/**
6817 * xmlParseContent:
6818 * @ctxt: an XML parser context
6819 *
6820 * Parse a content:
6821 *
6822 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6823 */
6824
6825void
6826xmlParseContent(xmlParserCtxtPtr ctxt) {
6827 GROW;
Daniel Veillard561b7f82002-03-20 21:55:57 +00006828 while (((RAW != 0) || (ctxt->token != 0)) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006829 ((RAW != '<') || (NXT(1) != '/'))) {
6830 const xmlChar *test = CUR_PTR;
6831 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006832 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006833 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006834
6835 /*
6836 * Handle possible processed charrefs.
6837 */
6838 if (ctxt->token != 0) {
6839 xmlParseCharData(ctxt, 0);
6840 }
6841 /*
6842 * First case : a Processing Instruction.
6843 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006844 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006845 xmlParsePI(ctxt);
6846 }
6847
6848 /*
6849 * Second case : a CDSection
6850 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006851 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006852 (NXT(2) == '[') && (NXT(3) == 'C') &&
6853 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6854 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6855 (NXT(8) == '[')) {
6856 xmlParseCDSect(ctxt);
6857 }
6858
6859 /*
6860 * Third case : a comment
6861 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006862 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006863 (NXT(2) == '-') && (NXT(3) == '-')) {
6864 xmlParseComment(ctxt);
6865 ctxt->instate = XML_PARSER_CONTENT;
6866 }
6867
6868 /*
6869 * Fourth case : a sub-element.
6870 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006871 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006872 xmlParseElement(ctxt);
6873 }
6874
6875 /*
6876 * Fifth case : a reference. If if has not been resolved,
6877 * parsing returns it's Name, create the node
6878 */
6879
Daniel Veillard21a0f912001-02-25 19:54:14 +00006880 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006881 xmlParseReference(ctxt);
6882 }
6883
6884 /*
6885 * Last case, text. Note that References are handled directly.
6886 */
6887 else {
6888 xmlParseCharData(ctxt, 0);
6889 }
6890
6891 GROW;
6892 /*
6893 * Pop-up of finished entities.
6894 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006895 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006896 xmlPopInput(ctxt);
6897 SHRINK;
6898
6899 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6900 (tok == ctxt->token)) {
6901 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6903 ctxt->sax->error(ctxt->userData,
6904 "detected an error in element content\n");
6905 ctxt->wellFormed = 0;
6906 ctxt->disableSAX = 1;
6907 ctxt->instate = XML_PARSER_EOF;
6908 break;
6909 }
6910 }
6911}
6912
6913/**
6914 * xmlParseElement:
6915 * @ctxt: an XML parser context
6916 *
6917 * parse an XML element, this is highly recursive
6918 *
6919 * [39] element ::= EmptyElemTag | STag content ETag
6920 *
6921 * [ WFC: Element Type Match ]
6922 * The Name in an element's end-tag must match the element type in the
6923 * start-tag.
6924 *
6925 * [ VC: Element Valid ]
6926 * An element is valid if there is a declaration matching elementdecl
6927 * where the Name matches the element type and one of the following holds:
6928 * - The declaration matches EMPTY and the element has no content.
6929 * - The declaration matches children and the sequence of child elements
6930 * belongs to the language generated by the regular expression in the
6931 * content model, with optional white space (characters matching the
6932 * nonterminal S) between each pair of child elements.
6933 * - The declaration matches Mixed and the content consists of character
6934 * data and child elements whose types match names in the content model.
6935 * - The declaration matches ANY, and the types of any child elements have
6936 * been declared.
6937 */
6938
6939void
6940xmlParseElement(xmlParserCtxtPtr ctxt) {
6941 const xmlChar *openTag = CUR_PTR;
6942 xmlChar *name;
6943 xmlChar *oldname;
6944 xmlParserNodeInfo node_info;
6945 xmlNodePtr ret;
6946
6947 /* Capture start position */
6948 if (ctxt->record_info) {
6949 node_info.begin_pos = ctxt->input->consumed +
6950 (CUR_PTR - ctxt->input->base);
6951 node_info.begin_line = ctxt->input->line;
6952 }
6953
6954 if (ctxt->spaceNr == 0)
6955 spacePush(ctxt, -1);
6956 else
6957 spacePush(ctxt, *ctxt->space);
6958
6959 name = xmlParseStartTag(ctxt);
6960 if (name == NULL) {
6961 spacePop(ctxt);
6962 return;
6963 }
6964 namePush(ctxt, name);
6965 ret = ctxt->node;
6966
6967 /*
6968 * [ VC: Root Element Type ]
6969 * The Name in the document type declaration must match the element
6970 * type of the root element.
6971 */
6972 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6973 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6974 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6975
6976 /*
6977 * Check for an Empty Element.
6978 */
6979 if ((RAW == '/') && (NXT(1) == '>')) {
6980 SKIP(2);
6981 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6982 (!ctxt->disableSAX))
6983 ctxt->sax->endElement(ctxt->userData, name);
6984 oldname = namePop(ctxt);
6985 spacePop(ctxt);
6986 if (oldname != NULL) {
6987#ifdef DEBUG_STACK
6988 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6989#endif
6990 xmlFree(oldname);
6991 }
6992 if ( ret != NULL && ctxt->record_info ) {
6993 node_info.end_pos = ctxt->input->consumed +
6994 (CUR_PTR - ctxt->input->base);
6995 node_info.end_line = ctxt->input->line;
6996 node_info.node = ret;
6997 xmlParserAddNodeInfo(ctxt, &node_info);
6998 }
6999 return;
7000 }
7001 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007002 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007003 } else {
7004 ctxt->errNo = XML_ERR_GT_REQUIRED;
7005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7006 ctxt->sax->error(ctxt->userData,
7007 "Couldn't find end of Start Tag\n%.30s\n",
7008 openTag);
7009 ctxt->wellFormed = 0;
7010 ctxt->disableSAX = 1;
7011
7012 /*
7013 * end of parsing of this node.
7014 */
7015 nodePop(ctxt);
7016 oldname = namePop(ctxt);
7017 spacePop(ctxt);
7018 if (oldname != NULL) {
7019#ifdef DEBUG_STACK
7020 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7021#endif
7022 xmlFree(oldname);
7023 }
7024
7025 /*
7026 * Capture end position and add node
7027 */
7028 if ( ret != NULL && ctxt->record_info ) {
7029 node_info.end_pos = ctxt->input->consumed +
7030 (CUR_PTR - ctxt->input->base);
7031 node_info.end_line = ctxt->input->line;
7032 node_info.node = ret;
7033 xmlParserAddNodeInfo(ctxt, &node_info);
7034 }
7035 return;
7036 }
7037
7038 /*
7039 * Parse the content of the element:
7040 */
7041 xmlParseContent(ctxt);
7042 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007043 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7045 ctxt->sax->error(ctxt->userData,
7046 "Premature end of data in tag %.30s\n", openTag);
7047 ctxt->wellFormed = 0;
7048 ctxt->disableSAX = 1;
7049
7050 /*
7051 * end of parsing of this node.
7052 */
7053 nodePop(ctxt);
7054 oldname = namePop(ctxt);
7055 spacePop(ctxt);
7056 if (oldname != NULL) {
7057#ifdef DEBUG_STACK
7058 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7059#endif
7060 xmlFree(oldname);
7061 }
7062 return;
7063 }
7064
7065 /*
7066 * parse the end of tag: '</' should be here.
7067 */
7068 xmlParseEndTag(ctxt);
7069
7070 /*
7071 * Capture end position and add node
7072 */
7073 if ( ret != NULL && ctxt->record_info ) {
7074 node_info.end_pos = ctxt->input->consumed +
7075 (CUR_PTR - ctxt->input->base);
7076 node_info.end_line = ctxt->input->line;
7077 node_info.node = ret;
7078 xmlParserAddNodeInfo(ctxt, &node_info);
7079 }
7080}
7081
7082/**
7083 * xmlParseVersionNum:
7084 * @ctxt: an XML parser context
7085 *
7086 * parse the XML version value.
7087 *
7088 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7089 *
7090 * Returns the string giving the XML version number, or NULL
7091 */
7092xmlChar *
7093xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7094 xmlChar *buf = NULL;
7095 int len = 0;
7096 int size = 10;
7097 xmlChar cur;
7098
7099 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7100 if (buf == NULL) {
7101 xmlGenericError(xmlGenericErrorContext,
7102 "malloc of %d byte failed\n", size);
7103 return(NULL);
7104 }
7105 cur = CUR;
7106 while (((cur >= 'a') && (cur <= 'z')) ||
7107 ((cur >= 'A') && (cur <= 'Z')) ||
7108 ((cur >= '0') && (cur <= '9')) ||
7109 (cur == '_') || (cur == '.') ||
7110 (cur == ':') || (cur == '-')) {
7111 if (len + 1 >= size) {
7112 size *= 2;
7113 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7114 if (buf == NULL) {
7115 xmlGenericError(xmlGenericErrorContext,
7116 "realloc of %d byte failed\n", size);
7117 return(NULL);
7118 }
7119 }
7120 buf[len++] = cur;
7121 NEXT;
7122 cur=CUR;
7123 }
7124 buf[len] = 0;
7125 return(buf);
7126}
7127
7128/**
7129 * xmlParseVersionInfo:
7130 * @ctxt: an XML parser context
7131 *
7132 * parse the XML version.
7133 *
7134 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7135 *
7136 * [25] Eq ::= S? '=' S?
7137 *
7138 * Returns the version string, e.g. "1.0"
7139 */
7140
7141xmlChar *
7142xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7143 xmlChar *version = NULL;
7144 const xmlChar *q;
7145
7146 if ((RAW == 'v') && (NXT(1) == 'e') &&
7147 (NXT(2) == 'r') && (NXT(3) == 's') &&
7148 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7149 (NXT(6) == 'n')) {
7150 SKIP(7);
7151 SKIP_BLANKS;
7152 if (RAW != '=') {
7153 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7155 ctxt->sax->error(ctxt->userData,
7156 "xmlParseVersionInfo : expected '='\n");
7157 ctxt->wellFormed = 0;
7158 ctxt->disableSAX = 1;
7159 return(NULL);
7160 }
7161 NEXT;
7162 SKIP_BLANKS;
7163 if (RAW == '"') {
7164 NEXT;
7165 q = CUR_PTR;
7166 version = xmlParseVersionNum(ctxt);
7167 if (RAW != '"') {
7168 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7169 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7170 ctxt->sax->error(ctxt->userData,
7171 "String not closed\n%.50s\n", q);
7172 ctxt->wellFormed = 0;
7173 ctxt->disableSAX = 1;
7174 } else
7175 NEXT;
7176 } else if (RAW == '\''){
7177 NEXT;
7178 q = CUR_PTR;
7179 version = xmlParseVersionNum(ctxt);
7180 if (RAW != '\'') {
7181 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7183 ctxt->sax->error(ctxt->userData,
7184 "String not closed\n%.50s\n", q);
7185 ctxt->wellFormed = 0;
7186 ctxt->disableSAX = 1;
7187 } else
7188 NEXT;
7189 } else {
7190 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7192 ctxt->sax->error(ctxt->userData,
7193 "xmlParseVersionInfo : expected ' or \"\n");
7194 ctxt->wellFormed = 0;
7195 ctxt->disableSAX = 1;
7196 }
7197 }
7198 return(version);
7199}
7200
7201/**
7202 * xmlParseEncName:
7203 * @ctxt: an XML parser context
7204 *
7205 * parse the XML encoding name
7206 *
7207 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7208 *
7209 * Returns the encoding name value or NULL
7210 */
7211xmlChar *
7212xmlParseEncName(xmlParserCtxtPtr ctxt) {
7213 xmlChar *buf = NULL;
7214 int len = 0;
7215 int size = 10;
7216 xmlChar cur;
7217
7218 cur = CUR;
7219 if (((cur >= 'a') && (cur <= 'z')) ||
7220 ((cur >= 'A') && (cur <= 'Z'))) {
7221 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7222 if (buf == NULL) {
7223 xmlGenericError(xmlGenericErrorContext,
7224 "malloc of %d byte failed\n", size);
7225 return(NULL);
7226 }
7227
7228 buf[len++] = cur;
7229 NEXT;
7230 cur = CUR;
7231 while (((cur >= 'a') && (cur <= 'z')) ||
7232 ((cur >= 'A') && (cur <= 'Z')) ||
7233 ((cur >= '0') && (cur <= '9')) ||
7234 (cur == '.') || (cur == '_') ||
7235 (cur == '-')) {
7236 if (len + 1 >= size) {
7237 size *= 2;
7238 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7239 if (buf == NULL) {
7240 xmlGenericError(xmlGenericErrorContext,
7241 "realloc of %d byte failed\n", size);
7242 return(NULL);
7243 }
7244 }
7245 buf[len++] = cur;
7246 NEXT;
7247 cur = CUR;
7248 if (cur == 0) {
7249 SHRINK;
7250 GROW;
7251 cur = CUR;
7252 }
7253 }
7254 buf[len] = 0;
7255 } else {
7256 ctxt->errNo = XML_ERR_ENCODING_NAME;
7257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7258 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7259 ctxt->wellFormed = 0;
7260 ctxt->disableSAX = 1;
7261 }
7262 return(buf);
7263}
7264
7265/**
7266 * xmlParseEncodingDecl:
7267 * @ctxt: an XML parser context
7268 *
7269 * parse the XML encoding declaration
7270 *
7271 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7272 *
7273 * this setups the conversion filters.
7274 *
7275 * Returns the encoding value or NULL
7276 */
7277
7278xmlChar *
7279xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7280 xmlChar *encoding = NULL;
7281 const xmlChar *q;
7282
7283 SKIP_BLANKS;
7284 if ((RAW == 'e') && (NXT(1) == 'n') &&
7285 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7286 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7287 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7288 SKIP(8);
7289 SKIP_BLANKS;
7290 if (RAW != '=') {
7291 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7293 ctxt->sax->error(ctxt->userData,
7294 "xmlParseEncodingDecl : expected '='\n");
7295 ctxt->wellFormed = 0;
7296 ctxt->disableSAX = 1;
7297 return(NULL);
7298 }
7299 NEXT;
7300 SKIP_BLANKS;
7301 if (RAW == '"') {
7302 NEXT;
7303 q = CUR_PTR;
7304 encoding = xmlParseEncName(ctxt);
7305 if (RAW != '"') {
7306 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7308 ctxt->sax->error(ctxt->userData,
7309 "String not closed\n%.50s\n", q);
7310 ctxt->wellFormed = 0;
7311 ctxt->disableSAX = 1;
7312 } else
7313 NEXT;
7314 } else if (RAW == '\''){
7315 NEXT;
7316 q = CUR_PTR;
7317 encoding = xmlParseEncName(ctxt);
7318 if (RAW != '\'') {
7319 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7321 ctxt->sax->error(ctxt->userData,
7322 "String not closed\n%.50s\n", q);
7323 ctxt->wellFormed = 0;
7324 ctxt->disableSAX = 1;
7325 } else
7326 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007327 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007328 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7330 ctxt->sax->error(ctxt->userData,
7331 "xmlParseEncodingDecl : expected ' or \"\n");
7332 ctxt->wellFormed = 0;
7333 ctxt->disableSAX = 1;
7334 }
7335 if (encoding != NULL) {
7336 xmlCharEncoding enc;
7337 xmlCharEncodingHandlerPtr handler;
7338
7339 if (ctxt->input->encoding != NULL)
7340 xmlFree((xmlChar *) ctxt->input->encoding);
7341 ctxt->input->encoding = encoding;
7342
7343 enc = xmlParseCharEncoding((const char *) encoding);
7344 /*
7345 * registered set of known encodings
7346 */
7347 if (enc != XML_CHAR_ENCODING_ERROR) {
7348 xmlSwitchEncoding(ctxt, enc);
7349 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007350 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007351 xmlFree(encoding);
7352 return(NULL);
7353 }
7354 } else {
7355 /*
7356 * fallback for unknown encodings
7357 */
7358 handler = xmlFindCharEncodingHandler((const char *) encoding);
7359 if (handler != NULL) {
7360 xmlSwitchToEncoding(ctxt, handler);
7361 } else {
7362 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7364 ctxt->sax->error(ctxt->userData,
7365 "Unsupported encoding %s\n", encoding);
7366 return(NULL);
7367 }
7368 }
7369 }
7370 }
7371 return(encoding);
7372}
7373
7374/**
7375 * xmlParseSDDecl:
7376 * @ctxt: an XML parser context
7377 *
7378 * parse the XML standalone declaration
7379 *
7380 * [32] SDDecl ::= S 'standalone' Eq
7381 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7382 *
7383 * [ VC: Standalone Document Declaration ]
7384 * TODO The standalone document declaration must have the value "no"
7385 * if any external markup declarations contain declarations of:
7386 * - attributes with default values, if elements to which these
7387 * attributes apply appear in the document without specifications
7388 * of values for these attributes, or
7389 * - entities (other than amp, lt, gt, apos, quot), if references
7390 * to those entities appear in the document, or
7391 * - attributes with values subject to normalization, where the
7392 * attribute appears in the document with a value which will change
7393 * as a result of normalization, or
7394 * - element types with element content, if white space occurs directly
7395 * within any instance of those types.
7396 *
7397 * Returns 1 if standalone, 0 otherwise
7398 */
7399
7400int
7401xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7402 int standalone = -1;
7403
7404 SKIP_BLANKS;
7405 if ((RAW == 's') && (NXT(1) == 't') &&
7406 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7407 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7408 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7409 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7410 SKIP(10);
7411 SKIP_BLANKS;
7412 if (RAW != '=') {
7413 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7415 ctxt->sax->error(ctxt->userData,
7416 "XML standalone declaration : expected '='\n");
7417 ctxt->wellFormed = 0;
7418 ctxt->disableSAX = 1;
7419 return(standalone);
7420 }
7421 NEXT;
7422 SKIP_BLANKS;
7423 if (RAW == '\''){
7424 NEXT;
7425 if ((RAW == 'n') && (NXT(1) == 'o')) {
7426 standalone = 0;
7427 SKIP(2);
7428 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7429 (NXT(2) == 's')) {
7430 standalone = 1;
7431 SKIP(3);
7432 } else {
7433 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7435 ctxt->sax->error(ctxt->userData,
7436 "standalone accepts only 'yes' or 'no'\n");
7437 ctxt->wellFormed = 0;
7438 ctxt->disableSAX = 1;
7439 }
7440 if (RAW != '\'') {
7441 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7443 ctxt->sax->error(ctxt->userData, "String not closed\n");
7444 ctxt->wellFormed = 0;
7445 ctxt->disableSAX = 1;
7446 } else
7447 NEXT;
7448 } else if (RAW == '"'){
7449 NEXT;
7450 if ((RAW == 'n') && (NXT(1) == 'o')) {
7451 standalone = 0;
7452 SKIP(2);
7453 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7454 (NXT(2) == 's')) {
7455 standalone = 1;
7456 SKIP(3);
7457 } else {
7458 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7460 ctxt->sax->error(ctxt->userData,
7461 "standalone accepts only 'yes' or 'no'\n");
7462 ctxt->wellFormed = 0;
7463 ctxt->disableSAX = 1;
7464 }
7465 if (RAW != '"') {
7466 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7468 ctxt->sax->error(ctxt->userData, "String not closed\n");
7469 ctxt->wellFormed = 0;
7470 ctxt->disableSAX = 1;
7471 } else
7472 NEXT;
7473 } else {
7474 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7476 ctxt->sax->error(ctxt->userData,
7477 "Standalone value not found\n");
7478 ctxt->wellFormed = 0;
7479 ctxt->disableSAX = 1;
7480 }
7481 }
7482 return(standalone);
7483}
7484
7485/**
7486 * xmlParseXMLDecl:
7487 * @ctxt: an XML parser context
7488 *
7489 * parse an XML declaration header
7490 *
7491 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7492 */
7493
7494void
7495xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7496 xmlChar *version;
7497
7498 /*
7499 * We know that '<?xml' is here.
7500 */
7501 SKIP(5);
7502
7503 if (!IS_BLANK(RAW)) {
7504 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7506 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7507 ctxt->wellFormed = 0;
7508 ctxt->disableSAX = 1;
7509 }
7510 SKIP_BLANKS;
7511
7512 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007513 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007514 */
7515 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007516 if (version == NULL) {
7517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7518 ctxt->sax->error(ctxt->userData,
7519 "Malformed declaration expecting version\n");
7520 ctxt->wellFormed = 0;
7521 ctxt->disableSAX = 1;
7522 } else {
7523 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7524 /*
7525 * TODO: Blueberry should be detected here
7526 */
7527 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7528 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7529 version);
7530 }
7531 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007532 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007533 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007534 }
Owen Taylor3473f882001-02-23 17:55:21 +00007535
7536 /*
7537 * We may have the encoding declaration
7538 */
7539 if (!IS_BLANK(RAW)) {
7540 if ((RAW == '?') && (NXT(1) == '>')) {
7541 SKIP(2);
7542 return;
7543 }
7544 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7546 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7547 ctxt->wellFormed = 0;
7548 ctxt->disableSAX = 1;
7549 }
7550 xmlParseEncodingDecl(ctxt);
7551 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7552 /*
7553 * The XML REC instructs us to stop parsing right here
7554 */
7555 return;
7556 }
7557
7558 /*
7559 * We may have the standalone status.
7560 */
7561 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7562 if ((RAW == '?') && (NXT(1) == '>')) {
7563 SKIP(2);
7564 return;
7565 }
7566 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7568 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7569 ctxt->wellFormed = 0;
7570 ctxt->disableSAX = 1;
7571 }
7572 SKIP_BLANKS;
7573 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7574
7575 SKIP_BLANKS;
7576 if ((RAW == '?') && (NXT(1) == '>')) {
7577 SKIP(2);
7578 } else if (RAW == '>') {
7579 /* Deprecated old WD ... */
7580 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7582 ctxt->sax->error(ctxt->userData,
7583 "XML declaration must end-up with '?>'\n");
7584 ctxt->wellFormed = 0;
7585 ctxt->disableSAX = 1;
7586 NEXT;
7587 } else {
7588 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7590 ctxt->sax->error(ctxt->userData,
7591 "parsing XML declaration: '?>' expected\n");
7592 ctxt->wellFormed = 0;
7593 ctxt->disableSAX = 1;
7594 MOVETO_ENDTAG(CUR_PTR);
7595 NEXT;
7596 }
7597}
7598
7599/**
7600 * xmlParseMisc:
7601 * @ctxt: an XML parser context
7602 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007603 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007604 *
7605 * [27] Misc ::= Comment | PI | S
7606 */
7607
7608void
7609xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007610 while (((RAW == '<') && (NXT(1) == '?')) ||
7611 ((RAW == '<') && (NXT(1) == '!') &&
7612 (NXT(2) == '-') && (NXT(3) == '-')) ||
7613 IS_BLANK(CUR)) {
7614 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007615 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007616 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007617 NEXT;
7618 } else
7619 xmlParseComment(ctxt);
7620 }
7621}
7622
7623/**
7624 * xmlParseDocument:
7625 * @ctxt: an XML parser context
7626 *
7627 * parse an XML document (and build a tree if using the standard SAX
7628 * interface).
7629 *
7630 * [1] document ::= prolog element Misc*
7631 *
7632 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7633 *
7634 * Returns 0, -1 in case of error. the parser context is augmented
7635 * as a result of the parsing.
7636 */
7637
7638int
7639xmlParseDocument(xmlParserCtxtPtr ctxt) {
7640 xmlChar start[4];
7641 xmlCharEncoding enc;
7642
7643 xmlInitParser();
7644
7645 GROW;
7646
7647 /*
7648 * SAX: beginning of the document processing.
7649 */
7650 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7651 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7652
Daniel Veillard50f34372001-08-03 12:06:36 +00007653 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007654 /*
7655 * Get the 4 first bytes and decode the charset
7656 * if enc != XML_CHAR_ENCODING_NONE
7657 * plug some encoding conversion routines.
7658 */
7659 start[0] = RAW;
7660 start[1] = NXT(1);
7661 start[2] = NXT(2);
7662 start[3] = NXT(3);
7663 enc = xmlDetectCharEncoding(start, 4);
7664 if (enc != XML_CHAR_ENCODING_NONE) {
7665 xmlSwitchEncoding(ctxt, enc);
7666 }
Owen Taylor3473f882001-02-23 17:55:21 +00007667 }
7668
7669
7670 if (CUR == 0) {
7671 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7673 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7674 ctxt->wellFormed = 0;
7675 ctxt->disableSAX = 1;
7676 }
7677
7678 /*
7679 * Check for the XMLDecl in the Prolog.
7680 */
7681 GROW;
7682 if ((RAW == '<') && (NXT(1) == '?') &&
7683 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7684 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7685
7686 /*
7687 * Note that we will switch encoding on the fly.
7688 */
7689 xmlParseXMLDecl(ctxt);
7690 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7691 /*
7692 * The XML REC instructs us to stop parsing right here
7693 */
7694 return(-1);
7695 }
7696 ctxt->standalone = ctxt->input->standalone;
7697 SKIP_BLANKS;
7698 } else {
7699 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7700 }
7701 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7702 ctxt->sax->startDocument(ctxt->userData);
7703
7704 /*
7705 * The Misc part of the Prolog
7706 */
7707 GROW;
7708 xmlParseMisc(ctxt);
7709
7710 /*
7711 * Then possibly doc type declaration(s) and more Misc
7712 * (doctypedecl Misc*)?
7713 */
7714 GROW;
7715 if ((RAW == '<') && (NXT(1) == '!') &&
7716 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7717 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7718 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7719 (NXT(8) == 'E')) {
7720
7721 ctxt->inSubset = 1;
7722 xmlParseDocTypeDecl(ctxt);
7723 if (RAW == '[') {
7724 ctxt->instate = XML_PARSER_DTD;
7725 xmlParseInternalSubset(ctxt);
7726 }
7727
7728 /*
7729 * Create and update the external subset.
7730 */
7731 ctxt->inSubset = 2;
7732 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7733 (!ctxt->disableSAX))
7734 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7735 ctxt->extSubSystem, ctxt->extSubURI);
7736 ctxt->inSubset = 0;
7737
7738
7739 ctxt->instate = XML_PARSER_PROLOG;
7740 xmlParseMisc(ctxt);
7741 }
7742
7743 /*
7744 * Time to start parsing the tree itself
7745 */
7746 GROW;
7747 if (RAW != '<') {
7748 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7750 ctxt->sax->error(ctxt->userData,
7751 "Start tag expected, '<' not found\n");
7752 ctxt->wellFormed = 0;
7753 ctxt->disableSAX = 1;
7754 ctxt->instate = XML_PARSER_EOF;
7755 } else {
7756 ctxt->instate = XML_PARSER_CONTENT;
7757 xmlParseElement(ctxt);
7758 ctxt->instate = XML_PARSER_EPILOG;
7759
7760
7761 /*
7762 * The Misc part at the end
7763 */
7764 xmlParseMisc(ctxt);
7765
Daniel Veillard561b7f82002-03-20 21:55:57 +00007766 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007767 ctxt->errNo = XML_ERR_DOCUMENT_END;
7768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7769 ctxt->sax->error(ctxt->userData,
7770 "Extra content at the end of the document\n");
7771 ctxt->wellFormed = 0;
7772 ctxt->disableSAX = 1;
7773 }
7774 ctxt->instate = XML_PARSER_EOF;
7775 }
7776
7777 /*
7778 * SAX: end of the document processing.
7779 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007780 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007781 ctxt->sax->endDocument(ctxt->userData);
7782
Daniel Veillard5997aca2002-03-18 18:36:20 +00007783 /*
7784 * Remove locally kept entity definitions if the tree was not built
7785 */
7786 if ((ctxt->myDoc != NULL) &&
7787 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7788 xmlFreeDoc(ctxt->myDoc);
7789 ctxt->myDoc = NULL;
7790 }
7791
Daniel Veillardc7612992002-02-17 22:47:37 +00007792 if (! ctxt->wellFormed) {
7793 ctxt->valid = 0;
7794 return(-1);
7795 }
Owen Taylor3473f882001-02-23 17:55:21 +00007796 return(0);
7797}
7798
7799/**
7800 * xmlParseExtParsedEnt:
7801 * @ctxt: an XML parser context
7802 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007803 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007804 * An external general parsed entity is well-formed if it matches the
7805 * production labeled extParsedEnt.
7806 *
7807 * [78] extParsedEnt ::= TextDecl? content
7808 *
7809 * Returns 0, -1 in case of error. the parser context is augmented
7810 * as a result of the parsing.
7811 */
7812
7813int
7814xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7815 xmlChar start[4];
7816 xmlCharEncoding enc;
7817
7818 xmlDefaultSAXHandlerInit();
7819
7820 GROW;
7821
7822 /*
7823 * SAX: beginning of the document processing.
7824 */
7825 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7826 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7827
7828 /*
7829 * Get the 4 first bytes and decode the charset
7830 * if enc != XML_CHAR_ENCODING_NONE
7831 * plug some encoding conversion routines.
7832 */
7833 start[0] = RAW;
7834 start[1] = NXT(1);
7835 start[2] = NXT(2);
7836 start[3] = NXT(3);
7837 enc = xmlDetectCharEncoding(start, 4);
7838 if (enc != XML_CHAR_ENCODING_NONE) {
7839 xmlSwitchEncoding(ctxt, enc);
7840 }
7841
7842
7843 if (CUR == 0) {
7844 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7846 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7847 ctxt->wellFormed = 0;
7848 ctxt->disableSAX = 1;
7849 }
7850
7851 /*
7852 * Check for the XMLDecl in the Prolog.
7853 */
7854 GROW;
7855 if ((RAW == '<') && (NXT(1) == '?') &&
7856 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7857 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7858
7859 /*
7860 * Note that we will switch encoding on the fly.
7861 */
7862 xmlParseXMLDecl(ctxt);
7863 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7864 /*
7865 * The XML REC instructs us to stop parsing right here
7866 */
7867 return(-1);
7868 }
7869 SKIP_BLANKS;
7870 } else {
7871 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7872 }
7873 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7874 ctxt->sax->startDocument(ctxt->userData);
7875
7876 /*
7877 * Doing validity checking on chunk doesn't make sense
7878 */
7879 ctxt->instate = XML_PARSER_CONTENT;
7880 ctxt->validate = 0;
7881 ctxt->loadsubset = 0;
7882 ctxt->depth = 0;
7883
7884 xmlParseContent(ctxt);
7885
7886 if ((RAW == '<') && (NXT(1) == '/')) {
7887 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7889 ctxt->sax->error(ctxt->userData,
7890 "chunk is not well balanced\n");
7891 ctxt->wellFormed = 0;
7892 ctxt->disableSAX = 1;
7893 } else if (RAW != 0) {
7894 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7896 ctxt->sax->error(ctxt->userData,
7897 "extra content at the end of well balanced chunk\n");
7898 ctxt->wellFormed = 0;
7899 ctxt->disableSAX = 1;
7900 }
7901
7902 /*
7903 * SAX: end of the document processing.
7904 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007905 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007906 ctxt->sax->endDocument(ctxt->userData);
7907
7908 if (! ctxt->wellFormed) return(-1);
7909 return(0);
7910}
7911
7912/************************************************************************
7913 * *
7914 * Progressive parsing interfaces *
7915 * *
7916 ************************************************************************/
7917
7918/**
7919 * xmlParseLookupSequence:
7920 * @ctxt: an XML parser context
7921 * @first: the first char to lookup
7922 * @next: the next char to lookup or zero
7923 * @third: the next char to lookup or zero
7924 *
7925 * Try to find if a sequence (first, next, third) or just (first next) or
7926 * (first) is available in the input stream.
7927 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7928 * to avoid rescanning sequences of bytes, it DOES change the state of the
7929 * parser, do not use liberally.
7930 *
7931 * Returns the index to the current parsing point if the full sequence
7932 * is available, -1 otherwise.
7933 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007934static int
Owen Taylor3473f882001-02-23 17:55:21 +00007935xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7936 xmlChar next, xmlChar third) {
7937 int base, len;
7938 xmlParserInputPtr in;
7939 const xmlChar *buf;
7940
7941 in = ctxt->input;
7942 if (in == NULL) return(-1);
7943 base = in->cur - in->base;
7944 if (base < 0) return(-1);
7945 if (ctxt->checkIndex > base)
7946 base = ctxt->checkIndex;
7947 if (in->buf == NULL) {
7948 buf = in->base;
7949 len = in->length;
7950 } else {
7951 buf = in->buf->buffer->content;
7952 len = in->buf->buffer->use;
7953 }
7954 /* take into account the sequence length */
7955 if (third) len -= 2;
7956 else if (next) len --;
7957 for (;base < len;base++) {
7958 if (buf[base] == first) {
7959 if (third != 0) {
7960 if ((buf[base + 1] != next) ||
7961 (buf[base + 2] != third)) continue;
7962 } else if (next != 0) {
7963 if (buf[base + 1] != next) continue;
7964 }
7965 ctxt->checkIndex = 0;
7966#ifdef DEBUG_PUSH
7967 if (next == 0)
7968 xmlGenericError(xmlGenericErrorContext,
7969 "PP: lookup '%c' found at %d\n",
7970 first, base);
7971 else if (third == 0)
7972 xmlGenericError(xmlGenericErrorContext,
7973 "PP: lookup '%c%c' found at %d\n",
7974 first, next, base);
7975 else
7976 xmlGenericError(xmlGenericErrorContext,
7977 "PP: lookup '%c%c%c' found at %d\n",
7978 first, next, third, base);
7979#endif
7980 return(base - (in->cur - in->base));
7981 }
7982 }
7983 ctxt->checkIndex = base;
7984#ifdef DEBUG_PUSH
7985 if (next == 0)
7986 xmlGenericError(xmlGenericErrorContext,
7987 "PP: lookup '%c' failed\n", first);
7988 else if (third == 0)
7989 xmlGenericError(xmlGenericErrorContext,
7990 "PP: lookup '%c%c' failed\n", first, next);
7991 else
7992 xmlGenericError(xmlGenericErrorContext,
7993 "PP: lookup '%c%c%c' failed\n", first, next, third);
7994#endif
7995 return(-1);
7996}
7997
7998/**
7999 * xmlParseTryOrFinish:
8000 * @ctxt: an XML parser context
8001 * @terminate: last chunk indicator
8002 *
8003 * Try to progress on parsing
8004 *
8005 * Returns zero if no parsing was possible
8006 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008007static int
Owen Taylor3473f882001-02-23 17:55:21 +00008008xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8009 int ret = 0;
8010 int avail;
8011 xmlChar cur, next;
8012
8013#ifdef DEBUG_PUSH
8014 switch (ctxt->instate) {
8015 case XML_PARSER_EOF:
8016 xmlGenericError(xmlGenericErrorContext,
8017 "PP: try EOF\n"); break;
8018 case XML_PARSER_START:
8019 xmlGenericError(xmlGenericErrorContext,
8020 "PP: try START\n"); break;
8021 case XML_PARSER_MISC:
8022 xmlGenericError(xmlGenericErrorContext,
8023 "PP: try MISC\n");break;
8024 case XML_PARSER_COMMENT:
8025 xmlGenericError(xmlGenericErrorContext,
8026 "PP: try COMMENT\n");break;
8027 case XML_PARSER_PROLOG:
8028 xmlGenericError(xmlGenericErrorContext,
8029 "PP: try PROLOG\n");break;
8030 case XML_PARSER_START_TAG:
8031 xmlGenericError(xmlGenericErrorContext,
8032 "PP: try START_TAG\n");break;
8033 case XML_PARSER_CONTENT:
8034 xmlGenericError(xmlGenericErrorContext,
8035 "PP: try CONTENT\n");break;
8036 case XML_PARSER_CDATA_SECTION:
8037 xmlGenericError(xmlGenericErrorContext,
8038 "PP: try CDATA_SECTION\n");break;
8039 case XML_PARSER_END_TAG:
8040 xmlGenericError(xmlGenericErrorContext,
8041 "PP: try END_TAG\n");break;
8042 case XML_PARSER_ENTITY_DECL:
8043 xmlGenericError(xmlGenericErrorContext,
8044 "PP: try ENTITY_DECL\n");break;
8045 case XML_PARSER_ENTITY_VALUE:
8046 xmlGenericError(xmlGenericErrorContext,
8047 "PP: try ENTITY_VALUE\n");break;
8048 case XML_PARSER_ATTRIBUTE_VALUE:
8049 xmlGenericError(xmlGenericErrorContext,
8050 "PP: try ATTRIBUTE_VALUE\n");break;
8051 case XML_PARSER_DTD:
8052 xmlGenericError(xmlGenericErrorContext,
8053 "PP: try DTD\n");break;
8054 case XML_PARSER_EPILOG:
8055 xmlGenericError(xmlGenericErrorContext,
8056 "PP: try EPILOG\n");break;
8057 case XML_PARSER_PI:
8058 xmlGenericError(xmlGenericErrorContext,
8059 "PP: try PI\n");break;
8060 case XML_PARSER_IGNORE:
8061 xmlGenericError(xmlGenericErrorContext,
8062 "PP: try IGNORE\n");break;
8063 }
8064#endif
8065
8066 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008067 SHRINK;
8068
Owen Taylor3473f882001-02-23 17:55:21 +00008069 /*
8070 * Pop-up of finished entities.
8071 */
8072 while ((RAW == 0) && (ctxt->inputNr > 1))
8073 xmlPopInput(ctxt);
8074
8075 if (ctxt->input ==NULL) break;
8076 if (ctxt->input->buf == NULL)
8077 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008078 else {
8079 /*
8080 * If we are operating on converted input, try to flush
8081 * remainng chars to avoid them stalling in the non-converted
8082 * buffer.
8083 */
8084 if ((ctxt->input->buf->raw != NULL) &&
8085 (ctxt->input->buf->raw->use > 0)) {
8086 int base = ctxt->input->base -
8087 ctxt->input->buf->buffer->content;
8088 int current = ctxt->input->cur - ctxt->input->base;
8089
8090 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8091 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8092 ctxt->input->cur = ctxt->input->base + current;
8093 ctxt->input->end =
8094 &ctxt->input->buf->buffer->content[
8095 ctxt->input->buf->buffer->use];
8096 }
8097 avail = ctxt->input->buf->buffer->use -
8098 (ctxt->input->cur - ctxt->input->base);
8099 }
Owen Taylor3473f882001-02-23 17:55:21 +00008100 if (avail < 1)
8101 goto done;
8102 switch (ctxt->instate) {
8103 case XML_PARSER_EOF:
8104 /*
8105 * Document parsing is done !
8106 */
8107 goto done;
8108 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008109 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8110 xmlChar start[4];
8111 xmlCharEncoding enc;
8112
8113 /*
8114 * Very first chars read from the document flow.
8115 */
8116 if (avail < 4)
8117 goto done;
8118
8119 /*
8120 * Get the 4 first bytes and decode the charset
8121 * if enc != XML_CHAR_ENCODING_NONE
8122 * plug some encoding conversion routines.
8123 */
8124 start[0] = RAW;
8125 start[1] = NXT(1);
8126 start[2] = NXT(2);
8127 start[3] = NXT(3);
8128 enc = xmlDetectCharEncoding(start, 4);
8129 if (enc != XML_CHAR_ENCODING_NONE) {
8130 xmlSwitchEncoding(ctxt, enc);
8131 }
8132 break;
8133 }
Owen Taylor3473f882001-02-23 17:55:21 +00008134
8135 cur = ctxt->input->cur[0];
8136 next = ctxt->input->cur[1];
8137 if (cur == 0) {
8138 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8139 ctxt->sax->setDocumentLocator(ctxt->userData,
8140 &xmlDefaultSAXLocator);
8141 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8143 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8144 ctxt->wellFormed = 0;
8145 ctxt->disableSAX = 1;
8146 ctxt->instate = XML_PARSER_EOF;
8147#ifdef DEBUG_PUSH
8148 xmlGenericError(xmlGenericErrorContext,
8149 "PP: entering EOF\n");
8150#endif
8151 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8152 ctxt->sax->endDocument(ctxt->userData);
8153 goto done;
8154 }
8155 if ((cur == '<') && (next == '?')) {
8156 /* PI or XML decl */
8157 if (avail < 5) return(ret);
8158 if ((!terminate) &&
8159 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8160 return(ret);
8161 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8162 ctxt->sax->setDocumentLocator(ctxt->userData,
8163 &xmlDefaultSAXLocator);
8164 if ((ctxt->input->cur[2] == 'x') &&
8165 (ctxt->input->cur[3] == 'm') &&
8166 (ctxt->input->cur[4] == 'l') &&
8167 (IS_BLANK(ctxt->input->cur[5]))) {
8168 ret += 5;
8169#ifdef DEBUG_PUSH
8170 xmlGenericError(xmlGenericErrorContext,
8171 "PP: Parsing XML Decl\n");
8172#endif
8173 xmlParseXMLDecl(ctxt);
8174 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8175 /*
8176 * The XML REC instructs us to stop parsing right
8177 * here
8178 */
8179 ctxt->instate = XML_PARSER_EOF;
8180 return(0);
8181 }
8182 ctxt->standalone = ctxt->input->standalone;
8183 if ((ctxt->encoding == NULL) &&
8184 (ctxt->input->encoding != NULL))
8185 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8186 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8187 (!ctxt->disableSAX))
8188 ctxt->sax->startDocument(ctxt->userData);
8189 ctxt->instate = XML_PARSER_MISC;
8190#ifdef DEBUG_PUSH
8191 xmlGenericError(xmlGenericErrorContext,
8192 "PP: entering MISC\n");
8193#endif
8194 } else {
8195 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8196 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8197 (!ctxt->disableSAX))
8198 ctxt->sax->startDocument(ctxt->userData);
8199 ctxt->instate = XML_PARSER_MISC;
8200#ifdef DEBUG_PUSH
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: entering MISC\n");
8203#endif
8204 }
8205 } else {
8206 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8207 ctxt->sax->setDocumentLocator(ctxt->userData,
8208 &xmlDefaultSAXLocator);
8209 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8210 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8211 (!ctxt->disableSAX))
8212 ctxt->sax->startDocument(ctxt->userData);
8213 ctxt->instate = XML_PARSER_MISC;
8214#ifdef DEBUG_PUSH
8215 xmlGenericError(xmlGenericErrorContext,
8216 "PP: entering MISC\n");
8217#endif
8218 }
8219 break;
8220 case XML_PARSER_MISC:
8221 SKIP_BLANKS;
8222 if (ctxt->input->buf == NULL)
8223 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8224 else
8225 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8226 if (avail < 2)
8227 goto done;
8228 cur = ctxt->input->cur[0];
8229 next = ctxt->input->cur[1];
8230 if ((cur == '<') && (next == '?')) {
8231 if ((!terminate) &&
8232 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8233 goto done;
8234#ifdef DEBUG_PUSH
8235 xmlGenericError(xmlGenericErrorContext,
8236 "PP: Parsing PI\n");
8237#endif
8238 xmlParsePI(ctxt);
8239 } else if ((cur == '<') && (next == '!') &&
8240 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8241 if ((!terminate) &&
8242 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8243 goto done;
8244#ifdef DEBUG_PUSH
8245 xmlGenericError(xmlGenericErrorContext,
8246 "PP: Parsing Comment\n");
8247#endif
8248 xmlParseComment(ctxt);
8249 ctxt->instate = XML_PARSER_MISC;
8250 } else if ((cur == '<') && (next == '!') &&
8251 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8252 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8253 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8254 (ctxt->input->cur[8] == 'E')) {
8255 if ((!terminate) &&
8256 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8257 goto done;
8258#ifdef DEBUG_PUSH
8259 xmlGenericError(xmlGenericErrorContext,
8260 "PP: Parsing internal subset\n");
8261#endif
8262 ctxt->inSubset = 1;
8263 xmlParseDocTypeDecl(ctxt);
8264 if (RAW == '[') {
8265 ctxt->instate = XML_PARSER_DTD;
8266#ifdef DEBUG_PUSH
8267 xmlGenericError(xmlGenericErrorContext,
8268 "PP: entering DTD\n");
8269#endif
8270 } else {
8271 /*
8272 * Create and update the external subset.
8273 */
8274 ctxt->inSubset = 2;
8275 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8276 (ctxt->sax->externalSubset != NULL))
8277 ctxt->sax->externalSubset(ctxt->userData,
8278 ctxt->intSubName, ctxt->extSubSystem,
8279 ctxt->extSubURI);
8280 ctxt->inSubset = 0;
8281 ctxt->instate = XML_PARSER_PROLOG;
8282#ifdef DEBUG_PUSH
8283 xmlGenericError(xmlGenericErrorContext,
8284 "PP: entering PROLOG\n");
8285#endif
8286 }
8287 } else if ((cur == '<') && (next == '!') &&
8288 (avail < 9)) {
8289 goto done;
8290 } else {
8291 ctxt->instate = XML_PARSER_START_TAG;
8292#ifdef DEBUG_PUSH
8293 xmlGenericError(xmlGenericErrorContext,
8294 "PP: entering START_TAG\n");
8295#endif
8296 }
8297 break;
8298 case XML_PARSER_IGNORE:
8299 xmlGenericError(xmlGenericErrorContext,
8300 "PP: internal error, state == IGNORE");
8301 ctxt->instate = XML_PARSER_DTD;
8302#ifdef DEBUG_PUSH
8303 xmlGenericError(xmlGenericErrorContext,
8304 "PP: entering DTD\n");
8305#endif
8306 break;
8307 case XML_PARSER_PROLOG:
8308 SKIP_BLANKS;
8309 if (ctxt->input->buf == NULL)
8310 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8311 else
8312 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8313 if (avail < 2)
8314 goto done;
8315 cur = ctxt->input->cur[0];
8316 next = ctxt->input->cur[1];
8317 if ((cur == '<') && (next == '?')) {
8318 if ((!terminate) &&
8319 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8320 goto done;
8321#ifdef DEBUG_PUSH
8322 xmlGenericError(xmlGenericErrorContext,
8323 "PP: Parsing PI\n");
8324#endif
8325 xmlParsePI(ctxt);
8326 } else if ((cur == '<') && (next == '!') &&
8327 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8328 if ((!terminate) &&
8329 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8330 goto done;
8331#ifdef DEBUG_PUSH
8332 xmlGenericError(xmlGenericErrorContext,
8333 "PP: Parsing Comment\n");
8334#endif
8335 xmlParseComment(ctxt);
8336 ctxt->instate = XML_PARSER_PROLOG;
8337 } else if ((cur == '<') && (next == '!') &&
8338 (avail < 4)) {
8339 goto done;
8340 } else {
8341 ctxt->instate = XML_PARSER_START_TAG;
8342#ifdef DEBUG_PUSH
8343 xmlGenericError(xmlGenericErrorContext,
8344 "PP: entering START_TAG\n");
8345#endif
8346 }
8347 break;
8348 case XML_PARSER_EPILOG:
8349 SKIP_BLANKS;
8350 if (ctxt->input->buf == NULL)
8351 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8352 else
8353 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8354 if (avail < 2)
8355 goto done;
8356 cur = ctxt->input->cur[0];
8357 next = ctxt->input->cur[1];
8358 if ((cur == '<') && (next == '?')) {
8359 if ((!terminate) &&
8360 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8361 goto done;
8362#ifdef DEBUG_PUSH
8363 xmlGenericError(xmlGenericErrorContext,
8364 "PP: Parsing PI\n");
8365#endif
8366 xmlParsePI(ctxt);
8367 ctxt->instate = XML_PARSER_EPILOG;
8368 } else if ((cur == '<') && (next == '!') &&
8369 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8370 if ((!terminate) &&
8371 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8372 goto done;
8373#ifdef DEBUG_PUSH
8374 xmlGenericError(xmlGenericErrorContext,
8375 "PP: Parsing Comment\n");
8376#endif
8377 xmlParseComment(ctxt);
8378 ctxt->instate = XML_PARSER_EPILOG;
8379 } else if ((cur == '<') && (next == '!') &&
8380 (avail < 4)) {
8381 goto done;
8382 } else {
8383 ctxt->errNo = XML_ERR_DOCUMENT_END;
8384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8385 ctxt->sax->error(ctxt->userData,
8386 "Extra content at the end of the document\n");
8387 ctxt->wellFormed = 0;
8388 ctxt->disableSAX = 1;
8389 ctxt->instate = XML_PARSER_EOF;
8390#ifdef DEBUG_PUSH
8391 xmlGenericError(xmlGenericErrorContext,
8392 "PP: entering EOF\n");
8393#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008394 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008395 ctxt->sax->endDocument(ctxt->userData);
8396 goto done;
8397 }
8398 break;
8399 case XML_PARSER_START_TAG: {
8400 xmlChar *name, *oldname;
8401
8402 if ((avail < 2) && (ctxt->inputNr == 1))
8403 goto done;
8404 cur = ctxt->input->cur[0];
8405 if (cur != '<') {
8406 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8408 ctxt->sax->error(ctxt->userData,
8409 "Start tag expect, '<' not found\n");
8410 ctxt->wellFormed = 0;
8411 ctxt->disableSAX = 1;
8412 ctxt->instate = XML_PARSER_EOF;
8413#ifdef DEBUG_PUSH
8414 xmlGenericError(xmlGenericErrorContext,
8415 "PP: entering EOF\n");
8416#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008417 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008418 ctxt->sax->endDocument(ctxt->userData);
8419 goto done;
8420 }
8421 if ((!terminate) &&
8422 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8423 goto done;
8424 if (ctxt->spaceNr == 0)
8425 spacePush(ctxt, -1);
8426 else
8427 spacePush(ctxt, *ctxt->space);
8428 name = xmlParseStartTag(ctxt);
8429 if (name == NULL) {
8430 spacePop(ctxt);
8431 ctxt->instate = XML_PARSER_EOF;
8432#ifdef DEBUG_PUSH
8433 xmlGenericError(xmlGenericErrorContext,
8434 "PP: entering EOF\n");
8435#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008436 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008437 ctxt->sax->endDocument(ctxt->userData);
8438 goto done;
8439 }
8440 namePush(ctxt, xmlStrdup(name));
8441
8442 /*
8443 * [ VC: Root Element Type ]
8444 * The Name in the document type declaration must match
8445 * the element type of the root element.
8446 */
8447 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8448 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8449 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8450
8451 /*
8452 * Check for an Empty Element.
8453 */
8454 if ((RAW == '/') && (NXT(1) == '>')) {
8455 SKIP(2);
8456 if ((ctxt->sax != NULL) &&
8457 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8458 ctxt->sax->endElement(ctxt->userData, name);
8459 xmlFree(name);
8460 oldname = namePop(ctxt);
8461 spacePop(ctxt);
8462 if (oldname != NULL) {
8463#ifdef DEBUG_STACK
8464 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8465#endif
8466 xmlFree(oldname);
8467 }
8468 if (ctxt->name == NULL) {
8469 ctxt->instate = XML_PARSER_EPILOG;
8470#ifdef DEBUG_PUSH
8471 xmlGenericError(xmlGenericErrorContext,
8472 "PP: entering EPILOG\n");
8473#endif
8474 } else {
8475 ctxt->instate = XML_PARSER_CONTENT;
8476#ifdef DEBUG_PUSH
8477 xmlGenericError(xmlGenericErrorContext,
8478 "PP: entering CONTENT\n");
8479#endif
8480 }
8481 break;
8482 }
8483 if (RAW == '>') {
8484 NEXT;
8485 } else {
8486 ctxt->errNo = XML_ERR_GT_REQUIRED;
8487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8488 ctxt->sax->error(ctxt->userData,
8489 "Couldn't find end of Start Tag %s\n",
8490 name);
8491 ctxt->wellFormed = 0;
8492 ctxt->disableSAX = 1;
8493
8494 /*
8495 * end of parsing of this node.
8496 */
8497 nodePop(ctxt);
8498 oldname = namePop(ctxt);
8499 spacePop(ctxt);
8500 if (oldname != NULL) {
8501#ifdef DEBUG_STACK
8502 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8503#endif
8504 xmlFree(oldname);
8505 }
8506 }
8507 xmlFree(name);
8508 ctxt->instate = XML_PARSER_CONTENT;
8509#ifdef DEBUG_PUSH
8510 xmlGenericError(xmlGenericErrorContext,
8511 "PP: entering CONTENT\n");
8512#endif
8513 break;
8514 }
8515 case XML_PARSER_CONTENT: {
8516 const xmlChar *test;
8517 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008518 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008519
8520 /*
8521 * Handle preparsed entities and charRef
8522 */
8523 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008524 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008525
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008526 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008527 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8528 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008529 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008530 ctxt->token = 0;
8531 }
8532 if ((avail < 2) && (ctxt->inputNr == 1))
8533 goto done;
8534 cur = ctxt->input->cur[0];
8535 next = ctxt->input->cur[1];
8536
8537 test = CUR_PTR;
8538 cons = ctxt->input->consumed;
8539 tok = ctxt->token;
8540 if ((cur == '<') && (next == '?')) {
8541 if ((!terminate) &&
8542 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8543 goto done;
8544#ifdef DEBUG_PUSH
8545 xmlGenericError(xmlGenericErrorContext,
8546 "PP: Parsing PI\n");
8547#endif
8548 xmlParsePI(ctxt);
8549 } else if ((cur == '<') && (next == '!') &&
8550 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8551 if ((!terminate) &&
8552 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8553 goto done;
8554#ifdef DEBUG_PUSH
8555 xmlGenericError(xmlGenericErrorContext,
8556 "PP: Parsing Comment\n");
8557#endif
8558 xmlParseComment(ctxt);
8559 ctxt->instate = XML_PARSER_CONTENT;
8560 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8561 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8562 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8563 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8564 (ctxt->input->cur[8] == '[')) {
8565 SKIP(9);
8566 ctxt->instate = XML_PARSER_CDATA_SECTION;
8567#ifdef DEBUG_PUSH
8568 xmlGenericError(xmlGenericErrorContext,
8569 "PP: entering CDATA_SECTION\n");
8570#endif
8571 break;
8572 } else if ((cur == '<') && (next == '!') &&
8573 (avail < 9)) {
8574 goto done;
8575 } else if ((cur == '<') && (next == '/')) {
8576 ctxt->instate = XML_PARSER_END_TAG;
8577#ifdef DEBUG_PUSH
8578 xmlGenericError(xmlGenericErrorContext,
8579 "PP: entering END_TAG\n");
8580#endif
8581 break;
8582 } else if (cur == '<') {
8583 ctxt->instate = XML_PARSER_START_TAG;
8584#ifdef DEBUG_PUSH
8585 xmlGenericError(xmlGenericErrorContext,
8586 "PP: entering START_TAG\n");
8587#endif
8588 break;
8589 } else if (cur == '&') {
8590 if ((!terminate) &&
8591 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8592 goto done;
8593#ifdef DEBUG_PUSH
8594 xmlGenericError(xmlGenericErrorContext,
8595 "PP: Parsing Reference\n");
8596#endif
8597 xmlParseReference(ctxt);
8598 } else {
8599 /* TODO Avoid the extra copy, handle directly !!! */
8600 /*
8601 * Goal of the following test is:
8602 * - minimize calls to the SAX 'character' callback
8603 * when they are mergeable
8604 * - handle an problem for isBlank when we only parse
8605 * a sequence of blank chars and the next one is
8606 * not available to check against '<' presence.
8607 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008608 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008609 * of the parser.
8610 */
8611 if ((ctxt->inputNr == 1) &&
8612 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8613 if ((!terminate) &&
8614 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8615 goto done;
8616 }
8617 ctxt->checkIndex = 0;
8618#ifdef DEBUG_PUSH
8619 xmlGenericError(xmlGenericErrorContext,
8620 "PP: Parsing char data\n");
8621#endif
8622 xmlParseCharData(ctxt, 0);
8623 }
8624 /*
8625 * Pop-up of finished entities.
8626 */
8627 while ((RAW == 0) && (ctxt->inputNr > 1))
8628 xmlPopInput(ctxt);
8629 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8630 (tok == ctxt->token)) {
8631 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8633 ctxt->sax->error(ctxt->userData,
8634 "detected an error in element content\n");
8635 ctxt->wellFormed = 0;
8636 ctxt->disableSAX = 1;
8637 ctxt->instate = XML_PARSER_EOF;
8638 break;
8639 }
8640 break;
8641 }
8642 case XML_PARSER_CDATA_SECTION: {
8643 /*
8644 * The Push mode need to have the SAX callback for
8645 * cdataBlock merge back contiguous callbacks.
8646 */
8647 int base;
8648
8649 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8650 if (base < 0) {
8651 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8652 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8653 if (ctxt->sax->cdataBlock != NULL)
8654 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8655 XML_PARSER_BIG_BUFFER_SIZE);
8656 }
8657 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8658 ctxt->checkIndex = 0;
8659 }
8660 goto done;
8661 } else {
8662 if ((ctxt->sax != NULL) && (base > 0) &&
8663 (!ctxt->disableSAX)) {
8664 if (ctxt->sax->cdataBlock != NULL)
8665 ctxt->sax->cdataBlock(ctxt->userData,
8666 ctxt->input->cur, base);
8667 }
8668 SKIP(base + 3);
8669 ctxt->checkIndex = 0;
8670 ctxt->instate = XML_PARSER_CONTENT;
8671#ifdef DEBUG_PUSH
8672 xmlGenericError(xmlGenericErrorContext,
8673 "PP: entering CONTENT\n");
8674#endif
8675 }
8676 break;
8677 }
8678 case XML_PARSER_END_TAG:
8679 if (avail < 2)
8680 goto done;
8681 if ((!terminate) &&
8682 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8683 goto done;
8684 xmlParseEndTag(ctxt);
8685 if (ctxt->name == NULL) {
8686 ctxt->instate = XML_PARSER_EPILOG;
8687#ifdef DEBUG_PUSH
8688 xmlGenericError(xmlGenericErrorContext,
8689 "PP: entering EPILOG\n");
8690#endif
8691 } else {
8692 ctxt->instate = XML_PARSER_CONTENT;
8693#ifdef DEBUG_PUSH
8694 xmlGenericError(xmlGenericErrorContext,
8695 "PP: entering CONTENT\n");
8696#endif
8697 }
8698 break;
8699 case XML_PARSER_DTD: {
8700 /*
8701 * Sorry but progressive parsing of the internal subset
8702 * is not expected to be supported. We first check that
8703 * the full content of the internal subset is available and
8704 * the parsing is launched only at that point.
8705 * Internal subset ends up with "']' S? '>'" in an unescaped
8706 * section and not in a ']]>' sequence which are conditional
8707 * sections (whoever argued to keep that crap in XML deserve
8708 * a place in hell !).
8709 */
8710 int base, i;
8711 xmlChar *buf;
8712 xmlChar quote = 0;
8713
8714 base = ctxt->input->cur - ctxt->input->base;
8715 if (base < 0) return(0);
8716 if (ctxt->checkIndex > base)
8717 base = ctxt->checkIndex;
8718 buf = ctxt->input->buf->buffer->content;
8719 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8720 base++) {
8721 if (quote != 0) {
8722 if (buf[base] == quote)
8723 quote = 0;
8724 continue;
8725 }
8726 if (buf[base] == '"') {
8727 quote = '"';
8728 continue;
8729 }
8730 if (buf[base] == '\'') {
8731 quote = '\'';
8732 continue;
8733 }
8734 if (buf[base] == ']') {
8735 if ((unsigned int) base +1 >=
8736 ctxt->input->buf->buffer->use)
8737 break;
8738 if (buf[base + 1] == ']') {
8739 /* conditional crap, skip both ']' ! */
8740 base++;
8741 continue;
8742 }
8743 for (i = 0;
8744 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8745 i++) {
8746 if (buf[base + i] == '>')
8747 goto found_end_int_subset;
8748 }
8749 break;
8750 }
8751 }
8752 /*
8753 * We didn't found the end of the Internal subset
8754 */
8755 if (quote == 0)
8756 ctxt->checkIndex = base;
8757#ifdef DEBUG_PUSH
8758 if (next == 0)
8759 xmlGenericError(xmlGenericErrorContext,
8760 "PP: lookup of int subset end filed\n");
8761#endif
8762 goto done;
8763
8764found_end_int_subset:
8765 xmlParseInternalSubset(ctxt);
8766 ctxt->inSubset = 2;
8767 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8768 (ctxt->sax->externalSubset != NULL))
8769 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8770 ctxt->extSubSystem, ctxt->extSubURI);
8771 ctxt->inSubset = 0;
8772 ctxt->instate = XML_PARSER_PROLOG;
8773 ctxt->checkIndex = 0;
8774#ifdef DEBUG_PUSH
8775 xmlGenericError(xmlGenericErrorContext,
8776 "PP: entering PROLOG\n");
8777#endif
8778 break;
8779 }
8780 case XML_PARSER_COMMENT:
8781 xmlGenericError(xmlGenericErrorContext,
8782 "PP: internal error, state == COMMENT\n");
8783 ctxt->instate = XML_PARSER_CONTENT;
8784#ifdef DEBUG_PUSH
8785 xmlGenericError(xmlGenericErrorContext,
8786 "PP: entering CONTENT\n");
8787#endif
8788 break;
8789 case XML_PARSER_PI:
8790 xmlGenericError(xmlGenericErrorContext,
8791 "PP: internal error, state == PI\n");
8792 ctxt->instate = XML_PARSER_CONTENT;
8793#ifdef DEBUG_PUSH
8794 xmlGenericError(xmlGenericErrorContext,
8795 "PP: entering CONTENT\n");
8796#endif
8797 break;
8798 case XML_PARSER_ENTITY_DECL:
8799 xmlGenericError(xmlGenericErrorContext,
8800 "PP: internal error, state == ENTITY_DECL\n");
8801 ctxt->instate = XML_PARSER_DTD;
8802#ifdef DEBUG_PUSH
8803 xmlGenericError(xmlGenericErrorContext,
8804 "PP: entering DTD\n");
8805#endif
8806 break;
8807 case XML_PARSER_ENTITY_VALUE:
8808 xmlGenericError(xmlGenericErrorContext,
8809 "PP: internal error, state == ENTITY_VALUE\n");
8810 ctxt->instate = XML_PARSER_CONTENT;
8811#ifdef DEBUG_PUSH
8812 xmlGenericError(xmlGenericErrorContext,
8813 "PP: entering DTD\n");
8814#endif
8815 break;
8816 case XML_PARSER_ATTRIBUTE_VALUE:
8817 xmlGenericError(xmlGenericErrorContext,
8818 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8819 ctxt->instate = XML_PARSER_START_TAG;
8820#ifdef DEBUG_PUSH
8821 xmlGenericError(xmlGenericErrorContext,
8822 "PP: entering START_TAG\n");
8823#endif
8824 break;
8825 case XML_PARSER_SYSTEM_LITERAL:
8826 xmlGenericError(xmlGenericErrorContext,
8827 "PP: internal error, state == SYSTEM_LITERAL\n");
8828 ctxt->instate = XML_PARSER_START_TAG;
8829#ifdef DEBUG_PUSH
8830 xmlGenericError(xmlGenericErrorContext,
8831 "PP: entering START_TAG\n");
8832#endif
8833 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008834 case XML_PARSER_PUBLIC_LITERAL:
8835 xmlGenericError(xmlGenericErrorContext,
8836 "PP: internal error, state == PUBLIC_LITERAL\n");
8837 ctxt->instate = XML_PARSER_START_TAG;
8838#ifdef DEBUG_PUSH
8839 xmlGenericError(xmlGenericErrorContext,
8840 "PP: entering START_TAG\n");
8841#endif
8842 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008843 }
8844 }
8845done:
8846#ifdef DEBUG_PUSH
8847 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8848#endif
8849 return(ret);
8850}
8851
8852/**
Owen Taylor3473f882001-02-23 17:55:21 +00008853 * xmlParseChunk:
8854 * @ctxt: an XML parser context
8855 * @chunk: an char array
8856 * @size: the size in byte of the chunk
8857 * @terminate: last chunk indicator
8858 *
8859 * Parse a Chunk of memory
8860 *
8861 * Returns zero if no error, the xmlParserErrors otherwise.
8862 */
8863int
8864xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8865 int terminate) {
8866 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8867 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8868 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8869 int cur = ctxt->input->cur - ctxt->input->base;
8870
8871 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8872 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8873 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008874 ctxt->input->end =
8875 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008876#ifdef DEBUG_PUSH
8877 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8878#endif
8879
8880 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8881 xmlParseTryOrFinish(ctxt, terminate);
8882 } else if (ctxt->instate != XML_PARSER_EOF) {
8883 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8884 xmlParserInputBufferPtr in = ctxt->input->buf;
8885 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8886 (in->raw != NULL)) {
8887 int nbchars;
8888
8889 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8890 if (nbchars < 0) {
8891 xmlGenericError(xmlGenericErrorContext,
8892 "xmlParseChunk: encoder error\n");
8893 return(XML_ERR_INVALID_ENCODING);
8894 }
8895 }
8896 }
8897 }
8898 xmlParseTryOrFinish(ctxt, terminate);
8899 if (terminate) {
8900 /*
8901 * Check for termination
8902 */
8903 if ((ctxt->instate != XML_PARSER_EOF) &&
8904 (ctxt->instate != XML_PARSER_EPILOG)) {
8905 ctxt->errNo = XML_ERR_DOCUMENT_END;
8906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8907 ctxt->sax->error(ctxt->userData,
8908 "Extra content at the end of the document\n");
8909 ctxt->wellFormed = 0;
8910 ctxt->disableSAX = 1;
8911 }
8912 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008913 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008914 ctxt->sax->endDocument(ctxt->userData);
8915 }
8916 ctxt->instate = XML_PARSER_EOF;
8917 }
8918 return((xmlParserErrors) ctxt->errNo);
8919}
8920
8921/************************************************************************
8922 * *
8923 * I/O front end functions to the parser *
8924 * *
8925 ************************************************************************/
8926
8927/**
8928 * xmlStopParser:
8929 * @ctxt: an XML parser context
8930 *
8931 * Blocks further parser processing
8932 */
8933void
8934xmlStopParser(xmlParserCtxtPtr ctxt) {
8935 ctxt->instate = XML_PARSER_EOF;
8936 if (ctxt->input != NULL)
8937 ctxt->input->cur = BAD_CAST"";
8938}
8939
8940/**
8941 * xmlCreatePushParserCtxt:
8942 * @sax: a SAX handler
8943 * @user_data: The user data returned on SAX callbacks
8944 * @chunk: a pointer to an array of chars
8945 * @size: number of chars in the array
8946 * @filename: an optional file name or URI
8947 *
8948 * Create a parser context for using the XML parser in push mode
8949 * To allow content encoding detection, @size should be >= 4
8950 * The value of @filename is used for fetching external entities
8951 * and error/warning reports.
8952 *
8953 * Returns the new parser context or NULL
8954 */
8955xmlParserCtxtPtr
8956xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8957 const char *chunk, int size, const char *filename) {
8958 xmlParserCtxtPtr ctxt;
8959 xmlParserInputPtr inputStream;
8960 xmlParserInputBufferPtr buf;
8961 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8962
8963 /*
8964 * plug some encoding conversion routines
8965 */
8966 if ((chunk != NULL) && (size >= 4))
8967 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8968
8969 buf = xmlAllocParserInputBuffer(enc);
8970 if (buf == NULL) return(NULL);
8971
8972 ctxt = xmlNewParserCtxt();
8973 if (ctxt == NULL) {
8974 xmlFree(buf);
8975 return(NULL);
8976 }
8977 if (sax != NULL) {
8978 if (ctxt->sax != &xmlDefaultSAXHandler)
8979 xmlFree(ctxt->sax);
8980 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8981 if (ctxt->sax == NULL) {
8982 xmlFree(buf);
8983 xmlFree(ctxt);
8984 return(NULL);
8985 }
8986 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8987 if (user_data != NULL)
8988 ctxt->userData = user_data;
8989 }
8990 if (filename == NULL) {
8991 ctxt->directory = NULL;
8992 } else {
8993 ctxt->directory = xmlParserGetDirectory(filename);
8994 }
8995
8996 inputStream = xmlNewInputStream(ctxt);
8997 if (inputStream == NULL) {
8998 xmlFreeParserCtxt(ctxt);
8999 return(NULL);
9000 }
9001
9002 if (filename == NULL)
9003 inputStream->filename = NULL;
9004 else
9005 inputStream->filename = xmlMemStrdup(filename);
9006 inputStream->buf = buf;
9007 inputStream->base = inputStream->buf->buffer->content;
9008 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009009 inputStream->end =
9010 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009011
9012 inputPush(ctxt, inputStream);
9013
9014 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9015 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009016 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9017 int cur = ctxt->input->cur - ctxt->input->base;
9018
Owen Taylor3473f882001-02-23 17:55:21 +00009019 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009020
9021 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9022 ctxt->input->cur = ctxt->input->base + cur;
9023 ctxt->input->end =
9024 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009025#ifdef DEBUG_PUSH
9026 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9027#endif
9028 }
9029
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009030 if (enc != XML_CHAR_ENCODING_NONE) {
9031 xmlSwitchEncoding(ctxt, enc);
9032 }
9033
Owen Taylor3473f882001-02-23 17:55:21 +00009034 return(ctxt);
9035}
9036
9037/**
9038 * xmlCreateIOParserCtxt:
9039 * @sax: a SAX handler
9040 * @user_data: The user data returned on SAX callbacks
9041 * @ioread: an I/O read function
9042 * @ioclose: an I/O close function
9043 * @ioctx: an I/O handler
9044 * @enc: the charset encoding if known
9045 *
9046 * Create a parser context for using the XML parser with an existing
9047 * I/O stream
9048 *
9049 * Returns the new parser context or NULL
9050 */
9051xmlParserCtxtPtr
9052xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9053 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9054 void *ioctx, xmlCharEncoding enc) {
9055 xmlParserCtxtPtr ctxt;
9056 xmlParserInputPtr inputStream;
9057 xmlParserInputBufferPtr buf;
9058
9059 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9060 if (buf == NULL) return(NULL);
9061
9062 ctxt = xmlNewParserCtxt();
9063 if (ctxt == NULL) {
9064 xmlFree(buf);
9065 return(NULL);
9066 }
9067 if (sax != NULL) {
9068 if (ctxt->sax != &xmlDefaultSAXHandler)
9069 xmlFree(ctxt->sax);
9070 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9071 if (ctxt->sax == NULL) {
9072 xmlFree(buf);
9073 xmlFree(ctxt);
9074 return(NULL);
9075 }
9076 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9077 if (user_data != NULL)
9078 ctxt->userData = user_data;
9079 }
9080
9081 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9082 if (inputStream == NULL) {
9083 xmlFreeParserCtxt(ctxt);
9084 return(NULL);
9085 }
9086 inputPush(ctxt, inputStream);
9087
9088 return(ctxt);
9089}
9090
9091/************************************************************************
9092 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009093 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009094 * *
9095 ************************************************************************/
9096
9097/**
9098 * xmlIOParseDTD:
9099 * @sax: the SAX handler block or NULL
9100 * @input: an Input Buffer
9101 * @enc: the charset encoding if known
9102 *
9103 * Load and parse a DTD
9104 *
9105 * Returns the resulting xmlDtdPtr or NULL in case of error.
9106 * @input will be freed at parsing end.
9107 */
9108
9109xmlDtdPtr
9110xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9111 xmlCharEncoding enc) {
9112 xmlDtdPtr ret = NULL;
9113 xmlParserCtxtPtr ctxt;
9114 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009115 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009116
9117 if (input == NULL)
9118 return(NULL);
9119
9120 ctxt = xmlNewParserCtxt();
9121 if (ctxt == NULL) {
9122 return(NULL);
9123 }
9124
9125 /*
9126 * Set-up the SAX context
9127 */
9128 if (sax != NULL) {
9129 if (ctxt->sax != NULL)
9130 xmlFree(ctxt->sax);
9131 ctxt->sax = sax;
9132 ctxt->userData = NULL;
9133 }
9134
9135 /*
9136 * generate a parser input from the I/O handler
9137 */
9138
9139 pinput = xmlNewIOInputStream(ctxt, input, enc);
9140 if (pinput == NULL) {
9141 if (sax != NULL) ctxt->sax = NULL;
9142 xmlFreeParserCtxt(ctxt);
9143 return(NULL);
9144 }
9145
9146 /*
9147 * plug some encoding conversion routines here.
9148 */
9149 xmlPushInput(ctxt, pinput);
9150
9151 pinput->filename = NULL;
9152 pinput->line = 1;
9153 pinput->col = 1;
9154 pinput->base = ctxt->input->cur;
9155 pinput->cur = ctxt->input->cur;
9156 pinput->free = NULL;
9157
9158 /*
9159 * let's parse that entity knowing it's an external subset.
9160 */
9161 ctxt->inSubset = 2;
9162 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9163 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9164 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009165
9166 if (enc == XML_CHAR_ENCODING_NONE) {
9167 /*
9168 * Get the 4 first bytes and decode the charset
9169 * if enc != XML_CHAR_ENCODING_NONE
9170 * plug some encoding conversion routines.
9171 */
9172 start[0] = RAW;
9173 start[1] = NXT(1);
9174 start[2] = NXT(2);
9175 start[3] = NXT(3);
9176 enc = xmlDetectCharEncoding(start, 4);
9177 if (enc != XML_CHAR_ENCODING_NONE) {
9178 xmlSwitchEncoding(ctxt, enc);
9179 }
9180 }
9181
Owen Taylor3473f882001-02-23 17:55:21 +00009182 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9183
9184 if (ctxt->myDoc != NULL) {
9185 if (ctxt->wellFormed) {
9186 ret = ctxt->myDoc->extSubset;
9187 ctxt->myDoc->extSubset = NULL;
9188 } else {
9189 ret = NULL;
9190 }
9191 xmlFreeDoc(ctxt->myDoc);
9192 ctxt->myDoc = NULL;
9193 }
9194 if (sax != NULL) ctxt->sax = NULL;
9195 xmlFreeParserCtxt(ctxt);
9196
9197 return(ret);
9198}
9199
9200/**
9201 * xmlSAXParseDTD:
9202 * @sax: the SAX handler block
9203 * @ExternalID: a NAME* containing the External ID of the DTD
9204 * @SystemID: a NAME* containing the URL to the DTD
9205 *
9206 * Load and parse an external subset.
9207 *
9208 * Returns the resulting xmlDtdPtr or NULL in case of error.
9209 */
9210
9211xmlDtdPtr
9212xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9213 const xmlChar *SystemID) {
9214 xmlDtdPtr ret = NULL;
9215 xmlParserCtxtPtr ctxt;
9216 xmlParserInputPtr input = NULL;
9217 xmlCharEncoding enc;
9218
9219 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9220
9221 ctxt = xmlNewParserCtxt();
9222 if (ctxt == NULL) {
9223 return(NULL);
9224 }
9225
9226 /*
9227 * Set-up the SAX context
9228 */
9229 if (sax != NULL) {
9230 if (ctxt->sax != NULL)
9231 xmlFree(ctxt->sax);
9232 ctxt->sax = sax;
9233 ctxt->userData = NULL;
9234 }
9235
9236 /*
9237 * Ask the Entity resolver to load the damn thing
9238 */
9239
9240 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9241 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9242 if (input == NULL) {
9243 if (sax != NULL) ctxt->sax = NULL;
9244 xmlFreeParserCtxt(ctxt);
9245 return(NULL);
9246 }
9247
9248 /*
9249 * plug some encoding conversion routines here.
9250 */
9251 xmlPushInput(ctxt, input);
9252 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9253 xmlSwitchEncoding(ctxt, enc);
9254
9255 if (input->filename == NULL)
9256 input->filename = (char *) xmlStrdup(SystemID);
9257 input->line = 1;
9258 input->col = 1;
9259 input->base = ctxt->input->cur;
9260 input->cur = ctxt->input->cur;
9261 input->free = NULL;
9262
9263 /*
9264 * let's parse that entity knowing it's an external subset.
9265 */
9266 ctxt->inSubset = 2;
9267 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9268 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9269 ExternalID, SystemID);
9270 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9271
9272 if (ctxt->myDoc != NULL) {
9273 if (ctxt->wellFormed) {
9274 ret = ctxt->myDoc->extSubset;
9275 ctxt->myDoc->extSubset = NULL;
9276 } else {
9277 ret = NULL;
9278 }
9279 xmlFreeDoc(ctxt->myDoc);
9280 ctxt->myDoc = NULL;
9281 }
9282 if (sax != NULL) ctxt->sax = NULL;
9283 xmlFreeParserCtxt(ctxt);
9284
9285 return(ret);
9286}
9287
9288/**
9289 * xmlParseDTD:
9290 * @ExternalID: a NAME* containing the External ID of the DTD
9291 * @SystemID: a NAME* containing the URL to the DTD
9292 *
9293 * Load and parse an external subset.
9294 *
9295 * Returns the resulting xmlDtdPtr or NULL in case of error.
9296 */
9297
9298xmlDtdPtr
9299xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9300 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9301}
9302
9303/************************************************************************
9304 * *
9305 * Front ends when parsing an Entity *
9306 * *
9307 ************************************************************************/
9308
9309/**
Owen Taylor3473f882001-02-23 17:55:21 +00009310 * xmlParseCtxtExternalEntity:
9311 * @ctx: the existing parsing context
9312 * @URL: the URL for the entity to load
9313 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009314 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009315 *
9316 * Parse an external general entity within an existing parsing context
9317 * An external general parsed entity is well-formed if it matches the
9318 * production labeled extParsedEnt.
9319 *
9320 * [78] extParsedEnt ::= TextDecl? content
9321 *
9322 * Returns 0 if the entity is well formed, -1 in case of args problem and
9323 * the parser error code otherwise
9324 */
9325
9326int
9327xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009328 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009329 xmlParserCtxtPtr ctxt;
9330 xmlDocPtr newDoc;
9331 xmlSAXHandlerPtr oldsax = NULL;
9332 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009333 xmlChar start[4];
9334 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009335
9336 if (ctx->depth > 40) {
9337 return(XML_ERR_ENTITY_LOOP);
9338 }
9339
Daniel Veillardcda96922001-08-21 10:56:31 +00009340 if (lst != NULL)
9341 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009342 if ((URL == NULL) && (ID == NULL))
9343 return(-1);
9344 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9345 return(-1);
9346
9347
9348 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9349 if (ctxt == NULL) return(-1);
9350 ctxt->userData = ctxt;
9351 oldsax = ctxt->sax;
9352 ctxt->sax = ctx->sax;
9353 newDoc = xmlNewDoc(BAD_CAST "1.0");
9354 if (newDoc == NULL) {
9355 xmlFreeParserCtxt(ctxt);
9356 return(-1);
9357 }
9358 if (ctx->myDoc != NULL) {
9359 newDoc->intSubset = ctx->myDoc->intSubset;
9360 newDoc->extSubset = ctx->myDoc->extSubset;
9361 }
9362 if (ctx->myDoc->URL != NULL) {
9363 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9364 }
9365 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9366 if (newDoc->children == NULL) {
9367 ctxt->sax = oldsax;
9368 xmlFreeParserCtxt(ctxt);
9369 newDoc->intSubset = NULL;
9370 newDoc->extSubset = NULL;
9371 xmlFreeDoc(newDoc);
9372 return(-1);
9373 }
9374 nodePush(ctxt, newDoc->children);
9375 if (ctx->myDoc == NULL) {
9376 ctxt->myDoc = newDoc;
9377 } else {
9378 ctxt->myDoc = ctx->myDoc;
9379 newDoc->children->doc = ctx->myDoc;
9380 }
9381
Daniel Veillard87a764e2001-06-20 17:41:10 +00009382 /*
9383 * Get the 4 first bytes and decode the charset
9384 * if enc != XML_CHAR_ENCODING_NONE
9385 * plug some encoding conversion routines.
9386 */
9387 GROW
9388 start[0] = RAW;
9389 start[1] = NXT(1);
9390 start[2] = NXT(2);
9391 start[3] = NXT(3);
9392 enc = xmlDetectCharEncoding(start, 4);
9393 if (enc != XML_CHAR_ENCODING_NONE) {
9394 xmlSwitchEncoding(ctxt, enc);
9395 }
9396
Owen Taylor3473f882001-02-23 17:55:21 +00009397 /*
9398 * Parse a possible text declaration first
9399 */
Owen Taylor3473f882001-02-23 17:55:21 +00009400 if ((RAW == '<') && (NXT(1) == '?') &&
9401 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9402 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9403 xmlParseTextDecl(ctxt);
9404 }
9405
9406 /*
9407 * Doing validity checking on chunk doesn't make sense
9408 */
9409 ctxt->instate = XML_PARSER_CONTENT;
9410 ctxt->validate = ctx->validate;
9411 ctxt->loadsubset = ctx->loadsubset;
9412 ctxt->depth = ctx->depth + 1;
9413 ctxt->replaceEntities = ctx->replaceEntities;
9414 if (ctxt->validate) {
9415 ctxt->vctxt.error = ctx->vctxt.error;
9416 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009417 } else {
9418 ctxt->vctxt.error = NULL;
9419 ctxt->vctxt.warning = NULL;
9420 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009421 ctxt->vctxt.nodeTab = NULL;
9422 ctxt->vctxt.nodeNr = 0;
9423 ctxt->vctxt.nodeMax = 0;
9424 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009425
9426 xmlParseContent(ctxt);
9427
9428 if ((RAW == '<') && (NXT(1) == '/')) {
9429 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9431 ctxt->sax->error(ctxt->userData,
9432 "chunk is not well balanced\n");
9433 ctxt->wellFormed = 0;
9434 ctxt->disableSAX = 1;
9435 } else if (RAW != 0) {
9436 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9438 ctxt->sax->error(ctxt->userData,
9439 "extra content at the end of well balanced chunk\n");
9440 ctxt->wellFormed = 0;
9441 ctxt->disableSAX = 1;
9442 }
9443 if (ctxt->node != newDoc->children) {
9444 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9446 ctxt->sax->error(ctxt->userData,
9447 "chunk is not well balanced\n");
9448 ctxt->wellFormed = 0;
9449 ctxt->disableSAX = 1;
9450 }
9451
9452 if (!ctxt->wellFormed) {
9453 if (ctxt->errNo == 0)
9454 ret = 1;
9455 else
9456 ret = ctxt->errNo;
9457 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009458 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009459 xmlNodePtr cur;
9460
9461 /*
9462 * Return the newly created nodeset after unlinking it from
9463 * they pseudo parent.
9464 */
9465 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009466 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009467 while (cur != NULL) {
9468 cur->parent = NULL;
9469 cur = cur->next;
9470 }
9471 newDoc->children->children = NULL;
9472 }
9473 ret = 0;
9474 }
9475 ctxt->sax = oldsax;
9476 xmlFreeParserCtxt(ctxt);
9477 newDoc->intSubset = NULL;
9478 newDoc->extSubset = NULL;
9479 xmlFreeDoc(newDoc);
9480
9481 return(ret);
9482}
9483
9484/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009485 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009486 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009487 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009488 * @sax: the SAX handler bloc (possibly NULL)
9489 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9490 * @depth: Used for loop detection, use 0
9491 * @URL: the URL for the entity to load
9492 * @ID: the System ID for the entity to load
9493 * @list: the return value for the set of parsed nodes
9494 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009495 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009496 *
9497 * Returns 0 if the entity is well formed, -1 in case of args problem and
9498 * the parser error code otherwise
9499 */
9500
Daniel Veillard257d9102001-05-08 10:41:44 +00009501static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009502xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9503 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009504 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009505 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009506 xmlParserCtxtPtr ctxt;
9507 xmlDocPtr newDoc;
9508 xmlSAXHandlerPtr oldsax = NULL;
9509 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009510 xmlChar start[4];
9511 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009512
9513 if (depth > 40) {
9514 return(XML_ERR_ENTITY_LOOP);
9515 }
9516
9517
9518
9519 if (list != NULL)
9520 *list = NULL;
9521 if ((URL == NULL) && (ID == NULL))
9522 return(-1);
9523 if (doc == NULL) /* @@ relax but check for dereferences */
9524 return(-1);
9525
9526
9527 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9528 if (ctxt == NULL) return(-1);
9529 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009530 if (oldctxt != NULL) {
9531 ctxt->_private = oldctxt->_private;
9532 ctxt->loadsubset = oldctxt->loadsubset;
9533 ctxt->validate = oldctxt->validate;
9534 ctxt->external = oldctxt->external;
9535 } else {
9536 /*
9537 * Doing validity checking on chunk without context
9538 * doesn't make sense
9539 */
9540 ctxt->_private = NULL;
9541 ctxt->validate = 0;
9542 ctxt->external = 2;
9543 ctxt->loadsubset = 0;
9544 }
Owen Taylor3473f882001-02-23 17:55:21 +00009545 if (sax != NULL) {
9546 oldsax = ctxt->sax;
9547 ctxt->sax = sax;
9548 if (user_data != NULL)
9549 ctxt->userData = user_data;
9550 }
9551 newDoc = xmlNewDoc(BAD_CAST "1.0");
9552 if (newDoc == NULL) {
9553 xmlFreeParserCtxt(ctxt);
9554 return(-1);
9555 }
9556 if (doc != NULL) {
9557 newDoc->intSubset = doc->intSubset;
9558 newDoc->extSubset = doc->extSubset;
9559 }
9560 if (doc->URL != NULL) {
9561 newDoc->URL = xmlStrdup(doc->URL);
9562 }
9563 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9564 if (newDoc->children == NULL) {
9565 if (sax != NULL)
9566 ctxt->sax = oldsax;
9567 xmlFreeParserCtxt(ctxt);
9568 newDoc->intSubset = NULL;
9569 newDoc->extSubset = NULL;
9570 xmlFreeDoc(newDoc);
9571 return(-1);
9572 }
9573 nodePush(ctxt, newDoc->children);
9574 if (doc == NULL) {
9575 ctxt->myDoc = newDoc;
9576 } else {
9577 ctxt->myDoc = doc;
9578 newDoc->children->doc = doc;
9579 }
9580
Daniel Veillard87a764e2001-06-20 17:41:10 +00009581 /*
9582 * Get the 4 first bytes and decode the charset
9583 * if enc != XML_CHAR_ENCODING_NONE
9584 * plug some encoding conversion routines.
9585 */
9586 GROW;
9587 start[0] = RAW;
9588 start[1] = NXT(1);
9589 start[2] = NXT(2);
9590 start[3] = NXT(3);
9591 enc = xmlDetectCharEncoding(start, 4);
9592 if (enc != XML_CHAR_ENCODING_NONE) {
9593 xmlSwitchEncoding(ctxt, enc);
9594 }
9595
Owen Taylor3473f882001-02-23 17:55:21 +00009596 /*
9597 * Parse a possible text declaration first
9598 */
Owen Taylor3473f882001-02-23 17:55:21 +00009599 if ((RAW == '<') && (NXT(1) == '?') &&
9600 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9601 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9602 xmlParseTextDecl(ctxt);
9603 }
9604
Owen Taylor3473f882001-02-23 17:55:21 +00009605 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009606 ctxt->depth = depth;
9607
9608 xmlParseContent(ctxt);
9609
Daniel Veillard561b7f82002-03-20 21:55:57 +00009610 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009611 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9613 ctxt->sax->error(ctxt->userData,
9614 "chunk is not well balanced\n");
9615 ctxt->wellFormed = 0;
9616 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009617 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009618 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9620 ctxt->sax->error(ctxt->userData,
9621 "extra content at the end of well balanced chunk\n");
9622 ctxt->wellFormed = 0;
9623 ctxt->disableSAX = 1;
9624 }
9625 if (ctxt->node != newDoc->children) {
9626 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9628 ctxt->sax->error(ctxt->userData,
9629 "chunk is not well balanced\n");
9630 ctxt->wellFormed = 0;
9631 ctxt->disableSAX = 1;
9632 }
9633
9634 if (!ctxt->wellFormed) {
9635 if (ctxt->errNo == 0)
9636 ret = 1;
9637 else
9638 ret = ctxt->errNo;
9639 } else {
9640 if (list != NULL) {
9641 xmlNodePtr cur;
9642
9643 /*
9644 * Return the newly created nodeset after unlinking it from
9645 * they pseudo parent.
9646 */
9647 cur = newDoc->children->children;
9648 *list = cur;
9649 while (cur != NULL) {
9650 cur->parent = NULL;
9651 cur = cur->next;
9652 }
9653 newDoc->children->children = NULL;
9654 }
9655 ret = 0;
9656 }
9657 if (sax != NULL)
9658 ctxt->sax = oldsax;
9659 xmlFreeParserCtxt(ctxt);
9660 newDoc->intSubset = NULL;
9661 newDoc->extSubset = NULL;
9662 xmlFreeDoc(newDoc);
9663
9664 return(ret);
9665}
9666
9667/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009668 * xmlParseExternalEntity:
9669 * @doc: the document the chunk pertains to
9670 * @sax: the SAX handler bloc (possibly NULL)
9671 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9672 * @depth: Used for loop detection, use 0
9673 * @URL: the URL for the entity to load
9674 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009675 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009676 *
9677 * Parse an external general entity
9678 * An external general parsed entity is well-formed if it matches the
9679 * production labeled extParsedEnt.
9680 *
9681 * [78] extParsedEnt ::= TextDecl? content
9682 *
9683 * Returns 0 if the entity is well formed, -1 in case of args problem and
9684 * the parser error code otherwise
9685 */
9686
9687int
9688xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009689 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009690 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009691 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009692}
9693
9694/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009695 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009696 * @doc: the document the chunk pertains to
9697 * @sax: the SAX handler bloc (possibly NULL)
9698 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9699 * @depth: Used for loop detection, use 0
9700 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009701 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009702 *
9703 * Parse a well-balanced chunk of an XML document
9704 * called by the parser
9705 * The allowed sequence for the Well Balanced Chunk is the one defined by
9706 * the content production in the XML grammar:
9707 *
9708 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9709 *
9710 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9711 * the parser error code otherwise
9712 */
9713
9714int
9715xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009716 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009717 xmlParserCtxtPtr ctxt;
9718 xmlDocPtr newDoc;
9719 xmlSAXHandlerPtr oldsax = NULL;
9720 int size;
9721 int ret = 0;
9722
9723 if (depth > 40) {
9724 return(XML_ERR_ENTITY_LOOP);
9725 }
9726
9727
Daniel Veillardcda96922001-08-21 10:56:31 +00009728 if (lst != NULL)
9729 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009730 if (string == NULL)
9731 return(-1);
9732
9733 size = xmlStrlen(string);
9734
9735 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9736 if (ctxt == NULL) return(-1);
9737 ctxt->userData = ctxt;
9738 if (sax != NULL) {
9739 oldsax = ctxt->sax;
9740 ctxt->sax = sax;
9741 if (user_data != NULL)
9742 ctxt->userData = user_data;
9743 }
9744 newDoc = xmlNewDoc(BAD_CAST "1.0");
9745 if (newDoc == NULL) {
9746 xmlFreeParserCtxt(ctxt);
9747 return(-1);
9748 }
9749 if (doc != NULL) {
9750 newDoc->intSubset = doc->intSubset;
9751 newDoc->extSubset = doc->extSubset;
9752 }
9753 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9754 if (newDoc->children == NULL) {
9755 if (sax != NULL)
9756 ctxt->sax = oldsax;
9757 xmlFreeParserCtxt(ctxt);
9758 newDoc->intSubset = NULL;
9759 newDoc->extSubset = NULL;
9760 xmlFreeDoc(newDoc);
9761 return(-1);
9762 }
9763 nodePush(ctxt, newDoc->children);
9764 if (doc == NULL) {
9765 ctxt->myDoc = newDoc;
9766 } else {
9767 ctxt->myDoc = doc;
9768 newDoc->children->doc = doc;
9769 }
9770 ctxt->instate = XML_PARSER_CONTENT;
9771 ctxt->depth = depth;
9772
9773 /*
9774 * Doing validity checking on chunk doesn't make sense
9775 */
9776 ctxt->validate = 0;
9777 ctxt->loadsubset = 0;
9778
9779 xmlParseContent(ctxt);
9780
9781 if ((RAW == '<') && (NXT(1) == '/')) {
9782 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9784 ctxt->sax->error(ctxt->userData,
9785 "chunk is not well balanced\n");
9786 ctxt->wellFormed = 0;
9787 ctxt->disableSAX = 1;
9788 } else if (RAW != 0) {
9789 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9791 ctxt->sax->error(ctxt->userData,
9792 "extra content at the end of well balanced chunk\n");
9793 ctxt->wellFormed = 0;
9794 ctxt->disableSAX = 1;
9795 }
9796 if (ctxt->node != newDoc->children) {
9797 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9799 ctxt->sax->error(ctxt->userData,
9800 "chunk is not well balanced\n");
9801 ctxt->wellFormed = 0;
9802 ctxt->disableSAX = 1;
9803 }
9804
9805 if (!ctxt->wellFormed) {
9806 if (ctxt->errNo == 0)
9807 ret = 1;
9808 else
9809 ret = ctxt->errNo;
9810 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009811 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009812 xmlNodePtr cur;
9813
9814 /*
9815 * Return the newly created nodeset after unlinking it from
9816 * they pseudo parent.
9817 */
9818 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009819 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009820 while (cur != NULL) {
9821 cur->parent = NULL;
9822 cur = cur->next;
9823 }
9824 newDoc->children->children = NULL;
9825 }
9826 ret = 0;
9827 }
9828 if (sax != NULL)
9829 ctxt->sax = oldsax;
9830 xmlFreeParserCtxt(ctxt);
9831 newDoc->intSubset = NULL;
9832 newDoc->extSubset = NULL;
9833 xmlFreeDoc(newDoc);
9834
9835 return(ret);
9836}
9837
9838/**
9839 * xmlSAXParseEntity:
9840 * @sax: the SAX handler block
9841 * @filename: the filename
9842 *
9843 * parse an XML external entity out of context and build a tree.
9844 * It use the given SAX function block to handle the parsing callback.
9845 * If sax is NULL, fallback to the default DOM tree building routines.
9846 *
9847 * [78] extParsedEnt ::= TextDecl? content
9848 *
9849 * This correspond to a "Well Balanced" chunk
9850 *
9851 * Returns the resulting document tree
9852 */
9853
9854xmlDocPtr
9855xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9856 xmlDocPtr ret;
9857 xmlParserCtxtPtr ctxt;
9858 char *directory = NULL;
9859
9860 ctxt = xmlCreateFileParserCtxt(filename);
9861 if (ctxt == NULL) {
9862 return(NULL);
9863 }
9864 if (sax != NULL) {
9865 if (ctxt->sax != NULL)
9866 xmlFree(ctxt->sax);
9867 ctxt->sax = sax;
9868 ctxt->userData = NULL;
9869 }
9870
9871 if ((ctxt->directory == NULL) && (directory == NULL))
9872 directory = xmlParserGetDirectory(filename);
9873
9874 xmlParseExtParsedEnt(ctxt);
9875
9876 if (ctxt->wellFormed)
9877 ret = ctxt->myDoc;
9878 else {
9879 ret = NULL;
9880 xmlFreeDoc(ctxt->myDoc);
9881 ctxt->myDoc = NULL;
9882 }
9883 if (sax != NULL)
9884 ctxt->sax = NULL;
9885 xmlFreeParserCtxt(ctxt);
9886
9887 return(ret);
9888}
9889
9890/**
9891 * xmlParseEntity:
9892 * @filename: the filename
9893 *
9894 * parse an XML external entity out of context and build a tree.
9895 *
9896 * [78] extParsedEnt ::= TextDecl? content
9897 *
9898 * This correspond to a "Well Balanced" chunk
9899 *
9900 * Returns the resulting document tree
9901 */
9902
9903xmlDocPtr
9904xmlParseEntity(const char *filename) {
9905 return(xmlSAXParseEntity(NULL, filename));
9906}
9907
9908/**
9909 * xmlCreateEntityParserCtxt:
9910 * @URL: the entity URL
9911 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009912 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009913 *
9914 * Create a parser context for an external entity
9915 * Automatic support for ZLIB/Compress compressed document is provided
9916 * by default if found at compile-time.
9917 *
9918 * Returns the new parser context or NULL
9919 */
9920xmlParserCtxtPtr
9921xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9922 const xmlChar *base) {
9923 xmlParserCtxtPtr ctxt;
9924 xmlParserInputPtr inputStream;
9925 char *directory = NULL;
9926 xmlChar *uri;
9927
9928 ctxt = xmlNewParserCtxt();
9929 if (ctxt == NULL) {
9930 return(NULL);
9931 }
9932
9933 uri = xmlBuildURI(URL, base);
9934
9935 if (uri == NULL) {
9936 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9937 if (inputStream == NULL) {
9938 xmlFreeParserCtxt(ctxt);
9939 return(NULL);
9940 }
9941
9942 inputPush(ctxt, inputStream);
9943
9944 if ((ctxt->directory == NULL) && (directory == NULL))
9945 directory = xmlParserGetDirectory((char *)URL);
9946 if ((ctxt->directory == NULL) && (directory != NULL))
9947 ctxt->directory = directory;
9948 } else {
9949 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9950 if (inputStream == NULL) {
9951 xmlFree(uri);
9952 xmlFreeParserCtxt(ctxt);
9953 return(NULL);
9954 }
9955
9956 inputPush(ctxt, inputStream);
9957
9958 if ((ctxt->directory == NULL) && (directory == NULL))
9959 directory = xmlParserGetDirectory((char *)uri);
9960 if ((ctxt->directory == NULL) && (directory != NULL))
9961 ctxt->directory = directory;
9962 xmlFree(uri);
9963 }
9964
9965 return(ctxt);
9966}
9967
9968/************************************************************************
9969 * *
9970 * Front ends when parsing from a file *
9971 * *
9972 ************************************************************************/
9973
9974/**
9975 * xmlCreateFileParserCtxt:
9976 * @filename: the filename
9977 *
9978 * Create a parser context for a file content.
9979 * Automatic support for ZLIB/Compress compressed document is provided
9980 * by default if found at compile-time.
9981 *
9982 * Returns the new parser context or NULL
9983 */
9984xmlParserCtxtPtr
9985xmlCreateFileParserCtxt(const char *filename)
9986{
9987 xmlParserCtxtPtr ctxt;
9988 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009989 char *directory = NULL;
9990
Owen Taylor3473f882001-02-23 17:55:21 +00009991 ctxt = xmlNewParserCtxt();
9992 if (ctxt == NULL) {
9993 if (xmlDefaultSAXHandler.error != NULL) {
9994 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9995 }
9996 return(NULL);
9997 }
9998
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009999 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010000 if (inputStream == NULL) {
10001 xmlFreeParserCtxt(ctxt);
10002 return(NULL);
10003 }
10004
Owen Taylor3473f882001-02-23 17:55:21 +000010005 inputPush(ctxt, inputStream);
10006 if ((ctxt->directory == NULL) && (directory == NULL))
10007 directory = xmlParserGetDirectory(filename);
10008 if ((ctxt->directory == NULL) && (directory != NULL))
10009 ctxt->directory = directory;
10010
10011 return(ctxt);
10012}
10013
10014/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010015 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010016 * @sax: the SAX handler block
10017 * @filename: the filename
10018 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10019 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010020 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010021 *
10022 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10023 * compressed document is provided by default if found at compile-time.
10024 * It use the given SAX function block to handle the parsing callback.
10025 * If sax is NULL, fallback to the default DOM tree building routines.
10026 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010027 * User data (void *) is stored within the parser context in the
10028 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010029 *
Owen Taylor3473f882001-02-23 17:55:21 +000010030 * Returns the resulting document tree
10031 */
10032
10033xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010034xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10035 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010036 xmlDocPtr ret;
10037 xmlParserCtxtPtr ctxt;
10038 char *directory = NULL;
10039
Daniel Veillard635ef722001-10-29 11:48:19 +000010040 xmlInitParser();
10041
Owen Taylor3473f882001-02-23 17:55:21 +000010042 ctxt = xmlCreateFileParserCtxt(filename);
10043 if (ctxt == NULL) {
10044 return(NULL);
10045 }
10046 if (sax != NULL) {
10047 if (ctxt->sax != NULL)
10048 xmlFree(ctxt->sax);
10049 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010050 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010051 if (data!=NULL) {
10052 ctxt->_private=data;
10053 }
Owen Taylor3473f882001-02-23 17:55:21 +000010054
10055 if ((ctxt->directory == NULL) && (directory == NULL))
10056 directory = xmlParserGetDirectory(filename);
10057 if ((ctxt->directory == NULL) && (directory != NULL))
10058 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10059
10060 xmlParseDocument(ctxt);
10061
10062 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10063 else {
10064 ret = NULL;
10065 xmlFreeDoc(ctxt->myDoc);
10066 ctxt->myDoc = NULL;
10067 }
10068 if (sax != NULL)
10069 ctxt->sax = NULL;
10070 xmlFreeParserCtxt(ctxt);
10071
10072 return(ret);
10073}
10074
10075/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010076 * xmlSAXParseFile:
10077 * @sax: the SAX handler block
10078 * @filename: the filename
10079 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10080 * documents
10081 *
10082 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10083 * compressed document is provided by default if found at compile-time.
10084 * It use the given SAX function block to handle the parsing callback.
10085 * If sax is NULL, fallback to the default DOM tree building routines.
10086 *
10087 * Returns the resulting document tree
10088 */
10089
10090xmlDocPtr
10091xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10092 int recovery) {
10093 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10094}
10095
10096/**
Owen Taylor3473f882001-02-23 17:55:21 +000010097 * xmlRecoverDoc:
10098 * @cur: a pointer to an array of xmlChar
10099 *
10100 * parse an XML in-memory document and build a tree.
10101 * In the case the document is not Well Formed, a tree is built anyway
10102 *
10103 * Returns the resulting document tree
10104 */
10105
10106xmlDocPtr
10107xmlRecoverDoc(xmlChar *cur) {
10108 return(xmlSAXParseDoc(NULL, cur, 1));
10109}
10110
10111/**
10112 * xmlParseFile:
10113 * @filename: the filename
10114 *
10115 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10116 * compressed document is provided by default if found at compile-time.
10117 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010118 * Returns the resulting document tree if the file was wellformed,
10119 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010120 */
10121
10122xmlDocPtr
10123xmlParseFile(const char *filename) {
10124 return(xmlSAXParseFile(NULL, filename, 0));
10125}
10126
10127/**
10128 * xmlRecoverFile:
10129 * @filename: the filename
10130 *
10131 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10132 * compressed document is provided by default if found at compile-time.
10133 * In the case the document is not Well Formed, a tree is built anyway
10134 *
10135 * Returns the resulting document tree
10136 */
10137
10138xmlDocPtr
10139xmlRecoverFile(const char *filename) {
10140 return(xmlSAXParseFile(NULL, filename, 1));
10141}
10142
10143
10144/**
10145 * xmlSetupParserForBuffer:
10146 * @ctxt: an XML parser context
10147 * @buffer: a xmlChar * buffer
10148 * @filename: a file name
10149 *
10150 * Setup the parser context to parse a new buffer; Clears any prior
10151 * contents from the parser context. The buffer parameter must not be
10152 * NULL, but the filename parameter can be
10153 */
10154void
10155xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10156 const char* filename)
10157{
10158 xmlParserInputPtr input;
10159
10160 input = xmlNewInputStream(ctxt);
10161 if (input == NULL) {
10162 perror("malloc");
10163 xmlFree(ctxt);
10164 return;
10165 }
10166
10167 xmlClearParserCtxt(ctxt);
10168 if (filename != NULL)
10169 input->filename = xmlMemStrdup(filename);
10170 input->base = buffer;
10171 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010172 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010173 inputPush(ctxt, input);
10174}
10175
10176/**
10177 * xmlSAXUserParseFile:
10178 * @sax: a SAX handler
10179 * @user_data: The user data returned on SAX callbacks
10180 * @filename: a file name
10181 *
10182 * parse an XML file and call the given SAX handler routines.
10183 * Automatic support for ZLIB/Compress compressed document is provided
10184 *
10185 * Returns 0 in case of success or a error number otherwise
10186 */
10187int
10188xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10189 const char *filename) {
10190 int ret = 0;
10191 xmlParserCtxtPtr ctxt;
10192
10193 ctxt = xmlCreateFileParserCtxt(filename);
10194 if (ctxt == NULL) return -1;
10195 if (ctxt->sax != &xmlDefaultSAXHandler)
10196 xmlFree(ctxt->sax);
10197 ctxt->sax = sax;
10198 if (user_data != NULL)
10199 ctxt->userData = user_data;
10200
10201 xmlParseDocument(ctxt);
10202
10203 if (ctxt->wellFormed)
10204 ret = 0;
10205 else {
10206 if (ctxt->errNo != 0)
10207 ret = ctxt->errNo;
10208 else
10209 ret = -1;
10210 }
10211 if (sax != NULL)
10212 ctxt->sax = NULL;
10213 xmlFreeParserCtxt(ctxt);
10214
10215 return ret;
10216}
10217
10218/************************************************************************
10219 * *
10220 * Front ends when parsing from memory *
10221 * *
10222 ************************************************************************/
10223
10224/**
10225 * xmlCreateMemoryParserCtxt:
10226 * @buffer: a pointer to a char array
10227 * @size: the size of the array
10228 *
10229 * Create a parser context for an XML in-memory document.
10230 *
10231 * Returns the new parser context or NULL
10232 */
10233xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010234xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010235 xmlParserCtxtPtr ctxt;
10236 xmlParserInputPtr input;
10237 xmlParserInputBufferPtr buf;
10238
10239 if (buffer == NULL)
10240 return(NULL);
10241 if (size <= 0)
10242 return(NULL);
10243
10244 ctxt = xmlNewParserCtxt();
10245 if (ctxt == NULL)
10246 return(NULL);
10247
10248 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10249 if (buf == NULL) return(NULL);
10250
10251 input = xmlNewInputStream(ctxt);
10252 if (input == NULL) {
10253 xmlFreeParserCtxt(ctxt);
10254 return(NULL);
10255 }
10256
10257 input->filename = NULL;
10258 input->buf = buf;
10259 input->base = input->buf->buffer->content;
10260 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010261 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010262
10263 inputPush(ctxt, input);
10264 return(ctxt);
10265}
10266
10267/**
10268 * xmlSAXParseMemory:
10269 * @sax: the SAX handler block
10270 * @buffer: an pointer to a char array
10271 * @size: the size of the array
10272 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10273 * documents
10274 *
10275 * parse an XML in-memory block and use the given SAX function block
10276 * to handle the parsing callback. If sax is NULL, fallback to the default
10277 * DOM tree building routines.
10278 *
10279 * Returns the resulting document tree
10280 */
10281xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010282xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10283 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010284 xmlDocPtr ret;
10285 xmlParserCtxtPtr ctxt;
10286
10287 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10288 if (ctxt == NULL) return(NULL);
10289 if (sax != NULL) {
10290 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010291 }
10292
10293 xmlParseDocument(ctxt);
10294
10295 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10296 else {
10297 ret = NULL;
10298 xmlFreeDoc(ctxt->myDoc);
10299 ctxt->myDoc = NULL;
10300 }
10301 if (sax != NULL)
10302 ctxt->sax = NULL;
10303 xmlFreeParserCtxt(ctxt);
10304
10305 return(ret);
10306}
10307
10308/**
10309 * xmlParseMemory:
10310 * @buffer: an pointer to a char array
10311 * @size: the size of the array
10312 *
10313 * parse an XML in-memory block and build a tree.
10314 *
10315 * Returns the resulting document tree
10316 */
10317
Daniel Veillard50822cb2001-07-26 20:05:51 +000010318xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010319 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10320}
10321
10322/**
10323 * xmlRecoverMemory:
10324 * @buffer: an pointer to a char array
10325 * @size: the size of the array
10326 *
10327 * parse an XML in-memory block and build a tree.
10328 * In the case the document is not Well Formed, a tree is built anyway
10329 *
10330 * Returns the resulting document tree
10331 */
10332
Daniel Veillard50822cb2001-07-26 20:05:51 +000010333xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010334 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10335}
10336
10337/**
10338 * xmlSAXUserParseMemory:
10339 * @sax: a SAX handler
10340 * @user_data: The user data returned on SAX callbacks
10341 * @buffer: an in-memory XML document input
10342 * @size: the length of the XML document in bytes
10343 *
10344 * A better SAX parsing routine.
10345 * parse an XML in-memory buffer and call the given SAX handler routines.
10346 *
10347 * Returns 0 in case of success or a error number otherwise
10348 */
10349int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010350 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010351 int ret = 0;
10352 xmlParserCtxtPtr ctxt;
10353 xmlSAXHandlerPtr oldsax = NULL;
10354
10355 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10356 if (ctxt == NULL) return -1;
10357 if (sax != NULL) {
10358 oldsax = ctxt->sax;
10359 ctxt->sax = sax;
10360 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010361 if (user_data != NULL)
10362 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010363
10364 xmlParseDocument(ctxt);
10365
10366 if (ctxt->wellFormed)
10367 ret = 0;
10368 else {
10369 if (ctxt->errNo != 0)
10370 ret = ctxt->errNo;
10371 else
10372 ret = -1;
10373 }
10374 if (sax != NULL) {
10375 ctxt->sax = oldsax;
10376 }
10377 xmlFreeParserCtxt(ctxt);
10378
10379 return ret;
10380}
10381
10382/**
10383 * xmlCreateDocParserCtxt:
10384 * @cur: a pointer to an array of xmlChar
10385 *
10386 * Creates a parser context for an XML in-memory document.
10387 *
10388 * Returns the new parser context or NULL
10389 */
10390xmlParserCtxtPtr
10391xmlCreateDocParserCtxt(xmlChar *cur) {
10392 int len;
10393
10394 if (cur == NULL)
10395 return(NULL);
10396 len = xmlStrlen(cur);
10397 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10398}
10399
10400/**
10401 * xmlSAXParseDoc:
10402 * @sax: the SAX handler block
10403 * @cur: a pointer to an array of xmlChar
10404 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10405 * documents
10406 *
10407 * parse an XML in-memory document and build a tree.
10408 * It use the given SAX function block to handle the parsing callback.
10409 * If sax is NULL, fallback to the default DOM tree building routines.
10410 *
10411 * Returns the resulting document tree
10412 */
10413
10414xmlDocPtr
10415xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10416 xmlDocPtr ret;
10417 xmlParserCtxtPtr ctxt;
10418
10419 if (cur == NULL) return(NULL);
10420
10421
10422 ctxt = xmlCreateDocParserCtxt(cur);
10423 if (ctxt == NULL) return(NULL);
10424 if (sax != NULL) {
10425 ctxt->sax = sax;
10426 ctxt->userData = NULL;
10427 }
10428
10429 xmlParseDocument(ctxt);
10430 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10431 else {
10432 ret = NULL;
10433 xmlFreeDoc(ctxt->myDoc);
10434 ctxt->myDoc = NULL;
10435 }
10436 if (sax != NULL)
10437 ctxt->sax = NULL;
10438 xmlFreeParserCtxt(ctxt);
10439
10440 return(ret);
10441}
10442
10443/**
10444 * xmlParseDoc:
10445 * @cur: a pointer to an array of xmlChar
10446 *
10447 * parse an XML in-memory document and build a tree.
10448 *
10449 * Returns the resulting document tree
10450 */
10451
10452xmlDocPtr
10453xmlParseDoc(xmlChar *cur) {
10454 return(xmlSAXParseDoc(NULL, cur, 0));
10455}
10456
Daniel Veillard8107a222002-01-13 14:10:10 +000010457/************************************************************************
10458 * *
10459 * Specific function to keep track of entities references *
10460 * and used by the XSLT debugger *
10461 * *
10462 ************************************************************************/
10463
10464static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10465
10466/**
10467 * xmlAddEntityReference:
10468 * @ent : A valid entity
10469 * @firstNode : A valid first node for children of entity
10470 * @lastNode : A valid last node of children entity
10471 *
10472 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10473 */
10474static void
10475xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10476 xmlNodePtr lastNode)
10477{
10478 if (xmlEntityRefFunc != NULL) {
10479 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10480 }
10481}
10482
10483
10484/**
10485 * xmlSetEntityReferenceFunc:
10486 * @func : A valid function
10487 *
10488 * Set the function to call call back when a xml reference has been made
10489 */
10490void
10491xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10492{
10493 xmlEntityRefFunc = func;
10494}
Owen Taylor3473f882001-02-23 17:55:21 +000010495
10496/************************************************************************
10497 * *
10498 * Miscellaneous *
10499 * *
10500 ************************************************************************/
10501
10502#ifdef LIBXML_XPATH_ENABLED
10503#include <libxml/xpath.h>
10504#endif
10505
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010506extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010507static int xmlParserInitialized = 0;
10508
10509/**
10510 * xmlInitParser:
10511 *
10512 * Initialization function for the XML parser.
10513 * This is not reentrant. Call once before processing in case of
10514 * use in multithreaded programs.
10515 */
10516
10517void
10518xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010519 if (xmlParserInitialized != 0)
10520 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010521
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010522 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10523 (xmlGenericError == NULL))
10524 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010525 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010526 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010527 xmlInitCharEncodingHandlers();
10528 xmlInitializePredefinedEntities();
10529 xmlDefaultSAXHandlerInit();
10530 xmlRegisterDefaultInputCallbacks();
10531 xmlRegisterDefaultOutputCallbacks();
10532#ifdef LIBXML_HTML_ENABLED
10533 htmlInitAutoClose();
10534 htmlDefaultSAXHandlerInit();
10535#endif
10536#ifdef LIBXML_XPATH_ENABLED
10537 xmlXPathInit();
10538#endif
10539 xmlParserInitialized = 1;
10540}
10541
10542/**
10543 * xmlCleanupParser:
10544 *
10545 * Cleanup function for the XML parser. It tries to reclaim all
10546 * parsing related global memory allocated for the parser processing.
10547 * It doesn't deallocate any document related memory. Calling this
10548 * function should not prevent reusing the parser.
10549 */
10550
10551void
10552xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010553 xmlCleanupCharEncodingHandlers();
10554 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010555#ifdef LIBXML_CATALOG_ENABLED
10556 xmlCatalogCleanup();
10557#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010558 xmlCleanupThreads();
10559 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010560}