blob: fa0dd4a3f818ff8c91f42df290347344c5d968ae [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL-terminated. Both the strings and the array slots are
 * const, so the whole table is read-only data.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Owen Taylor3473f882001-02-23 17:55:21 +0000108/************************************************************************
109 * *
110 * Parser stacks related functions and macros *
111 * *
112 ************************************************************************/
113
114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
115 const xmlChar ** str);
116
117/*
118 * Generic function for accessing stacks in the Parser Context
119 */
120
121#define PUSH_AND_POP(scope, type, name) \
122scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
123 if (ctxt->name##Nr >= ctxt->name##Max) { \
124 ctxt->name##Max *= 2; \
125 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
126 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
127 if (ctxt->name##Tab == NULL) { \
128 xmlGenericError(xmlGenericErrorContext, \
129 "realloc failed !\n"); \
130 return(0); \
131 } \
132 } \
133 ctxt->name##Tab[ctxt->name##Nr] = value; \
134 ctxt->name = value; \
135 return(ctxt->name##Nr++); \
136} \
137scope type name##Pop(xmlParserCtxtPtr ctxt) { \
138 type ret; \
139 if (ctxt->name##Nr <= 0) return(0); \
140 ctxt->name##Nr--; \
141 if (ctxt->name##Nr > 0) \
142 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
143 else \
144 ctxt->name = NULL; \
145 ret = ctxt->name##Tab[ctxt->name##Nr]; \
146 ctxt->name##Tab[ctxt->name##Nr] = 0; \
147 return(ret); \
148} \
149
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000150/**
151 * inputPop:
152 * @ctxt: an XML parser context
153 *
154 * Pops the top parser input from the input stack
155 *
156 * Returns the input just removed
157 */
158/**
159 * inputPush:
160 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 *
163 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 *
165 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166 */
167/**
168 * namePop:
169 * @ctxt: an XML parser context
170 *
171 * Pops the top element name from the name stack
172 *
173 * Returns the name just removed
174 */
175/**
176 * namePush:
177 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 *
180 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 *
182 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 */
184/**
185 * nodePop:
186 * @ctxt: an XML parser context
187 *
188 * Pops the top element node from the node stack
189 *
190 * Returns the node just removed
191 */
192/**
193 * nodePush:
194 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000196 *
197 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 *
199 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000200 */
Owen Taylor3473f882001-02-23 17:55:21 +0000201/*
202 * Those macros actually generate the functions
203 */
204PUSH_AND_POP(extern, xmlParserInputPtr, input)
205PUSH_AND_POP(extern, xmlNodePtr, node)
206PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
270
/* RAW: -1 while ctxt->token is set, otherwise the byte under the cursor */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* CUR: ctxt->token when it is set, otherwise the byte under the cursor */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* NXT(val): the byte at offset val from the cursor */
#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the cursor of the current input itself */
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the cursor and the character count by val, hand a
 * '%' found at the new position to xmlParserHandlePEReference(), and pop
 * the input when it is exhausted and cannot be grown any further.
 * Note that val is evaluated twice.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
283
Daniel Veillard46de64e2002-05-29 08:21:33 +0000284#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
285 xmlSHRINK (ctxt);
286
287static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
288 xmlParserInputShrink(ctxt->input);
289 if ((*ctxt->input->cur == 0) &&
290 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
291 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000292 }
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard46de64e2002-05-29 08:21:33 +0000294#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
295 xmlGROW (ctxt);
296
297static void xmlGROW (xmlParserCtxtPtr ctxt) {
298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
299 if ((*ctxt->input->cur == 0) &&
300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
301 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303
/* SKIP_BLANKS / NEXT: shorthands for the context-based helpers */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the character count by one and try to
 * grow the input when the new position holds a zero byte.
 * This is a bare brace block, not a do/while(0) wrapper.
 */
#define NEXT1 { \
    ctxt->input->cur++; \
    ctxt->nbChars++; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }

/*
 * NEXTL(l): update line/col for the byte under the cursor, clear the
 * pending token, advance the cursor by l and hand a '%' found at the new
 * position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } else { \
        ctxt->input->col++; \
    } \
    ctxt->token = 0; \
    ctxt->input->cur += l; \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
  } while (0)

/* l must be an lvalue: its address is passed to the decoding helper */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): store v at b[i] and advance i, directly when l is 1
 * and through xmlCopyCharMultiByte() otherwise.
 * The expansion is an unbraced if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v) \
    if (l == 1) b[i++] = (xmlChar) v; \
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000329
330/**
331 * xmlSkipBlankChars:
332 * @ctxt: the XML parser context
333 *
334 * skip all blanks character found at that point in the input streams.
335 * It pops up finished entities in the process if allowable at that point.
336 *
337 * Returns the number of space chars skipped
338 */
339
340int
341xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000343
Daniel Veillard02141ea2001-04-30 11:46:40 +0000344 if (ctxt->token != 0) {
345 if (!IS_BLANK(ctxt->token))
346 return(0);
347 ctxt->token = 0;
348 res++;
349 }
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
351 * It's Okay to use CUR/NEXT here since all the blanks are on
352 * the ASCII range.
353 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000354 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
355 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000356 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000357 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000358 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000359 cur = ctxt->input->cur;
360 while (IS_BLANK(*cur)) {
361 if (*cur == '\n') {
362 ctxt->input->line++; ctxt->input->col = 1;
363 }
364 cur++;
365 res++;
366 if (*cur == 0) {
367 ctxt->input->cur = cur;
368 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
369 cur = ctxt->input->cur;
370 }
371 }
372 ctxt->input->cur = cur;
373 } else {
374 int cur;
375 do {
376 cur = CUR;
377 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
378 NEXT;
379 cur = CUR;
380 res++;
381 }
382 while ((cur == 0) && (ctxt->inputNr > 1) &&
383 (ctxt->instate != XML_PARSER_COMMENT)) {
384 xmlPopInput(ctxt);
385 cur = CUR;
386 }
387 /*
388 * Need to handle support of entities branching here
389 */
390 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
391 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
392 }
Owen Taylor3473f882001-02-23 17:55:21 +0000393 return(res);
394}
395
396/************************************************************************
397 * *
398 * Commodity functions to handle entities *
399 * *
400 ************************************************************************/
401
402/**
403 * xmlPopInput:
404 * @ctxt: an XML parser context
405 *
406 * xmlPopInput: the current input pointed by ctxt->input came to an end
407 * pop it and return the next char.
408 *
409 * Returns the current xmlChar in the parser context
410 */
411xmlChar
412xmlPopInput(xmlParserCtxtPtr ctxt) {
413 if (ctxt->inputNr == 1) return(0); /* End of main Input */
414 if (xmlParserDebugEntities)
415 xmlGenericError(xmlGenericErrorContext,
416 "Popping input %d\n", ctxt->inputNr);
417 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000418 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000419 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
420 return(xmlPopInput(ctxt));
421 return(CUR);
422}
423
424/**
425 * xmlPushInput:
426 * @ctxt: an XML parser context
427 * @input: an XML parser input fragment (entity, XML fragment ...).
428 *
429 * xmlPushInput: switch to a new input stream which is stacked on top
430 * of the previous one(s).
431 */
432void
433xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
434 if (input == NULL) return;
435
436 if (xmlParserDebugEntities) {
437 if ((ctxt->input != NULL) && (ctxt->input->filename))
438 xmlGenericError(xmlGenericErrorContext,
439 "%s(%d): ", ctxt->input->filename,
440 ctxt->input->line);
441 xmlGenericError(xmlGenericErrorContext,
442 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
443 }
444 inputPush(ctxt, input);
445 GROW;
446}
447
448/**
449 * xmlParseCharRef:
450 * @ctxt: an XML parser context
451 *
452 * parse Reference declarations
453 *
454 * [66] CharRef ::= '&#' [0-9]+ ';' |
455 * '&#x' [0-9a-fA-F]+ ';'
456 *
457 * [ WFC: Legal Character ]
458 * Characters referred to using character references must match the
459 * production for Char.
460 *
461 * Returns the value parsed (as an int), 0 in case of error
462 */
463int
464xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000465 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000466 int count = 0;
467
468 if (ctxt->token != 0) {
469 val = ctxt->token;
470 ctxt->token = 0;
471 return(val);
472 }
473 /*
474 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
475 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000476 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000477 (NXT(2) == 'x')) {
478 SKIP(3);
479 GROW;
480 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000481 if (count++ > 20) {
482 count = 0;
483 GROW;
484 }
485 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000486 val = val * 16 + (CUR - '0');
487 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
488 val = val * 16 + (CUR - 'a') + 10;
489 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
490 val = val * 16 + (CUR - 'A') + 10;
491 else {
492 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
494 ctxt->sax->error(ctxt->userData,
495 "xmlParseCharRef: invalid hexadecimal value\n");
496 ctxt->wellFormed = 0;
497 ctxt->disableSAX = 1;
498 val = 0;
499 break;
500 }
501 NEXT;
502 count++;
503 }
504 if (RAW == ';') {
505 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
506 ctxt->nbChars ++;
507 ctxt->input->cur++;
508 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000509 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000510 SKIP(2);
511 GROW;
512 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000513 if (count++ > 20) {
514 count = 0;
515 GROW;
516 }
517 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000518 val = val * 10 + (CUR - '0');
519 else {
520 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
522 ctxt->sax->error(ctxt->userData,
523 "xmlParseCharRef: invalid decimal value\n");
524 ctxt->wellFormed = 0;
525 ctxt->disableSAX = 1;
526 val = 0;
527 break;
528 }
529 NEXT;
530 count++;
531 }
532 if (RAW == ';') {
533 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
534 ctxt->nbChars ++;
535 ctxt->input->cur++;
536 }
537 } else {
538 ctxt->errNo = XML_ERR_INVALID_CHARREF;
539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
540 ctxt->sax->error(ctxt->userData,
541 "xmlParseCharRef: invalid value\n");
542 ctxt->wellFormed = 0;
543 ctxt->disableSAX = 1;
544 }
545
546 /*
547 * [ WFC: Legal Character ]
548 * Characters referred to using character references must match the
549 * production for Char.
550 */
551 if (IS_CHAR(val)) {
552 return(val);
553 } else {
554 ctxt->errNo = XML_ERR_INVALID_CHAR;
555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000556 ctxt->sax->error(ctxt->userData,
557 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000558 val);
559 ctxt->wellFormed = 0;
560 ctxt->disableSAX = 1;
561 }
562 return(0);
563}
564
565/**
566 * xmlParseStringCharRef:
567 * @ctxt: an XML parser context
568 * @str: a pointer to an index in the string
569 *
570 * parse Reference declarations, variant parsing from a string rather
571 * than an an input flow.
572 *
573 * [66] CharRef ::= '&#' [0-9]+ ';' |
574 * '&#x' [0-9a-fA-F]+ ';'
575 *
576 * [ WFC: Legal Character ]
577 * Characters referred to using character references must match the
578 * production for Char.
579 *
580 * Returns the value parsed (as an int), 0 in case of error, str will be
581 * updated to the current value of the index
582 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000583static int
Owen Taylor3473f882001-02-23 17:55:21 +0000584xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
585 const xmlChar *ptr;
586 xmlChar cur;
587 int val = 0;
588
589 if ((str == NULL) || (*str == NULL)) return(0);
590 ptr = *str;
591 cur = *ptr;
592 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
593 ptr += 3;
594 cur = *ptr;
595 while (cur != ';') { /* Non input consuming loop */
596 if ((cur >= '0') && (cur <= '9'))
597 val = val * 16 + (cur - '0');
598 else if ((cur >= 'a') && (cur <= 'f'))
599 val = val * 16 + (cur - 'a') + 10;
600 else if ((cur >= 'A') && (cur <= 'F'))
601 val = val * 16 + (cur - 'A') + 10;
602 else {
603 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
605 ctxt->sax->error(ctxt->userData,
606 "xmlParseStringCharRef: invalid hexadecimal value\n");
607 ctxt->wellFormed = 0;
608 ctxt->disableSAX = 1;
609 val = 0;
610 break;
611 }
612 ptr++;
613 cur = *ptr;
614 }
615 if (cur == ';')
616 ptr++;
617 } else if ((cur == '&') && (ptr[1] == '#')){
618 ptr += 2;
619 cur = *ptr;
620 while (cur != ';') { /* Non input consuming loops */
621 if ((cur >= '0') && (cur <= '9'))
622 val = val * 10 + (cur - '0');
623 else {
624 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
626 ctxt->sax->error(ctxt->userData,
627 "xmlParseStringCharRef: invalid decimal value\n");
628 ctxt->wellFormed = 0;
629 ctxt->disableSAX = 1;
630 val = 0;
631 break;
632 }
633 ptr++;
634 cur = *ptr;
635 }
636 if (cur == ';')
637 ptr++;
638 } else {
639 ctxt->errNo = XML_ERR_INVALID_CHARREF;
640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
641 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000642 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000643 ctxt->wellFormed = 0;
644 ctxt->disableSAX = 1;
645 return(0);
646 }
647 *str = ptr;
648
649 /*
650 * [ WFC: Legal Character ]
651 * Characters referred to using character references must match the
652 * production for Char.
653 */
654 if (IS_CHAR(val)) {
655 return(val);
656 } else {
657 ctxt->errNo = XML_ERR_INVALID_CHAR;
658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
659 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000660 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000661 ctxt->wellFormed = 0;
662 ctxt->disableSAX = 1;
663 }
664 return(0);
665}
666
667/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000668 * xmlNewBlanksWrapperInputStream:
669 * @ctxt: an XML parser context
670 * @entity: an Entity pointer
671 *
672 * Create a new input stream for wrapping
673 * blanks around a PEReference
674 *
675 * Returns the new input stream or NULL
676 */
677
678static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
679
680xmlParserInputPtr
681xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
682 xmlParserInputPtr input;
683 xmlChar *buffer;
684 size_t length;
685 if (entity == NULL) {
686 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
688 ctxt->sax->error(ctxt->userData,
689 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
690 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
691 return(NULL);
692 }
693 if (xmlParserDebugEntities)
694 xmlGenericError(xmlGenericErrorContext,
695 "new blanks wrapper for entity: %s\n", entity->name);
696 input = xmlNewInputStream(ctxt);
697 if (input == NULL) {
698 return(NULL);
699 }
700 length = xmlStrlen(entity->name) + 5;
701 buffer = xmlMalloc(length);
702 if (buffer == NULL) {
703 return(NULL);
704 }
705 buffer [0] = ' ';
706 buffer [1] = '%';
707 buffer [length-3] = ';';
708 buffer [length-2] = ' ';
709 buffer [length-1] = 0;
710 memcpy(buffer + 2, entity->name, length - 5);
711 input->free = deallocblankswrapper;
712 input->base = buffer;
713 input->cur = buffer;
714 input->length = length;
715 input->end = &buffer[length];
716 return(input);
717}
718
719/**
Owen Taylor3473f882001-02-23 17:55:21 +0000720 * xmlParserHandlePEReference:
721 * @ctxt: the parser context
722 *
723 * [69] PEReference ::= '%' Name ';'
724 *
725 * [ WFC: No Recursion ]
726 * A parsed entity must not contain a recursive
727 * reference to itself, either directly or indirectly.
728 *
729 * [ WFC: Entity Declared ]
730 * In a document without any DTD, a document with only an internal DTD
731 * subset which contains no parameter entity references, or a document
732 * with "standalone='yes'", ... ... The declaration of a parameter
733 * entity must precede any reference to it...
734 *
735 * [ VC: Entity Declared ]
736 * In a document with an external subset or external parameter entities
737 * with "standalone='no'", ... ... The declaration of a parameter entity
738 * must precede any reference to it...
739 *
740 * [ WFC: In DTD ]
741 * Parameter-entity references may only appear in the DTD.
742 * NOTE: misleading but this is handled.
743 *
744 * A PEReference may have been detected in the current input stream
745 * the handling is done accordingly to
746 * http://www.w3.org/TR/REC-xml#entproc
747 * i.e.
748 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000749 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000750 */
751void
752xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
753 xmlChar *name;
754 xmlEntityPtr entity = NULL;
755 xmlParserInputPtr input;
756
757 if (ctxt->token != 0) {
758 return;
759 }
760 if (RAW != '%') return;
761 switch(ctxt->instate) {
762 case XML_PARSER_CDATA_SECTION:
763 return;
764 case XML_PARSER_COMMENT:
765 return;
766 case XML_PARSER_START_TAG:
767 return;
768 case XML_PARSER_END_TAG:
769 return;
770 case XML_PARSER_EOF:
771 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
773 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
774 ctxt->wellFormed = 0;
775 ctxt->disableSAX = 1;
776 return;
777 case XML_PARSER_PROLOG:
778 case XML_PARSER_START:
779 case XML_PARSER_MISC:
780 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
782 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
783 ctxt->wellFormed = 0;
784 ctxt->disableSAX = 1;
785 return;
786 case XML_PARSER_ENTITY_DECL:
787 case XML_PARSER_CONTENT:
788 case XML_PARSER_ATTRIBUTE_VALUE:
789 case XML_PARSER_PI:
790 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000791 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000792 /* we just ignore it there */
793 return;
794 case XML_PARSER_EPILOG:
795 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
797 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
798 ctxt->wellFormed = 0;
799 ctxt->disableSAX = 1;
800 return;
801 case XML_PARSER_ENTITY_VALUE:
802 /*
803 * NOTE: in the case of entity values, we don't do the
804 * substitution here since we need the literal
805 * entity value to be able to save the internal
806 * subset of the document.
807 * This will be handled by xmlStringDecodeEntities
808 */
809 return;
810 case XML_PARSER_DTD:
811 /*
812 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
813 * In the internal DTD subset, parameter-entity references
814 * can occur only where markup declarations can occur, not
815 * within markup declarations.
816 * In that case this is handled in xmlParseMarkupDecl
817 */
818 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
819 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000820 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
821 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000822 break;
823 case XML_PARSER_IGNORE:
824 return;
825 }
826
827 NEXT;
828 name = xmlParseName(ctxt);
829 if (xmlParserDebugEntities)
830 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000831 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000832 if (name == NULL) {
833 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000835 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000836 ctxt->wellFormed = 0;
837 ctxt->disableSAX = 1;
838 } else {
839 if (RAW == ';') {
840 NEXT;
841 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
842 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
843 if (entity == NULL) {
844
845 /*
846 * [ WFC: Entity Declared ]
847 * In a document without any DTD, a document with only an
848 * internal DTD subset which contains no parameter entity
849 * references, or a document with "standalone='yes'", ...
850 * ... The declaration of a parameter entity must precede
851 * any reference to it...
852 */
853 if ((ctxt->standalone == 1) ||
854 ((ctxt->hasExternalSubset == 0) &&
855 (ctxt->hasPErefs == 0))) {
856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
857 ctxt->sax->error(ctxt->userData,
858 "PEReference: %%%s; not found\n", name);
859 ctxt->wellFormed = 0;
860 ctxt->disableSAX = 1;
861 } else {
862 /*
863 * [ VC: Entity Declared ]
864 * In a document with an external subset or external
865 * parameter entities with "standalone='no'", ...
866 * ... The declaration of a parameter entity must precede
867 * any reference to it...
868 */
869 if ((!ctxt->disableSAX) &&
870 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
871 ctxt->vctxt.error(ctxt->vctxt.userData,
872 "PEReference: %%%s; not found\n", name);
873 } else if ((!ctxt->disableSAX) &&
874 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
875 ctxt->sax->warning(ctxt->userData,
876 "PEReference: %%%s; not found\n", name);
877 ctxt->valid = 0;
878 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000879 } else if (ctxt->input->free != deallocblankswrapper) {
880 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
881 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000882 } else {
883 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
884 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000885 xmlChar start[4];
886 xmlCharEncoding enc;
887
Owen Taylor3473f882001-02-23 17:55:21 +0000888 /*
889 * handle the extra spaces added before and after
890 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000891 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000892 */
893 input = xmlNewEntityInputStream(ctxt, entity);
894 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000895
896 /*
897 * Get the 4 first bytes and decode the charset
898 * if enc != XML_CHAR_ENCODING_NONE
899 * plug some encoding conversion routines.
900 */
901 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000902 if (entity->length >= 4) {
903 start[0] = RAW;
904 start[1] = NXT(1);
905 start[2] = NXT(2);
906 start[3] = NXT(3);
907 enc = xmlDetectCharEncoding(start, 4);
908 if (enc != XML_CHAR_ENCODING_NONE) {
909 xmlSwitchEncoding(ctxt, enc);
910 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000911 }
912
Owen Taylor3473f882001-02-23 17:55:21 +0000913 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
914 (RAW == '<') && (NXT(1) == '?') &&
915 (NXT(2) == 'x') && (NXT(3) == 'm') &&
916 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
917 xmlParseTextDecl(ctxt);
918 }
Owen Taylor3473f882001-02-23 17:55:21 +0000919 } else {
920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
921 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000922 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000923 name);
924 ctxt->wellFormed = 0;
925 ctxt->disableSAX = 1;
926 }
927 }
928 } else {
929 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
931 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000932 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000933 ctxt->wellFormed = 0;
934 ctxt->disableSAX = 1;
935 }
936 xmlFree(name);
937 }
938}
939
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> to hold that many xmlChars.
 * The reallocation result is stored in a temporary first so that, when
 * xmlRealloc() fails, the previous block is still reachable and can be
 * released instead of being leaked.
 *
 * On failure the old block is freed, <buffer> is set to NULL and the
 * ENCLOSING FUNCTION returns NULL; the macro can therefore only be used
 * in functions returning a pointer, on a buffer the function owns.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
952
953/**
954 * xmlStringDecodeEntities:
955 * @ctxt: the parser context
956 * @str: the input string
957 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
958 * @end: an end marker xmlChar, 0 if none
959 * @end2: an end marker xmlChar, 0 if none
960 * @end3: an end marker xmlChar, 0 if none
961 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000962 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000963 *
964 * [67] Reference ::= EntityRef | CharRef
965 *
966 * [69] PEReference ::= '%' Name ';'
967 *
968 * Returns A newly allocated string with the substitution done. The caller
969 * must deallocate it !
970 */
971xmlChar *
972xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
973 xmlChar end, xmlChar end2, xmlChar end3) {
974 xmlChar *buffer = NULL;
975 int buffer_size = 0;
976
977 xmlChar *current = NULL;
978 xmlEntityPtr ent;
979 int c,l;
980 int nbchars = 0;
981
982 if (str == NULL)
983 return(NULL);
984
985 if (ctxt->depth > 40) {
986 ctxt->errNo = XML_ERR_ENTITY_LOOP;
987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
988 ctxt->sax->error(ctxt->userData,
989 "Detected entity reference loop\n");
990 ctxt->wellFormed = 0;
991 ctxt->disableSAX = 1;
992 return(NULL);
993 }
994
995 /*
996 * allocate a translation buffer.
997 */
998 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
999 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1000 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001001 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001002 return(NULL);
1003 }
1004
1005 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001006 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001007 * we are operating on already parsed values.
1008 */
1009 c = CUR_SCHAR(str, l);
1010 while ((c != 0) && (c != end) && /* non input consuming loop */
1011 (c != end2) && (c != end3)) {
1012
1013 if (c == 0) break;
1014 if ((c == '&') && (str[1] == '#')) {
1015 int val = xmlParseStringCharRef(ctxt, &str);
1016 if (val != 0) {
1017 COPY_BUF(0,buffer,nbchars,val);
1018 }
1019 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1020 if (xmlParserDebugEntities)
1021 xmlGenericError(xmlGenericErrorContext,
1022 "String decoding Entity Reference: %.30s\n",
1023 str);
1024 ent = xmlParseStringEntityRef(ctxt, &str);
1025 if ((ent != NULL) &&
1026 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1027 if (ent->content != NULL) {
1028 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1029 } else {
1030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1031 ctxt->sax->error(ctxt->userData,
1032 "internal error entity has no content\n");
1033 }
1034 } else if ((ent != NULL) && (ent->content != NULL)) {
1035 xmlChar *rep;
1036
1037 ctxt->depth++;
1038 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1039 0, 0, 0);
1040 ctxt->depth--;
1041 if (rep != NULL) {
1042 current = rep;
1043 while (*current != 0) { /* non input consuming loop */
1044 buffer[nbchars++] = *current++;
1045 if (nbchars >
1046 buffer_size - XML_PARSER_BUFFER_SIZE) {
1047 growBuffer(buffer);
1048 }
1049 }
1050 xmlFree(rep);
1051 }
1052 } else if (ent != NULL) {
1053 int i = xmlStrlen(ent->name);
1054 const xmlChar *cur = ent->name;
1055
1056 buffer[nbchars++] = '&';
1057 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1058 growBuffer(buffer);
1059 }
1060 for (;i > 0;i--)
1061 buffer[nbchars++] = *cur++;
1062 buffer[nbchars++] = ';';
1063 }
1064 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1065 if (xmlParserDebugEntities)
1066 xmlGenericError(xmlGenericErrorContext,
1067 "String decoding PE Reference: %.30s\n", str);
1068 ent = xmlParseStringPEReference(ctxt, &str);
1069 if (ent != NULL) {
1070 xmlChar *rep;
1071
1072 ctxt->depth++;
1073 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1074 0, 0, 0);
1075 ctxt->depth--;
1076 if (rep != NULL) {
1077 current = rep;
1078 while (*current != 0) { /* non input consuming loop */
1079 buffer[nbchars++] = *current++;
1080 if (nbchars >
1081 buffer_size - XML_PARSER_BUFFER_SIZE) {
1082 growBuffer(buffer);
1083 }
1084 }
1085 xmlFree(rep);
1086 }
1087 }
1088 } else {
1089 COPY_BUF(l,buffer,nbchars,c);
1090 str += l;
1091 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1092 growBuffer(buffer);
1093 }
1094 }
1095 c = CUR_SCHAR(str, l);
1096 }
1097 buffer[nbchars++] = 0;
1098 return(buffer);
1099}
1100
1101
1102/************************************************************************
1103 * *
1104 * Commodity functions to handle xmlChars *
1105 * *
1106 ************************************************************************/
1107
1108/**
1109 * xmlStrndup:
1110 * @cur: the input xmlChar *
1111 * @len: the len of @cur
1112 *
1113 * a strndup for array of xmlChar's
1114 *
1115 * Returns a new xmlChar * or NULL
1116 */
1117xmlChar *
1118xmlStrndup(const xmlChar *cur, int len) {
1119 xmlChar *ret;
1120
1121 if ((cur == NULL) || (len < 0)) return(NULL);
1122 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1123 if (ret == NULL) {
1124 xmlGenericError(xmlGenericErrorContext,
1125 "malloc of %ld byte failed\n",
1126 (len + 1) * (long)sizeof(xmlChar));
1127 return(NULL);
1128 }
1129 memcpy(ret, cur, len * sizeof(xmlChar));
1130 ret[len] = 0;
1131 return(ret);
1132}
1133
1134/**
1135 * xmlStrdup:
1136 * @cur: the input xmlChar *
1137 *
1138 * a strdup for array of xmlChar's. Since they are supposed to be
1139 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1140 * a termination mark of '0'.
1141 *
1142 * Returns a new xmlChar * or NULL
1143 */
1144xmlChar *
1145xmlStrdup(const xmlChar *cur) {
1146 const xmlChar *p = cur;
1147
1148 if (cur == NULL) return(NULL);
1149 while (*p != 0) p++; /* non input consuming */
1150 return(xmlStrndup(cur, p - cur));
1151}
1152
1153/**
1154 * xmlCharStrndup:
1155 * @cur: the input char *
1156 * @len: the len of @cur
1157 *
1158 * a strndup for char's to xmlChar's
1159 *
1160 * Returns a new xmlChar * or NULL
1161 */
1162
1163xmlChar *
1164xmlCharStrndup(const char *cur, int len) {
1165 int i;
1166 xmlChar *ret;
1167
1168 if ((cur == NULL) || (len < 0)) return(NULL);
1169 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1170 if (ret == NULL) {
1171 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1172 (len + 1) * (long)sizeof(xmlChar));
1173 return(NULL);
1174 }
1175 for (i = 0;i < len;i++)
1176 ret[i] = (xmlChar) cur[i];
1177 ret[len] = 0;
1178 return(ret);
1179}
1180
1181/**
1182 * xmlCharStrdup:
1183 * @cur: the input char *
1184 * @len: the len of @cur
1185 *
1186 * a strdup for char's to xmlChar's
1187 *
1188 * Returns a new xmlChar * or NULL
1189 */
1190
1191xmlChar *
1192xmlCharStrdup(const char *cur) {
1193 const char *p = cur;
1194
1195 if (cur == NULL) return(NULL);
1196 while (*p != '\0') p++; /* non input consuming */
1197 return(xmlCharStrndup(cur, p - cur));
1198}
1199
1200/**
1201 * xmlStrcmp:
1202 * @str1: the first xmlChar *
1203 * @str2: the second xmlChar *
1204 *
1205 * a strcmp for xmlChar's
1206 *
1207 * Returns the integer result of the comparison
1208 */
1209
1210int
1211xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1212 register int tmp;
1213
1214 if (str1 == str2) return(0);
1215 if (str1 == NULL) return(-1);
1216 if (str2 == NULL) return(1);
1217 do {
1218 tmp = *str1++ - *str2;
1219 if (tmp != 0) return(tmp);
1220 } while (*str2++ != 0);
1221 return 0;
1222}
1223
1224/**
1225 * xmlStrEqual:
1226 * @str1: the first xmlChar *
1227 * @str2: the second xmlChar *
1228 *
1229 * Check if both string are equal of have same content
1230 * Should be a bit more readable and faster than xmlStrEqual()
1231 *
1232 * Returns 1 if they are equal, 0 if they are different
1233 */
1234
1235int
1236xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1237 if (str1 == str2) return(1);
1238 if (str1 == NULL) return(0);
1239 if (str2 == NULL) return(0);
1240 do {
1241 if (*str1++ != *str2) return(0);
1242 } while (*str2++);
1243 return(1);
1244}
1245
1246/**
1247 * xmlStrncmp:
1248 * @str1: the first xmlChar *
1249 * @str2: the second xmlChar *
1250 * @len: the max comparison length
1251 *
1252 * a strncmp for xmlChar's
1253 *
1254 * Returns the integer result of the comparison
1255 */
1256
1257int
1258xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1259 register int tmp;
1260
1261 if (len <= 0) return(0);
1262 if (str1 == str2) return(0);
1263 if (str1 == NULL) return(-1);
1264 if (str2 == NULL) return(1);
1265 do {
1266 tmp = *str1++ - *str2;
1267 if (tmp != 0 || --len == 0) return(tmp);
1268 } while (*str2++ != 0);
1269 return 0;
1270}
1271
Daniel Veillardb44025c2001-10-11 22:55:55 +00001272static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001273 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1274 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1275 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1276 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1277 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1278 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1279 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1280 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1281 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1282 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1283 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1284 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1285 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1286 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1287 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1288 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1289 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1290 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1291 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1292 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1293 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1294 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1295 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1296 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1297 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1298 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1299 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1300 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1301 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1302 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1303 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1304 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1305};
1306
1307/**
1308 * xmlStrcasecmp:
1309 * @str1: the first xmlChar *
1310 * @str2: the second xmlChar *
1311 *
1312 * a strcasecmp for xmlChar's
1313 *
1314 * Returns the integer result of the comparison
1315 */
1316
1317int
1318xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1319 register int tmp;
1320
1321 if (str1 == str2) return(0);
1322 if (str1 == NULL) return(-1);
1323 if (str2 == NULL) return(1);
1324 do {
1325 tmp = casemap[*str1++] - casemap[*str2];
1326 if (tmp != 0) return(tmp);
1327 } while (*str2++ != 0);
1328 return 0;
1329}
1330
1331/**
1332 * xmlStrncasecmp:
1333 * @str1: the first xmlChar *
1334 * @str2: the second xmlChar *
1335 * @len: the max comparison length
1336 *
1337 * a strncasecmp for xmlChar's
1338 *
1339 * Returns the integer result of the comparison
1340 */
1341
1342int
1343xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1344 register int tmp;
1345
1346 if (len <= 0) return(0);
1347 if (str1 == str2) return(0);
1348 if (str1 == NULL) return(-1);
1349 if (str2 == NULL) return(1);
1350 do {
1351 tmp = casemap[*str1++] - casemap[*str2];
1352 if (tmp != 0 || --len == 0) return(tmp);
1353 } while (*str2++ != 0);
1354 return 0;
1355}
1356
1357/**
1358 * xmlStrchr:
1359 * @str: the xmlChar * array
1360 * @val: the xmlChar to search
1361 *
1362 * a strchr for xmlChar's
1363 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001364 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001365 */
1366
1367const xmlChar *
1368xmlStrchr(const xmlChar *str, xmlChar val) {
1369 if (str == NULL) return(NULL);
1370 while (*str != 0) { /* non input consuming */
1371 if (*str == val) return((xmlChar *) str);
1372 str++;
1373 }
1374 return(NULL);
1375}
1376
1377/**
1378 * xmlStrstr:
1379 * @str: the xmlChar * array (haystack)
1380 * @val: the xmlChar to search (needle)
1381 *
1382 * a strstr for xmlChar's
1383 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001384 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001385 */
1386
1387const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001388xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001389 int n;
1390
1391 if (str == NULL) return(NULL);
1392 if (val == NULL) return(NULL);
1393 n = xmlStrlen(val);
1394
1395 if (n == 0) return(str);
1396 while (*str != 0) { /* non input consuming */
1397 if (*str == *val) {
1398 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1399 }
1400 str++;
1401 }
1402 return(NULL);
1403}
1404
1405/**
1406 * xmlStrcasestr:
1407 * @str: the xmlChar * array (haystack)
1408 * @val: the xmlChar to search (needle)
1409 *
1410 * a case-ignoring strstr for xmlChar's
1411 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001412 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001413 */
1414
1415const xmlChar *
1416xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1417 int n;
1418
1419 if (str == NULL) return(NULL);
1420 if (val == NULL) return(NULL);
1421 n = xmlStrlen(val);
1422
1423 if (n == 0) return(str);
1424 while (*str != 0) { /* non input consuming */
1425 if (casemap[*str] == casemap[*val])
1426 if (!xmlStrncasecmp(str, val, n)) return(str);
1427 str++;
1428 }
1429 return(NULL);
1430}
1431
1432/**
1433 * xmlStrsub:
1434 * @str: the xmlChar * array (haystack)
1435 * @start: the index of the first char (zero based)
1436 * @len: the length of the substring
1437 *
1438 * Extract a substring of a given string
1439 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001440 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001441 */
1442
1443xmlChar *
1444xmlStrsub(const xmlChar *str, int start, int len) {
1445 int i;
1446
1447 if (str == NULL) return(NULL);
1448 if (start < 0) return(NULL);
1449 if (len < 0) return(NULL);
1450
1451 for (i = 0;i < start;i++) {
1452 if (*str == 0) return(NULL);
1453 str++;
1454 }
1455 if (*str == 0) return(NULL);
1456 return(xmlStrndup(str, len));
1457}
1458
1459/**
1460 * xmlStrlen:
1461 * @str: the xmlChar * array
1462 *
1463 * length of a xmlChar's string
1464 *
1465 * Returns the number of xmlChar contained in the ARRAY.
1466 */
1467
1468int
1469xmlStrlen(const xmlChar *str) {
1470 int len = 0;
1471
1472 if (str == NULL) return(0);
1473 while (*str != 0) { /* non input consuming */
1474 str++;
1475 len++;
1476 }
1477 return(len);
1478}
1479
1480/**
1481 * xmlStrncat:
1482 * @cur: the original xmlChar * array
1483 * @add: the xmlChar * array added
1484 * @len: the length of @add
1485 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001486 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001487 * first bytes of @add.
1488 *
1489 * Returns a new xmlChar *, the original @cur is reallocated if needed
1490 * and should not be freed
1491 */
1492
1493xmlChar *
1494xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1495 int size;
1496 xmlChar *ret;
1497
1498 if ((add == NULL) || (len == 0))
1499 return(cur);
1500 if (cur == NULL)
1501 return(xmlStrndup(add, len));
1502
1503 size = xmlStrlen(cur);
1504 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1505 if (ret == NULL) {
1506 xmlGenericError(xmlGenericErrorContext,
1507 "xmlStrncat: realloc of %ld byte failed\n",
1508 (size + len + 1) * (long)sizeof(xmlChar));
1509 return(cur);
1510 }
1511 memcpy(&ret[size], add, len * sizeof(xmlChar));
1512 ret[size + len] = 0;
1513 return(ret);
1514}
1515
1516/**
1517 * xmlStrcat:
1518 * @cur: the original xmlChar * array
1519 * @add: the xmlChar * array added
1520 *
1521 * a strcat for array of xmlChar's. Since they are supposed to be
1522 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1523 * a termination mark of '0'.
1524 *
1525 * Returns a new xmlChar * containing the concatenated string.
1526 */
1527xmlChar *
1528xmlStrcat(xmlChar *cur, const xmlChar *add) {
1529 const xmlChar *p = add;
1530
1531 if (add == NULL) return(cur);
1532 if (cur == NULL)
1533 return(xmlStrdup(add));
1534
1535 while (*p != 0) p++; /* non input consuming */
1536 return(xmlStrncat(cur, add, p - add));
1537}
1538
1539/************************************************************************
1540 * *
1541 * Commodity functions, cleanup needed ? *
1542 * *
1543 ************************************************************************/
1544
1545/**
1546 * areBlanks:
1547 * @ctxt: an XML parser context
1548 * @str: a xmlChar *
1549 * @len: the size of @str
1550 *
1551 * Is this a sequence of blank chars that one can ignore ?
1552 *
1553 * Returns 1 if ignorable 0 otherwise.
1554 */
1555
1556static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1557 int i, ret;
1558 xmlNodePtr lastChild;
1559
Daniel Veillard05c13a22001-09-09 08:38:09 +00001560 /*
1561 * Don't spend time trying to differentiate them, the same callback is
1562 * used !
1563 */
1564 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001565 return(0);
1566
Owen Taylor3473f882001-02-23 17:55:21 +00001567 /*
1568 * Check for xml:space value.
1569 */
1570 if (*(ctxt->space) == 1)
1571 return(0);
1572
1573 /*
1574 * Check that the string is made of blanks
1575 */
1576 for (i = 0;i < len;i++)
1577 if (!(IS_BLANK(str[i]))) return(0);
1578
1579 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001580 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001581 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001582 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001583 if (ctxt->myDoc != NULL) {
1584 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1585 if (ret == 0) return(1);
1586 if (ret == 1) return(0);
1587 }
1588
1589 /*
1590 * Otherwise, heuristic :-\
1591 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001592 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001593 if ((ctxt->node->children == NULL) &&
1594 (RAW == '<') && (NXT(1) == '/')) return(0);
1595
1596 lastChild = xmlGetLastChild(ctxt->node);
1597 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001598 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1599 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001600 } else if (xmlNodeIsText(lastChild))
1601 return(0);
1602 else if ((ctxt->node->children != NULL) &&
1603 (xmlNodeIsText(ctxt->node->children)))
1604 return(0);
1605 return(1);
1606}
1607
Owen Taylor3473f882001-02-23 17:55:21 +00001608/************************************************************************
1609 * *
1610 * Extra stuff for namespace support *
1611 * Relates to http://www.w3.org/TR/WD-xml-names *
1612 * *
1613 ************************************************************************/
1614
1615/**
1616 * xmlSplitQName:
1617 * @ctxt: an XML parser context
1618 * @name: an XML parser context
1619 * @prefix: a xmlChar **
1620 *
1621 * parse an UTF8 encoded XML qualified name string
1622 *
1623 * [NS 5] QName ::= (Prefix ':')? LocalPart
1624 *
1625 * [NS 6] Prefix ::= NCName
1626 *
1627 * [NS 7] LocalPart ::= NCName
1628 *
1629 * Returns the local part, and prefix is updated
1630 * to get the Prefix if any.
1631 */
1632
1633xmlChar *
1634xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1635 xmlChar buf[XML_MAX_NAMELEN + 5];
1636 xmlChar *buffer = NULL;
1637 int len = 0;
1638 int max = XML_MAX_NAMELEN;
1639 xmlChar *ret = NULL;
1640 const xmlChar *cur = name;
1641 int c;
1642
1643 *prefix = NULL;
1644
1645#ifndef XML_XML_NAMESPACE
1646 /* xml: prefix is not really a namespace */
1647 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1648 (cur[2] == 'l') && (cur[3] == ':'))
1649 return(xmlStrdup(name));
1650#endif
1651
1652 /* nasty but valid */
1653 if (cur[0] == ':')
1654 return(xmlStrdup(name));
1655
1656 c = *cur++;
1657 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1658 buf[len++] = c;
1659 c = *cur++;
1660 }
1661 if (len >= max) {
1662 /*
1663 * Okay someone managed to make a huge name, so he's ready to pay
1664 * for the processing speed.
1665 */
1666 max = len * 2;
1667
1668 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1669 if (buffer == NULL) {
1670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1671 ctxt->sax->error(ctxt->userData,
1672 "xmlSplitQName: out of memory\n");
1673 return(NULL);
1674 }
1675 memcpy(buffer, buf, len);
1676 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1677 if (len + 10 > max) {
1678 max *= 2;
1679 buffer = (xmlChar *) xmlRealloc(buffer,
1680 max * sizeof(xmlChar));
1681 if (buffer == NULL) {
1682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1683 ctxt->sax->error(ctxt->userData,
1684 "xmlSplitQName: out of memory\n");
1685 return(NULL);
1686 }
1687 }
1688 buffer[len++] = c;
1689 c = *cur++;
1690 }
1691 buffer[len] = 0;
1692 }
1693
1694 if (buffer == NULL)
1695 ret = xmlStrndup(buf, len);
1696 else {
1697 ret = buffer;
1698 buffer = NULL;
1699 max = XML_MAX_NAMELEN;
1700 }
1701
1702
1703 if (c == ':') {
1704 c = *cur++;
1705 if (c == 0) return(ret);
1706 *prefix = ret;
1707 len = 0;
1708
1709 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1710 buf[len++] = c;
1711 c = *cur++;
1712 }
1713 if (len >= max) {
1714 /*
1715 * Okay someone managed to make a huge name, so he's ready to pay
1716 * for the processing speed.
1717 */
1718 max = len * 2;
1719
1720 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1721 if (buffer == NULL) {
1722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723 ctxt->sax->error(ctxt->userData,
1724 "xmlSplitQName: out of memory\n");
1725 return(NULL);
1726 }
1727 memcpy(buffer, buf, len);
1728 while (c != 0) { /* tested bigname2.xml */
1729 if (len + 10 > max) {
1730 max *= 2;
1731 buffer = (xmlChar *) xmlRealloc(buffer,
1732 max * sizeof(xmlChar));
1733 if (buffer == NULL) {
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "xmlSplitQName: out of memory\n");
1737 return(NULL);
1738 }
1739 }
1740 buffer[len++] = c;
1741 c = *cur++;
1742 }
1743 buffer[len] = 0;
1744 }
1745
1746 if (buffer == NULL)
1747 ret = xmlStrndup(buf, len);
1748 else {
1749 ret = buffer;
1750 }
1751 }
1752
1753 return(ret);
1754}
1755
1756/************************************************************************
1757 * *
1758 * The parser itself *
1759 * Relates to http://www.w3.org/TR/REC-xml *
1760 * *
1761 ************************************************************************/
1762
Daniel Veillard76d66f42001-05-16 21:05:17 +00001763static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001764/**
1765 * xmlParseName:
1766 * @ctxt: an XML parser context
1767 *
1768 * parse an XML name.
1769 *
1770 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1771 * CombiningChar | Extender
1772 *
1773 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1774 *
1775 * [6] Names ::= Name (S Name)*
1776 *
1777 * Returns the Name parsed or NULL
1778 */
1779
1780xmlChar *
1781xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001782 const xmlChar *in;
1783 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001784 int count = 0;
1785
1786 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001787
1788 /*
1789 * Accelerator for simple ASCII names
1790 */
1791 in = ctxt->input->cur;
1792 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1793 ((*in >= 0x41) && (*in <= 0x5A)) ||
1794 (*in == '_') || (*in == ':')) {
1795 in++;
1796 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1797 ((*in >= 0x41) && (*in <= 0x5A)) ||
1798 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001799 (*in == '_') || (*in == '-') ||
1800 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001801 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001802 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001803 count = in - ctxt->input->cur;
1804 ret = xmlStrndup(ctxt->input->cur, count);
1805 ctxt->input->cur = in;
1806 return(ret);
1807 }
1808 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001809 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001810}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001811
Daniel Veillard46de64e2002-05-29 08:21:33 +00001812/**
1813 * xmlParseNameAndCompare:
1814 * @ctxt: an XML parser context
1815 *
1816 * parse an XML name and compares for match
1817 * (specialized for endtag parsing)
1818 *
1819 *
1820 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1821 * and the name for mismatch
1822 */
1823
1824xmlChar *
1825xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1826 const xmlChar *cmp = other;
1827 const xmlChar *in;
1828 xmlChar *ret;
1829 int count = 0;
1830
1831 GROW;
1832
1833 in = ctxt->input->cur;
1834 while (*in != 0 && *in == *cmp) {
1835 ++in;
1836 ++cmp;
1837 }
1838 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1839 /* success */
1840 ctxt->input->cur = in;
1841 return (xmlChar*) 1;
1842 }
1843 /* failure (or end of input buffer), check with full function */
1844 ret = xmlParseName (ctxt);
1845 if (ret != 0 && xmlStrEqual (ret, other)) {
1846 xmlFree (ret);
1847 return (xmlChar*) 1;
1848 }
1849 return ret;
1850}
1851
Daniel Veillard76d66f42001-05-16 21:05:17 +00001852static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001853xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1854 xmlChar buf[XML_MAX_NAMELEN + 5];
1855 int len = 0, l;
1856 int c;
1857 int count = 0;
1858
1859 /*
1860 * Handler for more complex cases
1861 */
1862 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001863 c = CUR_CHAR(l);
1864 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1865 (!IS_LETTER(c) && (c != '_') &&
1866 (c != ':'))) {
1867 return(NULL);
1868 }
1869
1870 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1871 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1872 (c == '.') || (c == '-') ||
1873 (c == '_') || (c == ':') ||
1874 (IS_COMBINING(c)) ||
1875 (IS_EXTENDER(c)))) {
1876 if (count++ > 100) {
1877 count = 0;
1878 GROW;
1879 }
1880 COPY_BUF(l,buf,len,c);
1881 NEXTL(l);
1882 c = CUR_CHAR(l);
1883 if (len >= XML_MAX_NAMELEN) {
1884 /*
1885 * Okay someone managed to make a huge name, so he's ready to pay
1886 * for the processing speed.
1887 */
1888 xmlChar *buffer;
1889 int max = len * 2;
1890
1891 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1892 if (buffer == NULL) {
1893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1894 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001895 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001896 return(NULL);
1897 }
1898 memcpy(buffer, buf, len);
1899 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1900 (c == '.') || (c == '-') ||
1901 (c == '_') || (c == ':') ||
1902 (IS_COMBINING(c)) ||
1903 (IS_EXTENDER(c))) {
1904 if (count++ > 100) {
1905 count = 0;
1906 GROW;
1907 }
1908 if (len + 10 > max) {
1909 max *= 2;
1910 buffer = (xmlChar *) xmlRealloc(buffer,
1911 max * sizeof(xmlChar));
1912 if (buffer == NULL) {
1913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1914 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001915 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001916 return(NULL);
1917 }
1918 }
1919 COPY_BUF(l,buffer,len,c);
1920 NEXTL(l);
1921 c = CUR_CHAR(l);
1922 }
1923 buffer[len] = 0;
1924 return(buffer);
1925 }
1926 }
1927 return(xmlStrndup(buf, len));
1928}
1929
1930/**
1931 * xmlParseStringName:
1932 * @ctxt: an XML parser context
1933 * @str: a pointer to the string pointer (IN/OUT)
1934 *
1935 * parse an XML name.
1936 *
1937 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1938 * CombiningChar | Extender
1939 *
1940 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1941 *
1942 * [6] Names ::= Name (S Name)*
1943 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001944 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001945 * is updated to the current location in the string.
1946 */
1947
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001948static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001949xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1950 xmlChar buf[XML_MAX_NAMELEN + 5];
1951 const xmlChar *cur = *str;
1952 int len = 0, l;
1953 int c;
1954
1955 c = CUR_SCHAR(cur, l);
1956 if (!IS_LETTER(c) && (c != '_') &&
1957 (c != ':')) {
1958 return(NULL);
1959 }
1960
1961 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1962 (c == '.') || (c == '-') ||
1963 (c == '_') || (c == ':') ||
1964 (IS_COMBINING(c)) ||
1965 (IS_EXTENDER(c))) {
1966 COPY_BUF(l,buf,len,c);
1967 cur += l;
1968 c = CUR_SCHAR(cur, l);
1969 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1970 /*
1971 * Okay someone managed to make a huge name, so he's ready to pay
1972 * for the processing speed.
1973 */
1974 xmlChar *buffer;
1975 int max = len * 2;
1976
1977 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1978 if (buffer == NULL) {
1979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1980 ctxt->sax->error(ctxt->userData,
1981 "xmlParseStringName: out of memory\n");
1982 return(NULL);
1983 }
1984 memcpy(buffer, buf, len);
1985 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1986 (c == '.') || (c == '-') ||
1987 (c == '_') || (c == ':') ||
1988 (IS_COMBINING(c)) ||
1989 (IS_EXTENDER(c))) {
1990 if (len + 10 > max) {
1991 max *= 2;
1992 buffer = (xmlChar *) xmlRealloc(buffer,
1993 max * sizeof(xmlChar));
1994 if (buffer == NULL) {
1995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1996 ctxt->sax->error(ctxt->userData,
1997 "xmlParseStringName: out of memory\n");
1998 return(NULL);
1999 }
2000 }
2001 COPY_BUF(l,buffer,len,c);
2002 cur += l;
2003 c = CUR_SCHAR(cur, l);
2004 }
2005 buffer[len] = 0;
2006 *str = cur;
2007 return(buffer);
2008 }
2009 }
2010 *str = cur;
2011 return(xmlStrndup(buf, len));
2012}
2013
2014/**
2015 * xmlParseNmtoken:
2016 * @ctxt: an XML parser context
2017 *
2018 * parse an XML Nmtoken.
2019 *
2020 * [7] Nmtoken ::= (NameChar)+
2021 *
2022 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2023 *
2024 * Returns the Nmtoken parsed or NULL
2025 */
2026
2027xmlChar *
2028xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2029 xmlChar buf[XML_MAX_NAMELEN + 5];
2030 int len = 0, l;
2031 int c;
2032 int count = 0;
2033
2034 GROW;
2035 c = CUR_CHAR(l);
2036
2037 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2038 (c == '.') || (c == '-') ||
2039 (c == '_') || (c == ':') ||
2040 (IS_COMBINING(c)) ||
2041 (IS_EXTENDER(c))) {
2042 if (count++ > 100) {
2043 count = 0;
2044 GROW;
2045 }
2046 COPY_BUF(l,buf,len,c);
2047 NEXTL(l);
2048 c = CUR_CHAR(l);
2049 if (len >= XML_MAX_NAMELEN) {
2050 /*
2051 * Okay someone managed to make a huge token, so he's ready to pay
2052 * for the processing speed.
2053 */
2054 xmlChar *buffer;
2055 int max = len * 2;
2056
2057 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2058 if (buffer == NULL) {
2059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2060 ctxt->sax->error(ctxt->userData,
2061 "xmlParseNmtoken: out of memory\n");
2062 return(NULL);
2063 }
2064 memcpy(buffer, buf, len);
2065 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2066 (c == '.') || (c == '-') ||
2067 (c == '_') || (c == ':') ||
2068 (IS_COMBINING(c)) ||
2069 (IS_EXTENDER(c))) {
2070 if (count++ > 100) {
2071 count = 0;
2072 GROW;
2073 }
2074 if (len + 10 > max) {
2075 max *= 2;
2076 buffer = (xmlChar *) xmlRealloc(buffer,
2077 max * sizeof(xmlChar));
2078 if (buffer == NULL) {
2079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2080 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002081 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002082 return(NULL);
2083 }
2084 }
2085 COPY_BUF(l,buffer,len,c);
2086 NEXTL(l);
2087 c = CUR_CHAR(l);
2088 }
2089 buffer[len] = 0;
2090 return(buffer);
2091 }
2092 }
2093 if (len == 0)
2094 return(NULL);
2095 return(xmlStrndup(buf, len));
2096}
2097
2098/**
2099 * xmlParseEntityValue:
2100 * @ctxt: an XML parser context
2101 * @orig: if non-NULL store a copy of the original entity value
2102 *
2103 * parse a value for ENTITY declarations
2104 *
2105 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2106 * "'" ([^%&'] | PEReference | Reference)* "'"
2107 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002108 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002109 */
2110
2111xmlChar *
2112xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2113 xmlChar *buf = NULL;
2114 int len = 0;
2115 int size = XML_PARSER_BUFFER_SIZE;
2116 int c, l;
2117 xmlChar stop;
2118 xmlChar *ret = NULL;
2119 const xmlChar *cur = NULL;
2120 xmlParserInputPtr input;
2121
2122 if (RAW == '"') stop = '"';
2123 else if (RAW == '\'') stop = '\'';
2124 else {
2125 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2127 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2128 ctxt->wellFormed = 0;
2129 ctxt->disableSAX = 1;
2130 return(NULL);
2131 }
2132 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2133 if (buf == NULL) {
2134 xmlGenericError(xmlGenericErrorContext,
2135 "malloc of %d byte failed\n", size);
2136 return(NULL);
2137 }
2138
2139 /*
2140 * The content of the entity definition is copied in a buffer.
2141 */
2142
2143 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2144 input = ctxt->input;
2145 GROW;
2146 NEXT;
2147 c = CUR_CHAR(l);
2148 /*
2149 * NOTE: 4.4.5 Included in Literal
2150 * When a parameter entity reference appears in a literal entity
2151 * value, ... a single or double quote character in the replacement
2152 * text is always treated as a normal data character and will not
2153 * terminate the literal.
2154 * In practice it means we stop the loop only when back at parsing
2155 * the initial entity and the quote is found
2156 */
2157 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2158 (ctxt->input != input))) {
2159 if (len + 5 >= size) {
2160 size *= 2;
2161 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2162 if (buf == NULL) {
2163 xmlGenericError(xmlGenericErrorContext,
2164 "realloc of %d byte failed\n", size);
2165 return(NULL);
2166 }
2167 }
2168 COPY_BUF(l,buf,len,c);
2169 NEXTL(l);
2170 /*
2171 * Pop-up of finished entities.
2172 */
2173 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2174 xmlPopInput(ctxt);
2175
2176 GROW;
2177 c = CUR_CHAR(l);
2178 if (c == 0) {
2179 GROW;
2180 c = CUR_CHAR(l);
2181 }
2182 }
2183 buf[len] = 0;
2184
2185 /*
2186 * Raise problem w.r.t. '&' and '%' being used in non-entities
2187 * reference constructs. Note Charref will be handled in
2188 * xmlStringDecodeEntities()
2189 */
2190 cur = buf;
2191 while (*cur != 0) { /* non input consuming */
2192 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2193 xmlChar *name;
2194 xmlChar tmp = *cur;
2195
2196 cur++;
2197 name = xmlParseStringName(ctxt, &cur);
2198 if ((name == NULL) || (*cur != ';')) {
2199 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2201 ctxt->sax->error(ctxt->userData,
2202 "EntityValue: '%c' forbidden except for entities references\n",
2203 tmp);
2204 ctxt->wellFormed = 0;
2205 ctxt->disableSAX = 1;
2206 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002207 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2208 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002209 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2211 ctxt->sax->error(ctxt->userData,
2212 "EntityValue: PEReferences forbidden in internal subset\n",
2213 tmp);
2214 ctxt->wellFormed = 0;
2215 ctxt->disableSAX = 1;
2216 }
2217 if (name != NULL)
2218 xmlFree(name);
2219 }
2220 cur++;
2221 }
2222
2223 /*
2224 * Then PEReference entities are substituted.
2225 */
2226 if (c != stop) {
2227 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2229 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2230 ctxt->wellFormed = 0;
2231 ctxt->disableSAX = 1;
2232 xmlFree(buf);
2233 } else {
2234 NEXT;
2235 /*
2236 * NOTE: 4.4.7 Bypassed
2237 * When a general entity reference appears in the EntityValue in
2238 * an entity declaration, it is bypassed and left as is.
2239 * so XML_SUBSTITUTE_REF is not set here.
2240 */
2241 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2242 0, 0, 0);
2243 if (orig != NULL)
2244 *orig = buf;
2245 else
2246 xmlFree(buf);
2247 }
2248
2249 return(ret);
2250}
2251
2252/**
2253 * xmlParseAttValue:
2254 * @ctxt: an XML parser context
2255 *
2256 * parse a value for an attribute
2257 * Note: the parser won't do substitution of entities here, this
2258 * will be handled later in xmlStringGetNodeList
2259 *
2260 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2261 * "'" ([^<&'] | Reference)* "'"
2262 *
2263 * 3.3.3 Attribute-Value Normalization:
2264 * Before the value of an attribute is passed to the application or
2265 * checked for validity, the XML processor must normalize it as follows:
2266 * - a character reference is processed by appending the referenced
2267 * character to the attribute value
2268 * - an entity reference is processed by recursively processing the
2269 * replacement text of the entity
2270 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2271 * appending #x20 to the normalized value, except that only a single
2272 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2273 * parsed entity or the literal entity value of an internal parsed entity
2274 * - other characters are processed by appending them to the normalized value
2275 * If the declared value is not CDATA, then the XML processor must further
2276 * process the normalized attribute value by discarding any leading and
2277 * trailing space (#x20) characters, and by replacing sequences of space
2278 * (#x20) characters by a single space (#x20) character.
2279 * All attributes for which no declaration has been read should be treated
2280 * by a non-validating parser as if declared CDATA.
2281 *
2282 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2283 */
2284
2285xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002286xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2287
2288xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002289xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2290 xmlChar limit = 0;
2291 xmlChar *buf = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002292 xmlChar *in = NULL;
2293 xmlChar *ret = NULL;
2294 SHRINK;
2295 GROW;
2296 in = CUR_PTR;
2297 if (*in != '"' && *in != '\'') {
2298 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2300 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2301 ctxt->wellFormed = 0;
2302 ctxt->disableSAX = 1;
2303 return(NULL);
2304 }
2305 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2306 limit = *in;
2307 ++in;
2308
2309 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2310 *in != '&' && *in != '<'
2311 ) {
2312 ++in;
2313 }
2314 if (*in != limit) {
2315 return xmlParseAttValueComplex(ctxt);
2316 }
2317 ++in;
2318 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2319 CUR_PTR = in;
2320 return ret;
2321}
2322
2323xmlChar *
2324xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2325 xmlChar limit = 0;
2326 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002327 int len = 0;
2328 int buf_size = 0;
2329 int c, l;
2330 xmlChar *current = NULL;
2331 xmlEntityPtr ent;
2332
2333
2334 SHRINK;
2335 if (NXT(0) == '"') {
2336 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2337 limit = '"';
2338 NEXT;
2339 } else if (NXT(0) == '\'') {
2340 limit = '\'';
2341 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2342 NEXT;
2343 } else {
2344 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2346 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2347 ctxt->wellFormed = 0;
2348 ctxt->disableSAX = 1;
2349 return(NULL);
2350 }
2351
2352 /*
2353 * allocate a translation buffer.
2354 */
2355 buf_size = XML_PARSER_BUFFER_SIZE;
2356 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2357 if (buf == NULL) {
2358 perror("xmlParseAttValue: malloc failed");
2359 return(NULL);
2360 }
2361
2362 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002363 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002364 */
2365 c = CUR_CHAR(l);
2366 while (((NXT(0) != limit) && /* checked */
2367 (c != '<')) || (ctxt->token != 0)) {
2368 if (c == 0) break;
2369 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002370 if (ctxt->replaceEntities) {
2371 if (len > buf_size - 10) {
2372 growBuffer(buf);
2373 }
2374 buf[len++] = '&';
2375 } else {
2376 /*
2377 * The reparsing will be done in xmlStringGetNodeList()
2378 * called by the attribute() function in SAX.c
2379 */
2380 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002381
Daniel Veillard319a7422001-09-11 09:27:09 +00002382 if (len > buf_size - 10) {
2383 growBuffer(buf);
2384 }
2385 current = &buffer[0];
2386 while (*current != 0) { /* non input consuming */
2387 buf[len++] = *current++;
2388 }
2389 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002390 }
Owen Taylor3473f882001-02-23 17:55:21 +00002391 } else if (c == '&') {
2392 if (NXT(1) == '#') {
2393 int val = xmlParseCharRef(ctxt);
2394 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002395 if (ctxt->replaceEntities) {
2396 if (len > buf_size - 10) {
2397 growBuffer(buf);
2398 }
2399 buf[len++] = '&';
2400 } else {
2401 /*
2402 * The reparsing will be done in xmlStringGetNodeList()
2403 * called by the attribute() function in SAX.c
2404 */
2405 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002406
Daniel Veillard319a7422001-09-11 09:27:09 +00002407 if (len > buf_size - 10) {
2408 growBuffer(buf);
2409 }
2410 current = &buffer[0];
2411 while (*current != 0) { /* non input consuming */
2412 buf[len++] = *current++;
2413 }
Owen Taylor3473f882001-02-23 17:55:21 +00002414 }
2415 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002416 if (len > buf_size - 10) {
2417 growBuffer(buf);
2418 }
Owen Taylor3473f882001-02-23 17:55:21 +00002419 len += xmlCopyChar(0, &buf[len], val);
2420 }
2421 } else {
2422 ent = xmlParseEntityRef(ctxt);
2423 if ((ent != NULL) &&
2424 (ctxt->replaceEntities != 0)) {
2425 xmlChar *rep;
2426
2427 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2428 rep = xmlStringDecodeEntities(ctxt, ent->content,
2429 XML_SUBSTITUTE_REF, 0, 0, 0);
2430 if (rep != NULL) {
2431 current = rep;
2432 while (*current != 0) { /* non input consuming */
2433 buf[len++] = *current++;
2434 if (len > buf_size - 10) {
2435 growBuffer(buf);
2436 }
2437 }
2438 xmlFree(rep);
2439 }
2440 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002441 if (len > buf_size - 10) {
2442 growBuffer(buf);
2443 }
Owen Taylor3473f882001-02-23 17:55:21 +00002444 if (ent->content != NULL)
2445 buf[len++] = ent->content[0];
2446 }
2447 } else if (ent != NULL) {
2448 int i = xmlStrlen(ent->name);
2449 const xmlChar *cur = ent->name;
2450
2451 /*
2452 * This may look absurd but is needed to detect
2453 * entities problems
2454 */
2455 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2456 (ent->content != NULL)) {
2457 xmlChar *rep;
2458 rep = xmlStringDecodeEntities(ctxt, ent->content,
2459 XML_SUBSTITUTE_REF, 0, 0, 0);
2460 if (rep != NULL)
2461 xmlFree(rep);
2462 }
2463
2464 /*
2465 * Just output the reference
2466 */
2467 buf[len++] = '&';
2468 if (len > buf_size - i - 10) {
2469 growBuffer(buf);
2470 }
2471 for (;i > 0;i--)
2472 buf[len++] = *cur++;
2473 buf[len++] = ';';
2474 }
2475 }
2476 } else {
2477 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2478 COPY_BUF(l,buf,len,0x20);
2479 if (len > buf_size - 10) {
2480 growBuffer(buf);
2481 }
2482 } else {
2483 COPY_BUF(l,buf,len,c);
2484 if (len > buf_size - 10) {
2485 growBuffer(buf);
2486 }
2487 }
2488 NEXTL(l);
2489 }
2490 GROW;
2491 c = CUR_CHAR(l);
2492 }
2493 buf[len++] = 0;
2494 if (RAW == '<') {
2495 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2497 ctxt->sax->error(ctxt->userData,
2498 "Unescaped '<' not allowed in attributes values\n");
2499 ctxt->wellFormed = 0;
2500 ctxt->disableSAX = 1;
2501 } else if (RAW != limit) {
2502 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2504 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2505 ctxt->wellFormed = 0;
2506 ctxt->disableSAX = 1;
2507 } else
2508 NEXT;
2509 return(buf);
2510}
2511
2512/**
2513 * xmlParseSystemLiteral:
2514 * @ctxt: an XML parser context
2515 *
2516 * parse an XML Literal
2517 *
2518 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2519 *
2520 * Returns the SystemLiteral parsed or NULL
2521 */
2522
2523xmlChar *
2524xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2525 xmlChar *buf = NULL;
2526 int len = 0;
2527 int size = XML_PARSER_BUFFER_SIZE;
2528 int cur, l;
2529 xmlChar stop;
2530 int state = ctxt->instate;
2531 int count = 0;
2532
2533 SHRINK;
2534 if (RAW == '"') {
2535 NEXT;
2536 stop = '"';
2537 } else if (RAW == '\'') {
2538 NEXT;
2539 stop = '\'';
2540 } else {
2541 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2543 ctxt->sax->error(ctxt->userData,
2544 "SystemLiteral \" or ' expected\n");
2545 ctxt->wellFormed = 0;
2546 ctxt->disableSAX = 1;
2547 return(NULL);
2548 }
2549
2550 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2551 if (buf == NULL) {
2552 xmlGenericError(xmlGenericErrorContext,
2553 "malloc of %d byte failed\n", size);
2554 return(NULL);
2555 }
2556 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2557 cur = CUR_CHAR(l);
2558 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2559 if (len + 5 >= size) {
2560 size *= 2;
2561 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2562 if (buf == NULL) {
2563 xmlGenericError(xmlGenericErrorContext,
2564 "realloc of %d byte failed\n", size);
2565 ctxt->instate = (xmlParserInputState) state;
2566 return(NULL);
2567 }
2568 }
2569 count++;
2570 if (count > 50) {
2571 GROW;
2572 count = 0;
2573 }
2574 COPY_BUF(l,buf,len,cur);
2575 NEXTL(l);
2576 cur = CUR_CHAR(l);
2577 if (cur == 0) {
2578 GROW;
2579 SHRINK;
2580 cur = CUR_CHAR(l);
2581 }
2582 }
2583 buf[len] = 0;
2584 ctxt->instate = (xmlParserInputState) state;
2585 if (!IS_CHAR(cur)) {
2586 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2588 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2589 ctxt->wellFormed = 0;
2590 ctxt->disableSAX = 1;
2591 } else {
2592 NEXT;
2593 }
2594 return(buf);
2595}
2596
2597/**
2598 * xmlParsePubidLiteral:
2599 * @ctxt: an XML parser context
2600 *
2601 * parse an XML public literal
2602 *
2603 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2604 *
2605 * Returns the PubidLiteral parsed or NULL.
2606 */
2607
2608xmlChar *
2609xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2610 xmlChar *buf = NULL;
2611 int len = 0;
2612 int size = XML_PARSER_BUFFER_SIZE;
2613 xmlChar cur;
2614 xmlChar stop;
2615 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002616 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002617
2618 SHRINK;
2619 if (RAW == '"') {
2620 NEXT;
2621 stop = '"';
2622 } else if (RAW == '\'') {
2623 NEXT;
2624 stop = '\'';
2625 } else {
2626 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2628 ctxt->sax->error(ctxt->userData,
2629 "SystemLiteral \" or ' expected\n");
2630 ctxt->wellFormed = 0;
2631 ctxt->disableSAX = 1;
2632 return(NULL);
2633 }
2634 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2635 if (buf == NULL) {
2636 xmlGenericError(xmlGenericErrorContext,
2637 "malloc of %d byte failed\n", size);
2638 return(NULL);
2639 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002640 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002641 cur = CUR;
2642 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2643 if (len + 1 >= size) {
2644 size *= 2;
2645 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2646 if (buf == NULL) {
2647 xmlGenericError(xmlGenericErrorContext,
2648 "realloc of %d byte failed\n", size);
2649 return(NULL);
2650 }
2651 }
2652 buf[len++] = cur;
2653 count++;
2654 if (count > 50) {
2655 GROW;
2656 count = 0;
2657 }
2658 NEXT;
2659 cur = CUR;
2660 if (cur == 0) {
2661 GROW;
2662 SHRINK;
2663 cur = CUR;
2664 }
2665 }
2666 buf[len] = 0;
2667 if (cur != stop) {
2668 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2670 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2671 ctxt->wellFormed = 0;
2672 ctxt->disableSAX = 1;
2673 } else {
2674 NEXT;
2675 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002676 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002677 return(buf);
2678}
2679
Daniel Veillard48b2f892001-02-25 16:11:03 +00002680void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002681/**
2682 * xmlParseCharData:
2683 * @ctxt: an XML parser context
2684 * @cdata: int indicating whether we are within a CDATA section
2685 *
2686 * parse a CharData section.
2687 * if we are within a CDATA section ']]>' marks an end of section.
2688 *
2689 * The right angle bracket (>) may be represented using the string "&gt;",
2690 * and must, for compatibility, be escaped using "&gt;" or a character
2691 * reference when it appears in the string "]]>" in content, when that
2692 * string is not marking the end of a CDATA section.
2693 *
2694 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2695 */
2696
2697void
2698xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002699 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002700 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002701 int line = ctxt->input->line;
2702 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002703
2704 SHRINK;
2705 GROW;
2706 /*
2707 * Accelerated common case where input don't need to be
2708 * modified before passing it to the handler.
2709 */
2710 if ((ctxt->token == 0) && (!cdata)) {
2711 in = ctxt->input->cur;
2712 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002713get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002714 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2715 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002716 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002717 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002718 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002719 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002720 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002721 ctxt->input->line++;
2722 in++;
2723 }
2724 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002725 }
2726 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002727 if ((in[1] == ']') && (in[2] == '>')) {
2728 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2730 ctxt->sax->error(ctxt->userData,
2731 "Sequence ']]>' not allowed in content\n");
2732 ctxt->input->cur = in;
2733 ctxt->wellFormed = 0;
2734 ctxt->disableSAX = 1;
2735 return;
2736 }
2737 in++;
2738 goto get_more;
2739 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002740 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002741 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002742 if (IS_BLANK(*ctxt->input->cur)) {
2743 const xmlChar *tmp = ctxt->input->cur;
2744 ctxt->input->cur = in;
2745 if (areBlanks(ctxt, tmp, nbchar)) {
2746 if (ctxt->sax->ignorableWhitespace != NULL)
2747 ctxt->sax->ignorableWhitespace(ctxt->userData,
2748 tmp, nbchar);
2749 } else {
2750 if (ctxt->sax->characters != NULL)
2751 ctxt->sax->characters(ctxt->userData,
2752 tmp, nbchar);
2753 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002754 line = ctxt->input->line;
2755 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002756 } else {
2757 if (ctxt->sax->characters != NULL)
2758 ctxt->sax->characters(ctxt->userData,
2759 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002760 line = ctxt->input->line;
2761 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002762 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002763 }
2764 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002765 if (*in == 0xD) {
2766 in++;
2767 if (*in == 0xA) {
2768 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002769 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002770 ctxt->input->line++;
2771 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002773 in--;
2774 }
2775 if (*in == '<') {
2776 return;
2777 }
2778 if (*in == '&') {
2779 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002780 }
2781 SHRINK;
2782 GROW;
2783 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002784 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002785 nbchar = 0;
2786 }
Daniel Veillard50582112001-03-26 22:52:16 +00002787 ctxt->input->line = line;
2788 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002789 xmlParseCharDataComplex(ctxt, cdata);
2790}
2791
2792void
2793xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002794 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2795 int nbchar = 0;
2796 int cur, l;
2797 int count = 0;
2798
2799 SHRINK;
2800 GROW;
2801 cur = CUR_CHAR(l);
2802 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2803 ((cur != '&') || (ctxt->token == '&')) &&
2804 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2805 if ((cur == ']') && (NXT(1) == ']') &&
2806 (NXT(2) == '>')) {
2807 if (cdata) break;
2808 else {
2809 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2811 ctxt->sax->error(ctxt->userData,
2812 "Sequence ']]>' not allowed in content\n");
2813 /* Should this be relaxed ??? I see a "must here */
2814 ctxt->wellFormed = 0;
2815 ctxt->disableSAX = 1;
2816 }
2817 }
2818 COPY_BUF(l,buf,nbchar,cur);
2819 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2820 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002821 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002822 */
2823 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2824 if (areBlanks(ctxt, buf, nbchar)) {
2825 if (ctxt->sax->ignorableWhitespace != NULL)
2826 ctxt->sax->ignorableWhitespace(ctxt->userData,
2827 buf, nbchar);
2828 } else {
2829 if (ctxt->sax->characters != NULL)
2830 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2831 }
2832 }
2833 nbchar = 0;
2834 }
2835 count++;
2836 if (count > 50) {
2837 GROW;
2838 count = 0;
2839 }
2840 NEXTL(l);
2841 cur = CUR_CHAR(l);
2842 }
2843 if (nbchar != 0) {
2844 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002845 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002846 */
2847 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2848 if (areBlanks(ctxt, buf, nbchar)) {
2849 if (ctxt->sax->ignorableWhitespace != NULL)
2850 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2851 } else {
2852 if (ctxt->sax->characters != NULL)
2853 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2854 }
2855 }
2856 }
2857}
2858
2859/**
2860 * xmlParseExternalID:
2861 * @ctxt: an XML parser context
2862 * @publicID: a xmlChar** receiving PubidLiteral
2863 * @strict: indicate whether we should restrict parsing to only
2864 * production [75], see NOTE below
2865 *
2866 * Parse an External ID or a Public ID
2867 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002868 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002869 * 'PUBLIC' S PubidLiteral S SystemLiteral
2870 *
2871 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2872 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2873 *
2874 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2875 *
2876 * Returns the function returns SystemLiteral and in the second
2877 * case publicID receives PubidLiteral, is strict is off
2878 * it is possible to return NULL and have publicID set.
2879 */
2880
2881xmlChar *
2882xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2883 xmlChar *URI = NULL;
2884
2885 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002886
2887 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002888 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2889 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2890 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2891 SKIP(6);
2892 if (!IS_BLANK(CUR)) {
2893 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2895 ctxt->sax->error(ctxt->userData,
2896 "Space required after 'SYSTEM'\n");
2897 ctxt->wellFormed = 0;
2898 ctxt->disableSAX = 1;
2899 }
2900 SKIP_BLANKS;
2901 URI = xmlParseSystemLiteral(ctxt);
2902 if (URI == NULL) {
2903 ctxt->errNo = XML_ERR_URI_REQUIRED;
2904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2905 ctxt->sax->error(ctxt->userData,
2906 "xmlParseExternalID: SYSTEM, no URI\n");
2907 ctxt->wellFormed = 0;
2908 ctxt->disableSAX = 1;
2909 }
2910 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2911 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2912 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2913 SKIP(6);
2914 if (!IS_BLANK(CUR)) {
2915 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2917 ctxt->sax->error(ctxt->userData,
2918 "Space required after 'PUBLIC'\n");
2919 ctxt->wellFormed = 0;
2920 ctxt->disableSAX = 1;
2921 }
2922 SKIP_BLANKS;
2923 *publicID = xmlParsePubidLiteral(ctxt);
2924 if (*publicID == NULL) {
2925 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2927 ctxt->sax->error(ctxt->userData,
2928 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 }
2932 if (strict) {
2933 /*
2934 * We don't handle [83] so "S SystemLiteral" is required.
2935 */
2936 if (!IS_BLANK(CUR)) {
2937 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2939 ctxt->sax->error(ctxt->userData,
2940 "Space required after the Public Identifier\n");
2941 ctxt->wellFormed = 0;
2942 ctxt->disableSAX = 1;
2943 }
2944 } else {
2945 /*
2946 * We handle [83] so we return immediately, if
2947 * "S SystemLiteral" is not detected. From a purely parsing
2948 * point of view that's a nice mess.
2949 */
2950 const xmlChar *ptr;
2951 GROW;
2952
2953 ptr = CUR_PTR;
2954 if (!IS_BLANK(*ptr)) return(NULL);
2955
2956 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2957 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2958 }
2959 SKIP_BLANKS;
2960 URI = xmlParseSystemLiteral(ctxt);
2961 if (URI == NULL) {
2962 ctxt->errNo = XML_ERR_URI_REQUIRED;
2963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2964 ctxt->sax->error(ctxt->userData,
2965 "xmlParseExternalID: PUBLIC, no URI\n");
2966 ctxt->wellFormed = 0;
2967 ctxt->disableSAX = 1;
2968 }
2969 }
2970 return(URI);
2971}
2972
2973/**
2974 * xmlParseComment:
2975 * @ctxt: an XML parser context
2976 *
2977 * Skip an XML (SGML) comment <!-- .... -->
2978 * The spec says that "For compatibility, the string "--" (double-hyphen)
2979 * must not occur within comments. "
2980 *
2981 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2982 */
2983void
2984xmlParseComment(xmlParserCtxtPtr ctxt) {
2985 xmlChar *buf = NULL;
2986 int len;
2987 int size = XML_PARSER_BUFFER_SIZE;
2988 int q, ql;
2989 int r, rl;
2990 int cur, l;
2991 xmlParserInputState state;
2992 xmlParserInputPtr input = ctxt->input;
2993 int count = 0;
2994
2995 /*
2996 * Check that there is a comment right here.
2997 */
2998 if ((RAW != '<') || (NXT(1) != '!') ||
2999 (NXT(2) != '-') || (NXT(3) != '-')) return;
3000
3001 state = ctxt->instate;
3002 ctxt->instate = XML_PARSER_COMMENT;
3003 SHRINK;
3004 SKIP(4);
3005 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3006 if (buf == NULL) {
3007 xmlGenericError(xmlGenericErrorContext,
3008 "malloc of %d byte failed\n", size);
3009 ctxt->instate = state;
3010 return;
3011 }
3012 q = CUR_CHAR(ql);
3013 NEXTL(ql);
3014 r = CUR_CHAR(rl);
3015 NEXTL(rl);
3016 cur = CUR_CHAR(l);
3017 len = 0;
3018 while (IS_CHAR(cur) && /* checked */
3019 ((cur != '>') ||
3020 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003021 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003022 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3024 ctxt->sax->error(ctxt->userData,
3025 "Comment must not contain '--' (double-hyphen)`\n");
3026 ctxt->wellFormed = 0;
3027 ctxt->disableSAX = 1;
3028 }
3029 if (len + 5 >= size) {
3030 size *= 2;
3031 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3032 if (buf == NULL) {
3033 xmlGenericError(xmlGenericErrorContext,
3034 "realloc of %d byte failed\n", size);
3035 ctxt->instate = state;
3036 return;
3037 }
3038 }
3039 COPY_BUF(ql,buf,len,q);
3040 q = r;
3041 ql = rl;
3042 r = cur;
3043 rl = l;
3044
3045 count++;
3046 if (count > 50) {
3047 GROW;
3048 count = 0;
3049 }
3050 NEXTL(l);
3051 cur = CUR_CHAR(l);
3052 if (cur == 0) {
3053 SHRINK;
3054 GROW;
3055 cur = CUR_CHAR(l);
3056 }
3057 }
3058 buf[len] = 0;
3059 if (!IS_CHAR(cur)) {
3060 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3062 ctxt->sax->error(ctxt->userData,
3063 "Comment not terminated \n<!--%.50s\n", buf);
3064 ctxt->wellFormed = 0;
3065 ctxt->disableSAX = 1;
3066 xmlFree(buf);
3067 } else {
3068 if (input != ctxt->input) {
3069 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3071 ctxt->sax->error(ctxt->userData,
3072"Comment doesn't start and stop in the same entity\n");
3073 ctxt->wellFormed = 0;
3074 ctxt->disableSAX = 1;
3075 }
3076 NEXT;
3077 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3078 (!ctxt->disableSAX))
3079 ctxt->sax->comment(ctxt->userData, buf);
3080 xmlFree(buf);
3081 }
3082 ctxt->instate = state;
3083}
3084
3085/**
3086 * xmlParsePITarget:
3087 * @ctxt: an XML parser context
3088 *
3089 * parse the name of a PI
3090 *
3091 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3092 *
3093 * Returns the PITarget name or NULL
3094 */
3095
3096xmlChar *
3097xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3098 xmlChar *name;
3099
3100 name = xmlParseName(ctxt);
3101 if ((name != NULL) &&
3102 ((name[0] == 'x') || (name[0] == 'X')) &&
3103 ((name[1] == 'm') || (name[1] == 'M')) &&
3104 ((name[2] == 'l') || (name[2] == 'L'))) {
3105 int i;
3106 if ((name[0] == 'x') && (name[1] == 'm') &&
3107 (name[2] == 'l') && (name[3] == 0)) {
3108 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3110 ctxt->sax->error(ctxt->userData,
3111 "XML declaration allowed only at the start of the document\n");
3112 ctxt->wellFormed = 0;
3113 ctxt->disableSAX = 1;
3114 return(name);
3115 } else if (name[3] == 0) {
3116 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3118 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3119 ctxt->wellFormed = 0;
3120 ctxt->disableSAX = 1;
3121 return(name);
3122 }
3123 for (i = 0;;i++) {
3124 if (xmlW3CPIs[i] == NULL) break;
3125 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3126 return(name);
3127 }
3128 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3129 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3130 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003131 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003132 }
3133 }
3134 return(name);
3135}
3136
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted catalog URL is added to the catalogs of the parsing context
 * so it can be used if a resolution is needed further down in the
 * document. A malformed value raises a SAX warning, except when the
 * '=' sign is missing, in which case the PI is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* expect: S? 'catalog' S? '=' S? quoted-value S? */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;
    for (; IS_BLANK(*p); p++)
        ;

    /* the value may be delimited by either kind of quote */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
                     "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3199
Owen Taylor3473f882001-02-23 17:55:21 +00003200/**
3201 * xmlParsePI:
3202 * @ctxt: an XML parser context
3203 *
3204 * parse an XML Processing Instruction.
3205 *
3206 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3207 *
3208 * The processing is transfered to SAX once parsed.
3209 */
3210
3211void
3212xmlParsePI(xmlParserCtxtPtr ctxt) {
3213 xmlChar *buf = NULL;
3214 int len = 0;
3215 int size = XML_PARSER_BUFFER_SIZE;
3216 int cur, l;
3217 xmlChar *target;
3218 xmlParserInputState state;
3219 int count = 0;
3220
3221 if ((RAW == '<') && (NXT(1) == '?')) {
3222 xmlParserInputPtr input = ctxt->input;
3223 state = ctxt->instate;
3224 ctxt->instate = XML_PARSER_PI;
3225 /*
3226 * this is a Processing Instruction.
3227 */
3228 SKIP(2);
3229 SHRINK;
3230
3231 /*
3232 * Parse the target name and check for special support like
3233 * namespace.
3234 */
3235 target = xmlParsePITarget(ctxt);
3236 if (target != NULL) {
3237 if ((RAW == '?') && (NXT(1) == '>')) {
3238 if (input != ctxt->input) {
3239 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3241 ctxt->sax->error(ctxt->userData,
3242 "PI declaration doesn't start and stop in the same entity\n");
3243 ctxt->wellFormed = 0;
3244 ctxt->disableSAX = 1;
3245 }
3246 SKIP(2);
3247
3248 /*
3249 * SAX: PI detected.
3250 */
3251 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3252 (ctxt->sax->processingInstruction != NULL))
3253 ctxt->sax->processingInstruction(ctxt->userData,
3254 target, NULL);
3255 ctxt->instate = state;
3256 xmlFree(target);
3257 return;
3258 }
3259 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3260 if (buf == NULL) {
3261 xmlGenericError(xmlGenericErrorContext,
3262 "malloc of %d byte failed\n", size);
3263 ctxt->instate = state;
3264 return;
3265 }
3266 cur = CUR;
3267 if (!IS_BLANK(cur)) {
3268 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3270 ctxt->sax->error(ctxt->userData,
3271 "xmlParsePI: PI %s space expected\n", target);
3272 ctxt->wellFormed = 0;
3273 ctxt->disableSAX = 1;
3274 }
3275 SKIP_BLANKS;
3276 cur = CUR_CHAR(l);
3277 while (IS_CHAR(cur) && /* checked */
3278 ((cur != '?') || (NXT(1) != '>'))) {
3279 if (len + 5 >= size) {
3280 size *= 2;
3281 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3282 if (buf == NULL) {
3283 xmlGenericError(xmlGenericErrorContext,
3284 "realloc of %d byte failed\n", size);
3285 ctxt->instate = state;
3286 return;
3287 }
3288 }
3289 count++;
3290 if (count > 50) {
3291 GROW;
3292 count = 0;
3293 }
3294 COPY_BUF(l,buf,len,cur);
3295 NEXTL(l);
3296 cur = CUR_CHAR(l);
3297 if (cur == 0) {
3298 SHRINK;
3299 GROW;
3300 cur = CUR_CHAR(l);
3301 }
3302 }
3303 buf[len] = 0;
3304 if (cur != '?') {
3305 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3307 ctxt->sax->error(ctxt->userData,
3308 "xmlParsePI: PI %s never end ...\n", target);
3309 ctxt->wellFormed = 0;
3310 ctxt->disableSAX = 1;
3311 } else {
3312 if (input != ctxt->input) {
3313 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3315 ctxt->sax->error(ctxt->userData,
3316 "PI declaration doesn't start and stop in the same entity\n");
3317 ctxt->wellFormed = 0;
3318 ctxt->disableSAX = 1;
3319 }
3320 SKIP(2);
3321
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003322#ifdef LIBXML_CATALOG_ENABLED
3323 if (((state == XML_PARSER_MISC) ||
3324 (state == XML_PARSER_START)) &&
3325 (xmlStrEqual(target, XML_CATALOG_PI))) {
3326 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3327 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3328 (allow == XML_CATA_ALLOW_ALL))
3329 xmlParseCatalogPI(ctxt, buf);
3330 }
3331#endif
3332
3333
Owen Taylor3473f882001-02-23 17:55:21 +00003334 /*
3335 * SAX: PI detected.
3336 */
3337 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3338 (ctxt->sax->processingInstruction != NULL))
3339 ctxt->sax->processingInstruction(ctxt->userData,
3340 target, buf);
3341 }
3342 xmlFree(buf);
3343 xmlFree(target);
3344 } else {
3345 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3347 ctxt->sax->error(ctxt->userData,
3348 "xmlParsePI : no target name\n");
3349 ctxt->wellFormed = 0;
3350 ctxt->disableSAX = 1;
3351 }
3352 ctxt->instate = state;
3353 }
3354}
3355
3356/**
3357 * xmlParseNotationDecl:
3358 * @ctxt: an XML parser context
3359 *
3360 * parse a notation declaration
3361 *
3362 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3363 *
3364 * Hence there is actually 3 choices:
3365 * 'PUBLIC' S PubidLiteral
3366 * 'PUBLIC' S PubidLiteral S SystemLiteral
3367 * and 'SYSTEM' S SystemLiteral
3368 *
3369 * See the NOTE on xmlParseExternalID().
3370 */
3371
3372void
3373xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3374 xmlChar *name;
3375 xmlChar *Pubid;
3376 xmlChar *Systemid;
3377
3378 if ((RAW == '<') && (NXT(1) == '!') &&
3379 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3380 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3381 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3382 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3383 xmlParserInputPtr input = ctxt->input;
3384 SHRINK;
3385 SKIP(10);
3386 if (!IS_BLANK(CUR)) {
3387 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3389 ctxt->sax->error(ctxt->userData,
3390 "Space required after '<!NOTATION'\n");
3391 ctxt->wellFormed = 0;
3392 ctxt->disableSAX = 1;
3393 return;
3394 }
3395 SKIP_BLANKS;
3396
Daniel Veillard76d66f42001-05-16 21:05:17 +00003397 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003398 if (name == NULL) {
3399 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3401 ctxt->sax->error(ctxt->userData,
3402 "NOTATION: Name expected here\n");
3403 ctxt->wellFormed = 0;
3404 ctxt->disableSAX = 1;
3405 return;
3406 }
3407 if (!IS_BLANK(CUR)) {
3408 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3410 ctxt->sax->error(ctxt->userData,
3411 "Space required after the NOTATION name'\n");
3412 ctxt->wellFormed = 0;
3413 ctxt->disableSAX = 1;
3414 return;
3415 }
3416 SKIP_BLANKS;
3417
3418 /*
3419 * Parse the IDs.
3420 */
3421 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3422 SKIP_BLANKS;
3423
3424 if (RAW == '>') {
3425 if (input != ctxt->input) {
3426 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3428 ctxt->sax->error(ctxt->userData,
3429"Notation declaration doesn't start and stop in the same entity\n");
3430 ctxt->wellFormed = 0;
3431 ctxt->disableSAX = 1;
3432 }
3433 NEXT;
3434 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3435 (ctxt->sax->notationDecl != NULL))
3436 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3437 } else {
3438 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3440 ctxt->sax->error(ctxt->userData,
3441 "'>' required to close NOTATION declaration\n");
3442 ctxt->wellFormed = 0;
3443 ctxt->disableSAX = 1;
3444 }
3445 xmlFree(name);
3446 if (Systemid != NULL) xmlFree(Systemid);
3447 if (Pubid != NULL) xmlFree(Pubid);
3448 }
3449}
3450
3451/**
3452 * xmlParseEntityDecl:
3453 * @ctxt: an XML parser context
3454 *
3455 * parse <!ENTITY declarations
3456 *
3457 * [70] EntityDecl ::= GEDecl | PEDecl
3458 *
3459 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3460 *
3461 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3462 *
3463 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3464 *
3465 * [74] PEDef ::= EntityValue | ExternalID
3466 *
3467 * [76] NDataDecl ::= S 'NDATA' S Name
3468 *
3469 * [ VC: Notation Declared ]
3470 * The Name must match the declared name of a notation.
3471 */
3472
3473void
3474xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3475 xmlChar *name = NULL;
3476 xmlChar *value = NULL;
3477 xmlChar *URI = NULL, *literal = NULL;
3478 xmlChar *ndata = NULL;
3479 int isParameter = 0;
3480 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003481 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003482
3483 GROW;
3484 if ((RAW == '<') && (NXT(1) == '!') &&
3485 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3486 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3487 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3488 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003489 SHRINK;
3490 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003491 skipped = SKIP_BLANKS;
3492 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003493 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3495 ctxt->sax->error(ctxt->userData,
3496 "Space required after '<!ENTITY'\n");
3497 ctxt->wellFormed = 0;
3498 ctxt->disableSAX = 1;
3499 }
Owen Taylor3473f882001-02-23 17:55:21 +00003500
3501 if (RAW == '%') {
3502 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003503 skipped = SKIP_BLANKS;
3504 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003505 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3507 ctxt->sax->error(ctxt->userData,
3508 "Space required after '%'\n");
3509 ctxt->wellFormed = 0;
3510 ctxt->disableSAX = 1;
3511 }
Owen Taylor3473f882001-02-23 17:55:21 +00003512 isParameter = 1;
3513 }
3514
Daniel Veillard76d66f42001-05-16 21:05:17 +00003515 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003516 if (name == NULL) {
3517 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3519 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3520 ctxt->wellFormed = 0;
3521 ctxt->disableSAX = 1;
3522 return;
3523 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003524 skipped = SKIP_BLANKS;
3525 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003526 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3528 ctxt->sax->error(ctxt->userData,
3529 "Space required after the entity name\n");
3530 ctxt->wellFormed = 0;
3531 ctxt->disableSAX = 1;
3532 }
Owen Taylor3473f882001-02-23 17:55:21 +00003533
Daniel Veillardf5582f12002-06-11 10:08:16 +00003534 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003535 /*
3536 * handle the various case of definitions...
3537 */
3538 if (isParameter) {
3539 if ((RAW == '"') || (RAW == '\'')) {
3540 value = xmlParseEntityValue(ctxt, &orig);
3541 if (value) {
3542 if ((ctxt->sax != NULL) &&
3543 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3544 ctxt->sax->entityDecl(ctxt->userData, name,
3545 XML_INTERNAL_PARAMETER_ENTITY,
3546 NULL, NULL, value);
3547 }
3548 } else {
3549 URI = xmlParseExternalID(ctxt, &literal, 1);
3550 if ((URI == NULL) && (literal == NULL)) {
3551 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3553 ctxt->sax->error(ctxt->userData,
3554 "Entity value required\n");
3555 ctxt->wellFormed = 0;
3556 ctxt->disableSAX = 1;
3557 }
3558 if (URI) {
3559 xmlURIPtr uri;
3560
3561 uri = xmlParseURI((const char *) URI);
3562 if (uri == NULL) {
3563 ctxt->errNo = XML_ERR_INVALID_URI;
3564 if ((ctxt->sax != NULL) &&
3565 (!ctxt->disableSAX) &&
3566 (ctxt->sax->error != NULL))
3567 ctxt->sax->error(ctxt->userData,
3568 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003569 /*
3570 * This really ought to be a well formedness error
3571 * but the XML Core WG decided otherwise c.f. issue
3572 * E26 of the XML erratas.
3573 */
Owen Taylor3473f882001-02-23 17:55:21 +00003574 } else {
3575 if (uri->fragment != NULL) {
3576 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3577 if ((ctxt->sax != NULL) &&
3578 (!ctxt->disableSAX) &&
3579 (ctxt->sax->error != NULL))
3580 ctxt->sax->error(ctxt->userData,
3581 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003582 /*
3583 * Okay this is foolish to block those but not
3584 * invalid URIs.
3585 */
Owen Taylor3473f882001-02-23 17:55:21 +00003586 ctxt->wellFormed = 0;
3587 } else {
3588 if ((ctxt->sax != NULL) &&
3589 (!ctxt->disableSAX) &&
3590 (ctxt->sax->entityDecl != NULL))
3591 ctxt->sax->entityDecl(ctxt->userData, name,
3592 XML_EXTERNAL_PARAMETER_ENTITY,
3593 literal, URI, NULL);
3594 }
3595 xmlFreeURI(uri);
3596 }
3597 }
3598 }
3599 } else {
3600 if ((RAW == '"') || (RAW == '\'')) {
3601 value = xmlParseEntityValue(ctxt, &orig);
3602 if ((ctxt->sax != NULL) &&
3603 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3604 ctxt->sax->entityDecl(ctxt->userData, name,
3605 XML_INTERNAL_GENERAL_ENTITY,
3606 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003607 /*
3608 * For expat compatibility in SAX mode.
3609 */
3610 if ((ctxt->myDoc == NULL) ||
3611 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3612 if (ctxt->myDoc == NULL) {
3613 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3614 }
3615 if (ctxt->myDoc->intSubset == NULL)
3616 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3617 BAD_CAST "fake", NULL, NULL);
3618
3619 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3620 NULL, NULL, value);
3621 }
Owen Taylor3473f882001-02-23 17:55:21 +00003622 } else {
3623 URI = xmlParseExternalID(ctxt, &literal, 1);
3624 if ((URI == NULL) && (literal == NULL)) {
3625 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3627 ctxt->sax->error(ctxt->userData,
3628 "Entity value required\n");
3629 ctxt->wellFormed = 0;
3630 ctxt->disableSAX = 1;
3631 }
3632 if (URI) {
3633 xmlURIPtr uri;
3634
3635 uri = xmlParseURI((const char *)URI);
3636 if (uri == NULL) {
3637 ctxt->errNo = XML_ERR_INVALID_URI;
3638 if ((ctxt->sax != NULL) &&
3639 (!ctxt->disableSAX) &&
3640 (ctxt->sax->error != NULL))
3641 ctxt->sax->error(ctxt->userData,
3642 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003643 /*
3644 * This really ought to be a well formedness error
3645 * but the XML Core WG decided otherwise c.f. issue
3646 * E26 of the XML erratas.
3647 */
Owen Taylor3473f882001-02-23 17:55:21 +00003648 } else {
3649 if (uri->fragment != NULL) {
3650 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3651 if ((ctxt->sax != NULL) &&
3652 (!ctxt->disableSAX) &&
3653 (ctxt->sax->error != NULL))
3654 ctxt->sax->error(ctxt->userData,
3655 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003656 /*
3657 * Okay this is foolish to block those but not
3658 * invalid URIs.
3659 */
Owen Taylor3473f882001-02-23 17:55:21 +00003660 ctxt->wellFormed = 0;
3661 }
3662 xmlFreeURI(uri);
3663 }
3664 }
3665 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3666 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3668 ctxt->sax->error(ctxt->userData,
3669 "Space required before 'NDATA'\n");
3670 ctxt->wellFormed = 0;
3671 ctxt->disableSAX = 1;
3672 }
3673 SKIP_BLANKS;
3674 if ((RAW == 'N') && (NXT(1) == 'D') &&
3675 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3676 (NXT(4) == 'A')) {
3677 SKIP(5);
3678 if (!IS_BLANK(CUR)) {
3679 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3681 ctxt->sax->error(ctxt->userData,
3682 "Space required after 'NDATA'\n");
3683 ctxt->wellFormed = 0;
3684 ctxt->disableSAX = 1;
3685 }
3686 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003687 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003688 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3689 (ctxt->sax->unparsedEntityDecl != NULL))
3690 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3691 literal, URI, ndata);
3692 } else {
3693 if ((ctxt->sax != NULL) &&
3694 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3695 ctxt->sax->entityDecl(ctxt->userData, name,
3696 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3697 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003698 /*
3699 * For expat compatibility in SAX mode.
3700 * assuming the entity repalcement was asked for
3701 */
3702 if ((ctxt->replaceEntities != 0) &&
3703 ((ctxt->myDoc == NULL) ||
3704 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3705 if (ctxt->myDoc == NULL) {
3706 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3707 }
3708
3709 if (ctxt->myDoc->intSubset == NULL)
3710 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3711 BAD_CAST "fake", NULL, NULL);
3712 entityDecl(ctxt, name,
3713 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3714 literal, URI, NULL);
3715 }
Owen Taylor3473f882001-02-23 17:55:21 +00003716 }
3717 }
3718 }
3719 SKIP_BLANKS;
3720 if (RAW != '>') {
3721 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3723 ctxt->sax->error(ctxt->userData,
3724 "xmlParseEntityDecl: entity %s not terminated\n", name);
3725 ctxt->wellFormed = 0;
3726 ctxt->disableSAX = 1;
3727 } else {
3728 if (input != ctxt->input) {
3729 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3731 ctxt->sax->error(ctxt->userData,
3732"Entity declaration doesn't start and stop in the same entity\n");
3733 ctxt->wellFormed = 0;
3734 ctxt->disableSAX = 1;
3735 }
3736 NEXT;
3737 }
3738 if (orig != NULL) {
3739 /*
3740 * Ugly mechanism to save the raw entity value.
3741 */
3742 xmlEntityPtr cur = NULL;
3743
3744 if (isParameter) {
3745 if ((ctxt->sax != NULL) &&
3746 (ctxt->sax->getParameterEntity != NULL))
3747 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3748 } else {
3749 if ((ctxt->sax != NULL) &&
3750 (ctxt->sax->getEntity != NULL))
3751 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003752 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3753 cur = getEntity(ctxt, name);
3754 }
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 if (cur != NULL) {
3757 if (cur->orig != NULL)
3758 xmlFree(orig);
3759 else
3760 cur->orig = orig;
3761 } else
3762 xmlFree(orig);
3763 }
3764 if (name != NULL) xmlFree(name);
3765 if (value != NULL) xmlFree(value);
3766 if (URI != NULL) xmlFree(URI);
3767 if (literal != NULL) xmlFree(literal);
3768 if (ndata != NULL) xmlFree(ndata);
3769 }
3770}
3771
3772/**
3773 * xmlParseDefaultDecl:
3774 * @ctxt: an XML parser context
3775 * @value: Receive a possible fixed default value for the attribute
3776 *
3777 * Parse an attribute default declaration
3778 *
3779 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3780 *
3781 * [ VC: Required Attribute ]
3782 * if the default declaration is the keyword #REQUIRED, then the
3783 * attribute must be specified for all elements of the type in the
3784 * attribute-list declaration.
3785 *
3786 * [ VC: Attribute Default Legal ]
3787 * The declared default value must meet the lexical constraints of
3788 * the declared attribute type c.f. xmlValidateAttributeDecl()
3789 *
3790 * [ VC: Fixed Attribute Default ]
3791 * if an attribute has a default value declared with the #FIXED
3792 * keyword, instances of that attribute must match the default value.
3793 *
3794 * [ WFC: No < in Attribute Values ]
3795 * handled in xmlParseAttValue()
3796 *
3797 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3798 * or XML_ATTRIBUTE_FIXED.
3799 */
3800
3801int
3802xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3803 int val;
3804 xmlChar *ret;
3805
3806 *value = NULL;
3807 if ((RAW == '#') && (NXT(1) == 'R') &&
3808 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3809 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3810 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3811 (NXT(8) == 'D')) {
3812 SKIP(9);
3813 return(XML_ATTRIBUTE_REQUIRED);
3814 }
3815 if ((RAW == '#') && (NXT(1) == 'I') &&
3816 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3817 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3818 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3819 SKIP(8);
3820 return(XML_ATTRIBUTE_IMPLIED);
3821 }
3822 val = XML_ATTRIBUTE_NONE;
3823 if ((RAW == '#') && (NXT(1) == 'F') &&
3824 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3825 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3826 SKIP(6);
3827 val = XML_ATTRIBUTE_FIXED;
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after '#FIXED'\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 }
3836 SKIP_BLANKS;
3837 }
3838 ret = xmlParseAttValue(ctxt);
3839 ctxt->instate = XML_PARSER_DTD;
3840 if (ret == NULL) {
3841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3842 ctxt->sax->error(ctxt->userData,
3843 "Attribute default value declaration error\n");
3844 ctxt->wellFormed = 0;
3845 ctxt->disableSAX = 1;
3846 } else
3847 *value = ret;
3848 return(val);
3849}
3850
3851/**
3852 * xmlParseNotationType:
3853 * @ctxt: an XML parser context
3854 *
3855 * parse an Notation attribute type.
3856 *
3857 * Note: the leading 'NOTATION' S part has already being parsed...
3858 *
3859 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3860 *
3861 * [ VC: Notation Attributes ]
3862 * Values of this type must match one of the notation names included
3863 * in the declaration; all notation names in the declaration must be declared.
3864 *
3865 * Returns: the notation attribute tree built while parsing
3866 */
3867
3868xmlEnumerationPtr
3869xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3870 xmlChar *name;
3871 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3872
3873 if (RAW != '(') {
3874 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3876 ctxt->sax->error(ctxt->userData,
3877 "'(' required to start 'NOTATION'\n");
3878 ctxt->wellFormed = 0;
3879 ctxt->disableSAX = 1;
3880 return(NULL);
3881 }
3882 SHRINK;
3883 do {
3884 NEXT;
3885 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003886 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003887 if (name == NULL) {
3888 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3890 ctxt->sax->error(ctxt->userData,
3891 "Name expected in NOTATION declaration\n");
3892 ctxt->wellFormed = 0;
3893 ctxt->disableSAX = 1;
3894 return(ret);
3895 }
3896 cur = xmlCreateEnumeration(name);
3897 xmlFree(name);
3898 if (cur == NULL) return(ret);
3899 if (last == NULL) ret = last = cur;
3900 else {
3901 last->next = cur;
3902 last = cur;
3903 }
3904 SKIP_BLANKS;
3905 } while (RAW == '|');
3906 if (RAW != ')') {
3907 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3909 ctxt->sax->error(ctxt->userData,
3910 "')' required to finish NOTATION declaration\n");
3911 ctxt->wellFormed = 0;
3912 ctxt->disableSAX = 1;
3913 if ((last != NULL) && (last != ret))
3914 xmlFreeEnumeration(last);
3915 return(ret);
3916 }
3917 NEXT;
3918 return(ret);
3919}
3920
3921/**
3922 * xmlParseEnumerationType:
3923 * @ctxt: an XML parser context
3924 *
3925 * parse an Enumeration attribute type.
3926 *
3927 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3928 *
3929 * [ VC: Enumeration ]
3930 * Values of this type must match one of the Nmtoken tokens in
3931 * the declaration
3932 *
3933 * Returns: the enumeration attribute tree built while parsing
3934 */
3935
3936xmlEnumerationPtr
3937xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3938 xmlChar *name;
3939 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3940
3941 if (RAW != '(') {
3942 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3944 ctxt->sax->error(ctxt->userData,
3945 "'(' required to start ATTLIST enumeration\n");
3946 ctxt->wellFormed = 0;
3947 ctxt->disableSAX = 1;
3948 return(NULL);
3949 }
3950 SHRINK;
3951 do {
3952 NEXT;
3953 SKIP_BLANKS;
3954 name = xmlParseNmtoken(ctxt);
3955 if (name == NULL) {
3956 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3958 ctxt->sax->error(ctxt->userData,
3959 "NmToken expected in ATTLIST enumeration\n");
3960 ctxt->wellFormed = 0;
3961 ctxt->disableSAX = 1;
3962 return(ret);
3963 }
3964 cur = xmlCreateEnumeration(name);
3965 xmlFree(name);
3966 if (cur == NULL) return(ret);
3967 if (last == NULL) ret = last = cur;
3968 else {
3969 last->next = cur;
3970 last = cur;
3971 }
3972 SKIP_BLANKS;
3973 } while (RAW == '|');
3974 if (RAW != ')') {
3975 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3977 ctxt->sax->error(ctxt->userData,
3978 "')' required to finish ATTLIST enumeration\n");
3979 ctxt->wellFormed = 0;
3980 ctxt->disableSAX = 1;
3981 return(ret);
3982 }
3983 NEXT;
3984 return(ret);
3985}
3986
3987/**
3988 * xmlParseEnumeratedType:
3989 * @ctxt: an XML parser context
3990 * @tree: the enumeration tree built while parsing
3991 *
3992 * parse an Enumerated attribute type.
3993 *
3994 * [57] EnumeratedType ::= NotationType | Enumeration
3995 *
3996 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3997 *
3998 *
3999 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4000 */
4001
4002int
4003xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4004 if ((RAW == 'N') && (NXT(1) == 'O') &&
4005 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4006 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4007 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4008 SKIP(8);
4009 if (!IS_BLANK(CUR)) {
4010 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4012 ctxt->sax->error(ctxt->userData,
4013 "Space required after 'NOTATION'\n");
4014 ctxt->wellFormed = 0;
4015 ctxt->disableSAX = 1;
4016 return(0);
4017 }
4018 SKIP_BLANKS;
4019 *tree = xmlParseNotationType(ctxt);
4020 if (*tree == NULL) return(0);
4021 return(XML_ATTRIBUTE_NOTATION);
4022 }
4023 *tree = xmlParseEnumerationType(ctxt);
4024 if (*tree == NULL) return(0);
4025 return(XML_ATTRIBUTE_ENUMERATION);
4026}
4027
4028/**
4029 * xmlParseAttributeType:
4030 * @ctxt: an XML parser context
4031 * @tree: the enumeration tree built while parsing
4032 *
4033 * parse the Attribute list def for an element
4034 *
4035 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4036 *
4037 * [55] StringType ::= 'CDATA'
4038 *
4039 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4040 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4041 *
4042 * Validity constraints for attribute values syntax are checked in
4043 * xmlValidateAttributeValue()
4044 *
4045 * [ VC: ID ]
4046 * Values of type ID must match the Name production. A name must not
4047 * appear more than once in an XML document as a value of this type;
4048 * i.e., ID values must uniquely identify the elements which bear them.
4049 *
4050 * [ VC: One ID per Element Type ]
4051 * No element type may have more than one ID attribute specified.
4052 *
4053 * [ VC: ID Attribute Default ]
4054 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4055 *
4056 * [ VC: IDREF ]
4057 * Values of type IDREF must match the Name production, and values
4058 * of type IDREFS must match Names; each IDREF Name must match the value
4059 * of an ID attribute on some element in the XML document; i.e. IDREF
4060 * values must match the value of some ID attribute.
4061 *
4062 * [ VC: Entity Name ]
4063 * Values of type ENTITY must match the Name production, values
4064 * of type ENTITIES must match Names; each Entity Name must match the
4065 * name of an unparsed entity declared in the DTD.
4066 *
4067 * [ VC: Name Token ]
4068 * Values of type NMTOKEN must match the Nmtoken production; values
4069 * of type NMTOKENS must match Nmtokens.
4070 *
4071 * Returns the attribute type
4072 */
4073int
4074xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4075 SHRINK;
4076 if ((RAW == 'C') && (NXT(1) == 'D') &&
4077 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4078 (NXT(4) == 'A')) {
4079 SKIP(5);
4080 return(XML_ATTRIBUTE_CDATA);
4081 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4082 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4083 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4084 SKIP(6);
4085 return(XML_ATTRIBUTE_IDREFS);
4086 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4087 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4088 (NXT(4) == 'F')) {
4089 SKIP(5);
4090 return(XML_ATTRIBUTE_IDREF);
4091 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4092 SKIP(2);
4093 return(XML_ATTRIBUTE_ID);
4094 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4095 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4096 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4097 SKIP(6);
4098 return(XML_ATTRIBUTE_ENTITY);
4099 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4100 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4101 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4102 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4103 SKIP(8);
4104 return(XML_ATTRIBUTE_ENTITIES);
4105 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4106 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4107 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4108 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4109 SKIP(8);
4110 return(XML_ATTRIBUTE_NMTOKENS);
4111 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4112 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4113 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4114 (NXT(6) == 'N')) {
4115 SKIP(7);
4116 return(XML_ATTRIBUTE_NMTOKEN);
4117 }
4118 return(xmlParseEnumeratedType(ctxt, tree));
4119}
4120
4121/**
4122 * xmlParseAttributeListDecl:
4123 * @ctxt: an XML parser context
4124 *
4125 * : parse the Attribute list def for an element
4126 *
4127 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4128 *
4129 * [53] AttDef ::= S Name S AttType S DefaultDecl
4130 *
4131 */
4132void
4133xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4134 xmlChar *elemName;
4135 xmlChar *attrName;
4136 xmlEnumerationPtr tree;
4137
4138 if ((RAW == '<') && (NXT(1) == '!') &&
4139 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4140 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4141 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4142 (NXT(8) == 'T')) {
4143 xmlParserInputPtr input = ctxt->input;
4144
4145 SKIP(9);
4146 if (!IS_BLANK(CUR)) {
4147 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4149 ctxt->sax->error(ctxt->userData,
4150 "Space required after '<!ATTLIST'\n");
4151 ctxt->wellFormed = 0;
4152 ctxt->disableSAX = 1;
4153 }
4154 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004155 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004156 if (elemName == NULL) {
4157 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4159 ctxt->sax->error(ctxt->userData,
4160 "ATTLIST: no name for Element\n");
4161 ctxt->wellFormed = 0;
4162 ctxt->disableSAX = 1;
4163 return;
4164 }
4165 SKIP_BLANKS;
4166 GROW;
4167 while (RAW != '>') {
4168 const xmlChar *check = CUR_PTR;
4169 int type;
4170 int def;
4171 xmlChar *defaultValue = NULL;
4172
4173 GROW;
4174 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004175 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004176 if (attrName == NULL) {
4177 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4179 ctxt->sax->error(ctxt->userData,
4180 "ATTLIST: no name for Attribute\n");
4181 ctxt->wellFormed = 0;
4182 ctxt->disableSAX = 1;
4183 break;
4184 }
4185 GROW;
4186 if (!IS_BLANK(CUR)) {
4187 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4189 ctxt->sax->error(ctxt->userData,
4190 "Space required after the attribute name\n");
4191 ctxt->wellFormed = 0;
4192 ctxt->disableSAX = 1;
4193 if (attrName != NULL)
4194 xmlFree(attrName);
4195 if (defaultValue != NULL)
4196 xmlFree(defaultValue);
4197 break;
4198 }
4199 SKIP_BLANKS;
4200
4201 type = xmlParseAttributeType(ctxt, &tree);
4202 if (type <= 0) {
4203 if (attrName != NULL)
4204 xmlFree(attrName);
4205 if (defaultValue != NULL)
4206 xmlFree(defaultValue);
4207 break;
4208 }
4209
4210 GROW;
4211 if (!IS_BLANK(CUR)) {
4212 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4214 ctxt->sax->error(ctxt->userData,
4215 "Space required after the attribute type\n");
4216 ctxt->wellFormed = 0;
4217 ctxt->disableSAX = 1;
4218 if (attrName != NULL)
4219 xmlFree(attrName);
4220 if (defaultValue != NULL)
4221 xmlFree(defaultValue);
4222 if (tree != NULL)
4223 xmlFreeEnumeration(tree);
4224 break;
4225 }
4226 SKIP_BLANKS;
4227
4228 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4229 if (def <= 0) {
4230 if (attrName != NULL)
4231 xmlFree(attrName);
4232 if (defaultValue != NULL)
4233 xmlFree(defaultValue);
4234 if (tree != NULL)
4235 xmlFreeEnumeration(tree);
4236 break;
4237 }
4238
4239 GROW;
4240 if (RAW != '>') {
4241 if (!IS_BLANK(CUR)) {
4242 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4244 ctxt->sax->error(ctxt->userData,
4245 "Space required after the attribute default value\n");
4246 ctxt->wellFormed = 0;
4247 ctxt->disableSAX = 1;
4248 if (attrName != NULL)
4249 xmlFree(attrName);
4250 if (defaultValue != NULL)
4251 xmlFree(defaultValue);
4252 if (tree != NULL)
4253 xmlFreeEnumeration(tree);
4254 break;
4255 }
4256 SKIP_BLANKS;
4257 }
4258 if (check == CUR_PTR) {
4259 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4261 ctxt->sax->error(ctxt->userData,
4262 "xmlParseAttributeListDecl: detected internal error\n");
4263 if (attrName != NULL)
4264 xmlFree(attrName);
4265 if (defaultValue != NULL)
4266 xmlFree(defaultValue);
4267 if (tree != NULL)
4268 xmlFreeEnumeration(tree);
4269 break;
4270 }
4271 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4272 (ctxt->sax->attributeDecl != NULL))
4273 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4274 type, def, defaultValue, tree);
4275 if (attrName != NULL)
4276 xmlFree(attrName);
4277 if (defaultValue != NULL)
4278 xmlFree(defaultValue);
4279 GROW;
4280 }
4281 if (RAW == '>') {
4282 if (input != ctxt->input) {
4283 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4285 ctxt->sax->error(ctxt->userData,
4286"Attribute list declaration doesn't start and stop in the same entity\n");
4287 ctxt->wellFormed = 0;
4288 ctxt->disableSAX = 1;
4289 }
4290 NEXT;
4291 }
4292
4293 xmlFree(elemName);
4294 }
4295}
4296
4297/**
4298 * xmlParseElementMixedContentDecl:
4299 * @ctxt: an XML parser context
4300 *
4301 * parse the declaration for a Mixed Element content
4302 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4303 *
4304 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4305 * '(' S? '#PCDATA' S? ')'
4306 *
4307 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4308 *
4309 * [ VC: No Duplicate Types ]
4310 * The same name must not appear more than once in a single
4311 * mixed-content declaration.
4312 *
4313 * returns: the list of the xmlElementContentPtr describing the element choices
4314 */
4315xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004316xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004317 xmlElementContentPtr ret = NULL, cur = NULL, n;
4318 xmlChar *elem = NULL;
4319
4320 GROW;
4321 if ((RAW == '#') && (NXT(1) == 'P') &&
4322 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4323 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4324 (NXT(6) == 'A')) {
4325 SKIP(7);
4326 SKIP_BLANKS;
4327 SHRINK;
4328 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004329 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4330 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4331 if (ctxt->vctxt.error != NULL)
4332 ctxt->vctxt.error(ctxt->vctxt.userData,
4333"Element content declaration doesn't start and stop in the same entity\n");
4334 ctxt->valid = 0;
4335 }
Owen Taylor3473f882001-02-23 17:55:21 +00004336 NEXT;
4337 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4338 if (RAW == '*') {
4339 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4340 NEXT;
4341 }
4342 return(ret);
4343 }
4344 if ((RAW == '(') || (RAW == '|')) {
4345 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4346 if (ret == NULL) return(NULL);
4347 }
4348 while (RAW == '|') {
4349 NEXT;
4350 if (elem == NULL) {
4351 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4352 if (ret == NULL) return(NULL);
4353 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004354 if (cur != NULL)
4355 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004356 cur = ret;
4357 } else {
4358 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4359 if (n == NULL) return(NULL);
4360 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004361 if (n->c1 != NULL)
4362 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004364 if (n != NULL)
4365 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004366 cur = n;
4367 xmlFree(elem);
4368 }
4369 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004370 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004371 if (elem == NULL) {
4372 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4374 ctxt->sax->error(ctxt->userData,
4375 "xmlParseElementMixedContentDecl : Name expected\n");
4376 ctxt->wellFormed = 0;
4377 ctxt->disableSAX = 1;
4378 xmlFreeElementContent(cur);
4379 return(NULL);
4380 }
4381 SKIP_BLANKS;
4382 GROW;
4383 }
4384 if ((RAW == ')') && (NXT(1) == '*')) {
4385 if (elem != NULL) {
4386 cur->c2 = xmlNewElementContent(elem,
4387 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004388 if (cur->c2 != NULL)
4389 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004390 xmlFree(elem);
4391 }
4392 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004393 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4394 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4395 if (ctxt->vctxt.error != NULL)
4396 ctxt->vctxt.error(ctxt->vctxt.userData,
4397"Element content declaration doesn't start and stop in the same entity\n");
4398 ctxt->valid = 0;
4399 }
Owen Taylor3473f882001-02-23 17:55:21 +00004400 SKIP(2);
4401 } else {
4402 if (elem != NULL) xmlFree(elem);
4403 xmlFreeElementContent(ret);
4404 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4406 ctxt->sax->error(ctxt->userData,
4407 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4408 ctxt->wellFormed = 0;
4409 ctxt->disableSAX = 1;
4410 return(NULL);
4411 }
4412
4413 } else {
4414 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4416 ctxt->sax->error(ctxt->userData,
4417 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4418 ctxt->wellFormed = 0;
4419 ctxt->disableSAX = 1;
4420 }
4421 return(ret);
4422}
4423
4424/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004425 * xmlParseElementChildrenContentD:
4426 * @ctxt: an XML parser context
4427 *
4428 * VMS version of xmlParseElementChildrenContentDecl()
4429 *
4430 * Returns the tree of xmlElementContentPtr describing the element
4431 * hierarchy.
4432 */
4433/**
Owen Taylor3473f882001-02-23 17:55:21 +00004434 * xmlParseElementChildrenContentDecl:
4435 * @ctxt: an XML parser context
4436 *
4437 * parse the declaration for a Mixed Element content
4438 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4439 *
4440 *
4441 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4442 *
4443 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4444 *
4445 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4446 *
4447 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4448 *
4449 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4450 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004451 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004452 * opening or closing parentheses in a choice, seq, or Mixed
4453 * construct is contained in the replacement text for a parameter
4454 * entity, both must be contained in the same replacement text. For
4455 * interoperability, if a parameter-entity reference appears in a
4456 * choice, seq, or Mixed construct, its replacement text should not
4457 * be empty, and neither the first nor last non-blank character of
4458 * the replacement text should be a connector (| or ,).
4459 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004460 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004461 * hierarchy.
4462 */
4463xmlElementContentPtr
4464#ifdef VMS
4465xmlParseElementChildrenContentD
4466#else
4467xmlParseElementChildrenContentDecl
4468#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004469(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004470 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4471 xmlChar *elem;
4472 xmlChar type = 0;
4473
4474 SKIP_BLANKS;
4475 GROW;
4476 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004477 xmlParserInputPtr input = ctxt->input;
4478
Owen Taylor3473f882001-02-23 17:55:21 +00004479 /* Recurse on first child */
4480 NEXT;
4481 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004482 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004483 SKIP_BLANKS;
4484 GROW;
4485 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004486 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004487 if (elem == NULL) {
4488 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4490 ctxt->sax->error(ctxt->userData,
4491 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4492 ctxt->wellFormed = 0;
4493 ctxt->disableSAX = 1;
4494 return(NULL);
4495 }
4496 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4497 GROW;
4498 if (RAW == '?') {
4499 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4500 NEXT;
4501 } else if (RAW == '*') {
4502 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4503 NEXT;
4504 } else if (RAW == '+') {
4505 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4506 NEXT;
4507 } else {
4508 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4509 }
4510 xmlFree(elem);
4511 GROW;
4512 }
4513 SKIP_BLANKS;
4514 SHRINK;
4515 while (RAW != ')') {
4516 /*
4517 * Each loop we parse one separator and one element.
4518 */
4519 if (RAW == ',') {
4520 if (type == 0) type = CUR;
4521
4522 /*
4523 * Detect "Name | Name , Name" error
4524 */
4525 else if (type != CUR) {
4526 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4528 ctxt->sax->error(ctxt->userData,
4529 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4530 type);
4531 ctxt->wellFormed = 0;
4532 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004533 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004534 xmlFreeElementContent(last);
4535 if (ret != NULL)
4536 xmlFreeElementContent(ret);
4537 return(NULL);
4538 }
4539 NEXT;
4540
4541 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4542 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004543 if ((last != NULL) && (last != ret))
4544 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004545 xmlFreeElementContent(ret);
4546 return(NULL);
4547 }
4548 if (last == NULL) {
4549 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004550 if (ret != NULL)
4551 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004552 ret = cur = op;
4553 } else {
4554 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004555 if (op != NULL)
4556 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004557 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004558 if (last != NULL)
4559 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004560 cur =op;
4561 last = NULL;
4562 }
4563 } else if (RAW == '|') {
4564 if (type == 0) type = CUR;
4565
4566 /*
4567 * Detect "Name , Name | Name" error
4568 */
4569 else if (type != CUR) {
4570 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4572 ctxt->sax->error(ctxt->userData,
4573 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4574 type);
4575 ctxt->wellFormed = 0;
4576 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004577 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004578 xmlFreeElementContent(last);
4579 if (ret != NULL)
4580 xmlFreeElementContent(ret);
4581 return(NULL);
4582 }
4583 NEXT;
4584
4585 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4586 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004587 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004588 xmlFreeElementContent(last);
4589 if (ret != NULL)
4590 xmlFreeElementContent(ret);
4591 return(NULL);
4592 }
4593 if (last == NULL) {
4594 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004595 if (ret != NULL)
4596 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004597 ret = cur = op;
4598 } else {
4599 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004600 if (op != NULL)
4601 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004602 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004603 if (last != NULL)
4604 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004605 cur =op;
4606 last = NULL;
4607 }
4608 } else {
4609 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4611 ctxt->sax->error(ctxt->userData,
4612 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4613 ctxt->wellFormed = 0;
4614 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004615 if (ret != NULL)
4616 xmlFreeElementContent(ret);
4617 return(NULL);
4618 }
4619 GROW;
4620 SKIP_BLANKS;
4621 GROW;
4622 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004623 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004624 /* Recurse on second child */
4625 NEXT;
4626 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004627 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004628 SKIP_BLANKS;
4629 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004630 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004631 if (elem == NULL) {
4632 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4634 ctxt->sax->error(ctxt->userData,
4635 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4636 ctxt->wellFormed = 0;
4637 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004638 if (ret != NULL)
4639 xmlFreeElementContent(ret);
4640 return(NULL);
4641 }
4642 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4643 xmlFree(elem);
4644 if (RAW == '?') {
4645 last->ocur = XML_ELEMENT_CONTENT_OPT;
4646 NEXT;
4647 } else if (RAW == '*') {
4648 last->ocur = XML_ELEMENT_CONTENT_MULT;
4649 NEXT;
4650 } else if (RAW == '+') {
4651 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4652 NEXT;
4653 } else {
4654 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4655 }
4656 }
4657 SKIP_BLANKS;
4658 GROW;
4659 }
4660 if ((cur != NULL) && (last != NULL)) {
4661 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004662 if (last != NULL)
4663 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004664 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004665 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4666 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4667 if (ctxt->vctxt.error != NULL)
4668 ctxt->vctxt.error(ctxt->vctxt.userData,
4669"Element content declaration doesn't start and stop in the same entity\n");
4670 ctxt->valid = 0;
4671 }
Owen Taylor3473f882001-02-23 17:55:21 +00004672 NEXT;
4673 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004674 if (ret != NULL)
4675 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004676 NEXT;
4677 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004678 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004679 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004680 cur = ret;
4681 /*
4682 * Some normalization:
4683 * (a | b* | c?)* == (a | b | c)*
4684 */
4685 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4686 if ((cur->c1 != NULL) &&
4687 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4688 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4689 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4690 if ((cur->c2 != NULL) &&
4691 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4692 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4693 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4694 cur = cur->c2;
4695 }
4696 }
Owen Taylor3473f882001-02-23 17:55:21 +00004697 NEXT;
4698 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004699 if (ret != NULL) {
4700 int found = 0;
4701
Daniel Veillarde470df72001-04-18 21:41:07 +00004702 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004703 /*
4704 * Some normalization:
4705 * (a | b*)+ == (a | b)*
4706 * (a | b?)+ == (a | b)*
4707 */
4708 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4709 if ((cur->c1 != NULL) &&
4710 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4711 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4712 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4713 found = 1;
4714 }
4715 if ((cur->c2 != NULL) &&
4716 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4717 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4718 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4719 found = 1;
4720 }
4721 cur = cur->c2;
4722 }
4723 if (found)
4724 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4725 }
Owen Taylor3473f882001-02-23 17:55:21 +00004726 NEXT;
4727 }
4728 return(ret);
4729}
4730
4731/**
4732 * xmlParseElementContentDecl:
4733 * @ctxt: an XML parser context
4734 * @name: the name of the element being defined.
4735 * @result: the Element Content pointer will be stored here if any
4736 *
4737 * parse the declaration for an Element content either Mixed or Children,
4738 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4739 *
4740 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4741 *
4742 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4743 */
4744
4745int
4746xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4747 xmlElementContentPtr *result) {
4748
4749 xmlElementContentPtr tree = NULL;
4750 xmlParserInputPtr input = ctxt->input;
4751 int res;
4752
4753 *result = NULL;
4754
4755 if (RAW != '(') {
4756 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4758 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004759 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004760 ctxt->wellFormed = 0;
4761 ctxt->disableSAX = 1;
4762 return(-1);
4763 }
4764 NEXT;
4765 GROW;
4766 SKIP_BLANKS;
4767 if ((RAW == '#') && (NXT(1) == 'P') &&
4768 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4769 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4770 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004771 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004772 res = XML_ELEMENT_TYPE_MIXED;
4773 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004774 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004775 res = XML_ELEMENT_TYPE_ELEMENT;
4776 }
Owen Taylor3473f882001-02-23 17:55:21 +00004777 SKIP_BLANKS;
4778 *result = tree;
4779 return(res);
4780}
4781
4782/**
4783 * xmlParseElementDecl:
4784 * @ctxt: an XML parser context
4785 *
4786 * parse an Element declaration.
4787 *
4788 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4789 *
4790 * [ VC: Unique Element Type Declaration ]
4791 * No element type may be declared more than once
4792 *
4793 * Returns the type of the element, or -1 in case of error
4794 */
4795int
4796xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4797 xmlChar *name;
4798 int ret = -1;
4799 xmlElementContentPtr content = NULL;
4800
4801 GROW;
4802 if ((RAW == '<') && (NXT(1) == '!') &&
4803 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4804 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4805 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4806 (NXT(8) == 'T')) {
4807 xmlParserInputPtr input = ctxt->input;
4808
4809 SKIP(9);
4810 if (!IS_BLANK(CUR)) {
4811 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4813 ctxt->sax->error(ctxt->userData,
4814 "Space required after 'ELEMENT'\n");
4815 ctxt->wellFormed = 0;
4816 ctxt->disableSAX = 1;
4817 }
4818 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004819 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004820 if (name == NULL) {
4821 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4823 ctxt->sax->error(ctxt->userData,
4824 "xmlParseElementDecl: no name for Element\n");
4825 ctxt->wellFormed = 0;
4826 ctxt->disableSAX = 1;
4827 return(-1);
4828 }
4829 while ((RAW == 0) && (ctxt->inputNr > 1))
4830 xmlPopInput(ctxt);
4831 if (!IS_BLANK(CUR)) {
4832 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4834 ctxt->sax->error(ctxt->userData,
4835 "Space required after the element name\n");
4836 ctxt->wellFormed = 0;
4837 ctxt->disableSAX = 1;
4838 }
4839 SKIP_BLANKS;
4840 if ((RAW == 'E') && (NXT(1) == 'M') &&
4841 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4842 (NXT(4) == 'Y')) {
4843 SKIP(5);
4844 /*
4845 * Element must always be empty.
4846 */
4847 ret = XML_ELEMENT_TYPE_EMPTY;
4848 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4849 (NXT(2) == 'Y')) {
4850 SKIP(3);
4851 /*
4852 * Element is a generic container.
4853 */
4854 ret = XML_ELEMENT_TYPE_ANY;
4855 } else if (RAW == '(') {
4856 ret = xmlParseElementContentDecl(ctxt, name, &content);
4857 } else {
4858 /*
4859 * [ WFC: PEs in Internal Subset ] error handling.
4860 */
4861 if ((RAW == '%') && (ctxt->external == 0) &&
4862 (ctxt->inputNr == 1)) {
4863 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4865 ctxt->sax->error(ctxt->userData,
4866 "PEReference: forbidden within markup decl in internal subset\n");
4867 } else {
4868 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4870 ctxt->sax->error(ctxt->userData,
4871 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4872 }
4873 ctxt->wellFormed = 0;
4874 ctxt->disableSAX = 1;
4875 if (name != NULL) xmlFree(name);
4876 return(-1);
4877 }
4878
4879 SKIP_BLANKS;
4880 /*
4881 * Pop-up of finished entities.
4882 */
4883 while ((RAW == 0) && (ctxt->inputNr > 1))
4884 xmlPopInput(ctxt);
4885 SKIP_BLANKS;
4886
4887 if (RAW != '>') {
4888 ctxt->errNo = XML_ERR_GT_REQUIRED;
4889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4890 ctxt->sax->error(ctxt->userData,
4891 "xmlParseElementDecl: expected '>' at the end\n");
4892 ctxt->wellFormed = 0;
4893 ctxt->disableSAX = 1;
4894 } else {
4895 if (input != ctxt->input) {
4896 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4898 ctxt->sax->error(ctxt->userData,
4899"Element declaration doesn't start and stop in the same entity\n");
4900 ctxt->wellFormed = 0;
4901 ctxt->disableSAX = 1;
4902 }
4903
4904 NEXT;
4905 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4906 (ctxt->sax->elementDecl != NULL))
4907 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4908 content);
4909 }
4910 if (content != NULL) {
4911 xmlFreeElementContent(content);
4912 }
4913 if (name != NULL) {
4914 xmlFree(name);
4915 }
4916 }
4917 return(ret);
4918}
4919
4920/**
Owen Taylor3473f882001-02-23 17:55:21 +00004921 * xmlParseConditionalSections
4922 * @ctxt: an XML parser context
4923 *
4924 * [61] conditionalSect ::= includeSect | ignoreSect
4925 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4926 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4927 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4928 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4929 */
4930
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004931static void
Owen Taylor3473f882001-02-23 17:55:21 +00004932xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4933 SKIP(3);
4934 SKIP_BLANKS;
4935 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4936 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4937 (NXT(6) == 'E')) {
4938 SKIP(7);
4939 SKIP_BLANKS;
4940 if (RAW != '[') {
4941 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4943 ctxt->sax->error(ctxt->userData,
4944 "XML conditional section '[' expected\n");
4945 ctxt->wellFormed = 0;
4946 ctxt->disableSAX = 1;
4947 } else {
4948 NEXT;
4949 }
4950 if (xmlParserDebugEntities) {
4951 if ((ctxt->input != NULL) && (ctxt->input->filename))
4952 xmlGenericError(xmlGenericErrorContext,
4953 "%s(%d): ", ctxt->input->filename,
4954 ctxt->input->line);
4955 xmlGenericError(xmlGenericErrorContext,
4956 "Entering INCLUDE Conditional Section\n");
4957 }
4958
4959 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4960 (NXT(2) != '>'))) {
4961 const xmlChar *check = CUR_PTR;
4962 int cons = ctxt->input->consumed;
4963 int tok = ctxt->token;
4964
4965 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4966 xmlParseConditionalSections(ctxt);
4967 } else if (IS_BLANK(CUR)) {
4968 NEXT;
4969 } else if (RAW == '%') {
4970 xmlParsePEReference(ctxt);
4971 } else
4972 xmlParseMarkupDecl(ctxt);
4973
4974 /*
4975 * Pop-up of finished entities.
4976 */
4977 while ((RAW == 0) && (ctxt->inputNr > 1))
4978 xmlPopInput(ctxt);
4979
4980 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4981 (tok == ctxt->token)) {
4982 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4984 ctxt->sax->error(ctxt->userData,
4985 "Content error in the external subset\n");
4986 ctxt->wellFormed = 0;
4987 ctxt->disableSAX = 1;
4988 break;
4989 }
4990 }
4991 if (xmlParserDebugEntities) {
4992 if ((ctxt->input != NULL) && (ctxt->input->filename))
4993 xmlGenericError(xmlGenericErrorContext,
4994 "%s(%d): ", ctxt->input->filename,
4995 ctxt->input->line);
4996 xmlGenericError(xmlGenericErrorContext,
4997 "Leaving INCLUDE Conditional Section\n");
4998 }
4999
5000 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5001 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5002 int state;
5003 int instate;
5004 int depth = 0;
5005
5006 SKIP(6);
5007 SKIP_BLANKS;
5008 if (RAW != '[') {
5009 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5011 ctxt->sax->error(ctxt->userData,
5012 "XML conditional section '[' expected\n");
5013 ctxt->wellFormed = 0;
5014 ctxt->disableSAX = 1;
5015 } else {
5016 NEXT;
5017 }
5018 if (xmlParserDebugEntities) {
5019 if ((ctxt->input != NULL) && (ctxt->input->filename))
5020 xmlGenericError(xmlGenericErrorContext,
5021 "%s(%d): ", ctxt->input->filename,
5022 ctxt->input->line);
5023 xmlGenericError(xmlGenericErrorContext,
5024 "Entering IGNORE Conditional Section\n");
5025 }
5026
5027 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005028 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005029 * But disable SAX event generating DTD building in the meantime
5030 */
5031 state = ctxt->disableSAX;
5032 instate = ctxt->instate;
5033 ctxt->disableSAX = 1;
5034 ctxt->instate = XML_PARSER_IGNORE;
5035
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005036 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005037 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5038 depth++;
5039 SKIP(3);
5040 continue;
5041 }
5042 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5043 if (--depth >= 0) SKIP(3);
5044 continue;
5045 }
5046 NEXT;
5047 continue;
5048 }
5049
5050 ctxt->disableSAX = state;
5051 ctxt->instate = instate;
5052
5053 if (xmlParserDebugEntities) {
5054 if ((ctxt->input != NULL) && (ctxt->input->filename))
5055 xmlGenericError(xmlGenericErrorContext,
5056 "%s(%d): ", ctxt->input->filename,
5057 ctxt->input->line);
5058 xmlGenericError(xmlGenericErrorContext,
5059 "Leaving IGNORE Conditional Section\n");
5060 }
5061
5062 } else {
5063 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5065 ctxt->sax->error(ctxt->userData,
5066 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5067 ctxt->wellFormed = 0;
5068 ctxt->disableSAX = 1;
5069 }
5070
5071 if (RAW == 0)
5072 SHRINK;
5073
5074 if (RAW == 0) {
5075 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5077 ctxt->sax->error(ctxt->userData,
5078 "XML conditional section not closed\n");
5079 ctxt->wellFormed = 0;
5080 ctxt->disableSAX = 1;
5081 } else {
5082 SKIP(3);
5083 }
5084}
5085
5086/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005087 * xmlParseMarkupDecl:
5088 * @ctxt: an XML parser context
5089 *
5090 * parse Markup declarations
5091 *
5092 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5093 * NotationDecl | PI | Comment
5094 *
5095 * [ VC: Proper Declaration/PE Nesting ]
5096 * Parameter-entity replacement text must be properly nested with
5097 * markup declarations. That is to say, if either the first character
5098 * or the last character of a markup declaration (markupdecl above) is
5099 * contained in the replacement text for a parameter-entity reference,
5100 * both must be contained in the same replacement text.
5101 *
5102 * [ WFC: PEs in Internal Subset ]
5103 * In the internal DTD subset, parameter-entity references can occur
5104 * only where markup declarations can occur, not within markup declarations.
5105 * (This does not apply to references that occur in external parameter
5106 * entities or to the external subset.)
5107 */
5108void
5109xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5110 GROW;
5111 xmlParseElementDecl(ctxt);
5112 xmlParseAttributeListDecl(ctxt);
5113 xmlParseEntityDecl(ctxt);
5114 xmlParseNotationDecl(ctxt);
5115 xmlParsePI(ctxt);
5116 xmlParseComment(ctxt);
5117 /*
5118 * This is only for internal subset. On external entities,
5119 * the replacement is done before parsing stage
5120 */
5121 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5122 xmlParsePEReference(ctxt);
5123
5124 /*
5125 * Conditional sections are allowed from entities included
5126 * by PE References in the internal subset.
5127 */
5128 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5129 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5130 xmlParseConditionalSections(ctxt);
5131 }
5132 }
5133
5134 ctxt->instate = XML_PARSER_DTD;
5135}
5136
5137/**
5138 * xmlParseTextDecl:
5139 * @ctxt: an XML parser context
5140 *
5141 * parse an XML declaration header for external entities
5142 *
5143 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5144 *
5145 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5146 */
5147
5148void
5149xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5150 xmlChar *version;
5151
5152 /*
5153 * We know that '<?xml' is here.
5154 */
5155 if ((RAW == '<') && (NXT(1) == '?') &&
5156 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5157 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5158 SKIP(5);
5159 } else {
5160 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5162 ctxt->sax->error(ctxt->userData,
5163 "Text declaration '<?xml' required\n");
5164 ctxt->wellFormed = 0;
5165 ctxt->disableSAX = 1;
5166
5167 return;
5168 }
5169
5170 if (!IS_BLANK(CUR)) {
5171 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5173 ctxt->sax->error(ctxt->userData,
5174 "Space needed after '<?xml'\n");
5175 ctxt->wellFormed = 0;
5176 ctxt->disableSAX = 1;
5177 }
5178 SKIP_BLANKS;
5179
5180 /*
5181 * We may have the VersionInfo here.
5182 */
5183 version = xmlParseVersionInfo(ctxt);
5184 if (version == NULL)
5185 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005186 else {
5187 if (!IS_BLANK(CUR)) {
5188 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5190 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5191 ctxt->wellFormed = 0;
5192 ctxt->disableSAX = 1;
5193 }
5194 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005195 ctxt->input->version = version;
5196
5197 /*
5198 * We must have the encoding declaration
5199 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005200 xmlParseEncodingDecl(ctxt);
5201 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5202 /*
5203 * The XML REC instructs us to stop parsing right here
5204 */
5205 return;
5206 }
5207
5208 SKIP_BLANKS;
5209 if ((RAW == '?') && (NXT(1) == '>')) {
5210 SKIP(2);
5211 } else if (RAW == '>') {
5212 /* Deprecated old WD ... */
5213 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5215 ctxt->sax->error(ctxt->userData,
5216 "XML declaration must end-up with '?>'\n");
5217 ctxt->wellFormed = 0;
5218 ctxt->disableSAX = 1;
5219 NEXT;
5220 } else {
5221 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5223 ctxt->sax->error(ctxt->userData,
5224 "parsing XML declaration: '?>' expected\n");
5225 ctxt->wellFormed = 0;
5226 ctxt->disableSAX = 1;
5227 MOVETO_ENDTAG(CUR_PTR);
5228 NEXT;
5229 }
5230}
5231
5232/**
Owen Taylor3473f882001-02-23 17:55:21 +00005233 * xmlParseExternalSubset:
5234 * @ctxt: an XML parser context
5235 * @ExternalID: the external identifier
5236 * @SystemID: the system identifier (or URL)
5237 *
5238 * parse Markup declarations from an external subset
5239 *
5240 * [30] extSubset ::= textDecl? extSubsetDecl
5241 *
5242 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5243 */
5244void
5245xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5246 const xmlChar *SystemID) {
5247 GROW;
5248 if ((RAW == '<') && (NXT(1) == '?') &&
5249 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5250 (NXT(4) == 'l')) {
5251 xmlParseTextDecl(ctxt);
5252 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5253 /*
5254 * The XML REC instructs us to stop parsing right here
5255 */
5256 ctxt->instate = XML_PARSER_EOF;
5257 return;
5258 }
5259 }
5260 if (ctxt->myDoc == NULL) {
5261 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5262 }
5263 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5264 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5265
5266 ctxt->instate = XML_PARSER_DTD;
5267 ctxt->external = 1;
5268 while (((RAW == '<') && (NXT(1) == '?')) ||
5269 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005270 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005271 const xmlChar *check = CUR_PTR;
5272 int cons = ctxt->input->consumed;
5273 int tok = ctxt->token;
5274
5275 GROW;
5276 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5277 xmlParseConditionalSections(ctxt);
5278 } else if (IS_BLANK(CUR)) {
5279 NEXT;
5280 } else if (RAW == '%') {
5281 xmlParsePEReference(ctxt);
5282 } else
5283 xmlParseMarkupDecl(ctxt);
5284
5285 /*
5286 * Pop-up of finished entities.
5287 */
5288 while ((RAW == 0) && (ctxt->inputNr > 1))
5289 xmlPopInput(ctxt);
5290
5291 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5292 (tok == ctxt->token)) {
5293 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5295 ctxt->sax->error(ctxt->userData,
5296 "Content error in the external subset\n");
5297 ctxt->wellFormed = 0;
5298 ctxt->disableSAX = 1;
5299 break;
5300 }
5301 }
5302
5303 if (RAW != 0) {
5304 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5306 ctxt->sax->error(ctxt->userData,
5307 "Extra content at the end of the document\n");
5308 ctxt->wellFormed = 0;
5309 ctxt->disableSAX = 1;
5310 }
5311
5312}
5313
5314/**
5315 * xmlParseReference:
5316 * @ctxt: an XML parser context
5317 *
5318 * parse and handle entity references in content, depending on the SAX
5319 * interface, this may end-up in a call to character() if this is a
5320 * CharRef, a predefined entity, if there is no reference() callback.
5321 * or if the parser was asked to switch to that mode.
5322 *
5323 * [67] Reference ::= EntityRef | CharRef
5324 */
5325void
5326xmlParseReference(xmlParserCtxtPtr ctxt) {
5327 xmlEntityPtr ent;
5328 xmlChar *val;
5329 if (RAW != '&') return;
5330
5331 if (NXT(1) == '#') {
5332 int i = 0;
5333 xmlChar out[10];
5334 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005335 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005336
5337 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5338 /*
5339 * So we are using non-UTF-8 buffers
5340 * Check that the char fit on 8bits, if not
5341 * generate a CharRef.
5342 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005343 if (value <= 0xFF) {
5344 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005345 out[1] = 0;
5346 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5347 (!ctxt->disableSAX))
5348 ctxt->sax->characters(ctxt->userData, out, 1);
5349 } else {
5350 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005351 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005352 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005353 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005354 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5355 (!ctxt->disableSAX))
5356 ctxt->sax->reference(ctxt->userData, out);
5357 }
5358 } else {
5359 /*
5360 * Just encode the value in UTF-8
5361 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005362 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005363 out[i] = 0;
5364 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5365 (!ctxt->disableSAX))
5366 ctxt->sax->characters(ctxt->userData, out, i);
5367 }
5368 } else {
5369 ent = xmlParseEntityRef(ctxt);
5370 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005371 if (!ctxt->wellFormed)
5372 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005373 if ((ent->name != NULL) &&
5374 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5375 xmlNodePtr list = NULL;
5376 int ret;
5377
5378
5379 /*
5380 * The first reference to the entity trigger a parsing phase
5381 * where the ent->children is filled with the result from
5382 * the parsing.
5383 */
5384 if (ent->children == NULL) {
5385 xmlChar *value;
5386 value = ent->content;
5387
5388 /*
5389 * Check that this entity is well formed
5390 */
5391 if ((value != NULL) &&
5392 (value[1] == 0) && (value[0] == '<') &&
5393 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5394 /*
5395 * DONE: get definite answer on this !!!
5396 * Lots of entity decls are used to declare a single
5397 * char
5398 * <!ENTITY lt "<">
5399 * Which seems to be valid since
5400 * 2.4: The ampersand character (&) and the left angle
5401 * bracket (<) may appear in their literal form only
5402 * when used ... They are also legal within the literal
5403 * entity value of an internal entity declaration;i
5404 * see "4.3.2 Well-Formed Parsed Entities".
5405 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5406 * Looking at the OASIS test suite and James Clark
5407 * tests, this is broken. However the XML REC uses
5408 * it. Is the XML REC not well-formed ????
5409 * This is a hack to avoid this problem
5410 *
5411 * ANSWER: since lt gt amp .. are already defined,
5412 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005413 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005414 * is lousy but acceptable.
5415 */
5416 list = xmlNewDocText(ctxt->myDoc, value);
5417 if (list != NULL) {
5418 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5419 (ent->children == NULL)) {
5420 ent->children = list;
5421 ent->last = list;
5422 list->parent = (xmlNodePtr) ent;
5423 } else {
5424 xmlFreeNodeList(list);
5425 }
5426 } else if (list != NULL) {
5427 xmlFreeNodeList(list);
5428 }
5429 } else {
5430 /*
5431 * 4.3.2: An internal general parsed entity is well-formed
5432 * if its replacement text matches the production labeled
5433 * content.
5434 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005435
5436 void *user_data;
5437 /*
5438 * This is a bit hackish but this seems the best
5439 * way to make sure both SAX and DOM entity support
5440 * behaves okay.
5441 */
5442 if (ctxt->userData == ctxt)
5443 user_data = NULL;
5444 else
5445 user_data = ctxt->userData;
5446
Owen Taylor3473f882001-02-23 17:55:21 +00005447 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5448 ctxt->depth++;
5449 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005450 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005451 value, &list);
5452 ctxt->depth--;
5453 } else if (ent->etype ==
5454 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5455 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005456 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005457 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005458 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005459 ctxt->depth--;
5460 } else {
5461 ret = -1;
5462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5463 ctxt->sax->error(ctxt->userData,
5464 "Internal: invalid entity type\n");
5465 }
5466 if (ret == XML_ERR_ENTITY_LOOP) {
5467 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5469 ctxt->sax->error(ctxt->userData,
5470 "Detected entity reference loop\n");
5471 ctxt->wellFormed = 0;
5472 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005473 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005474 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005475 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5476 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005477 (ent->children == NULL)) {
5478 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005479 if (ctxt->replaceEntities) {
5480 /*
5481 * Prune it directly in the generated document
5482 * except for single text nodes.
5483 */
5484 if ((list->type == XML_TEXT_NODE) &&
5485 (list->next == NULL)) {
5486 list->parent = (xmlNodePtr) ent;
5487 list = NULL;
5488 } else {
5489 while (list != NULL) {
5490 list->parent = (xmlNodePtr) ctxt->node;
5491 if (list->next == NULL)
5492 ent->last = list;
5493 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005494 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005495 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005496 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5497 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005498 }
5499 } else {
5500 while (list != NULL) {
5501 list->parent = (xmlNodePtr) ent;
5502 if (list->next == NULL)
5503 ent->last = list;
5504 list = list->next;
5505 }
Owen Taylor3473f882001-02-23 17:55:21 +00005506 }
5507 } else {
5508 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005509 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005510 }
5511 } else if (ret > 0) {
5512 ctxt->errNo = ret;
5513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5514 ctxt->sax->error(ctxt->userData,
5515 "Entity value required\n");
5516 ctxt->wellFormed = 0;
5517 ctxt->disableSAX = 1;
5518 } else if (list != NULL) {
5519 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005520 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005521 }
5522 }
5523 }
5524 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5525 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5526 /*
5527 * Create a node.
5528 */
5529 ctxt->sax->reference(ctxt->userData, ent->name);
5530 return;
5531 } else if (ctxt->replaceEntities) {
5532 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5533 /*
5534 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005535 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005536 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005537 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005538 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005539 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005540 cur = ent->children;
5541 while (cur != NULL) {
5542 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005543 if (firstChild == NULL){
5544 firstChild = new;
5545 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005546 xmlAddChild(ctxt->node, new);
5547 if (cur == ent->last)
5548 break;
5549 cur = cur->next;
5550 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005551 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5552 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005553 } else {
5554 /*
5555 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005556 * node with a possible previous text one which
5557 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005558 */
5559 if (ent->children->type == XML_TEXT_NODE)
5560 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5561 if ((ent->last != ent->children) &&
5562 (ent->last->type == XML_TEXT_NODE))
5563 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5564 xmlAddChildList(ctxt->node, ent->children);
5565 }
5566
Owen Taylor3473f882001-02-23 17:55:21 +00005567 /*
5568 * This is to avoid a nasty side effect, see
5569 * characters() in SAX.c
5570 */
5571 ctxt->nodemem = 0;
5572 ctxt->nodelen = 0;
5573 return;
5574 } else {
5575 /*
5576 * Probably running in SAX mode
5577 */
5578 xmlParserInputPtr input;
5579
5580 input = xmlNewEntityInputStream(ctxt, ent);
5581 xmlPushInput(ctxt, input);
5582 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5583 (RAW == '<') && (NXT(1) == '?') &&
5584 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5585 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5586 xmlParseTextDecl(ctxt);
5587 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5588 /*
5589 * The XML REC instructs us to stop parsing right here
5590 */
5591 ctxt->instate = XML_PARSER_EOF;
5592 return;
5593 }
5594 if (input->standalone == 1) {
5595 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5597 ctxt->sax->error(ctxt->userData,
5598 "external parsed entities cannot be standalone\n");
5599 ctxt->wellFormed = 0;
5600 ctxt->disableSAX = 1;
5601 }
5602 }
5603 return;
5604 }
5605 }
5606 } else {
5607 val = ent->content;
5608 if (val == NULL) return;
5609 /*
5610 * inline the entity.
5611 */
5612 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5613 (!ctxt->disableSAX))
5614 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5615 }
5616 }
5617}
5618
5619/**
5620 * xmlParseEntityRef:
5621 * @ctxt: an XML parser context
5622 *
5623 * parse ENTITY references declarations
5624 *
5625 * [68] EntityRef ::= '&' Name ';'
5626 *
5627 * [ WFC: Entity Declared ]
5628 * In a document without any DTD, a document with only an internal DTD
5629 * subset which contains no parameter entity references, or a document
5630 * with "standalone='yes'", the Name given in the entity reference
5631 * must match that in an entity declaration, except that well-formed
5632 * documents need not declare any of the following entities: amp, lt,
5633 * gt, apos, quot. The declaration of a parameter entity must precede
5634 * any reference to it. Similarly, the declaration of a general entity
5635 * must precede any reference to it which appears in a default value in an
5636 * attribute-list declaration. Note that if entities are declared in the
5637 * external subset or in external parameter entities, a non-validating
5638 * processor is not obligated to read and process their declarations;
5639 * for such documents, the rule that an entity must be declared is a
5640 * well-formedness constraint only if standalone='yes'.
5641 *
5642 * [ WFC: Parsed Entity ]
5643 * An entity reference must not contain the name of an unparsed entity
5644 *
5645 * Returns the xmlEntityPtr if found, or NULL otherwise.
5646 */
5647xmlEntityPtr
5648xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5649 xmlChar *name;
5650 xmlEntityPtr ent = NULL;
5651
5652 GROW;
5653
5654 if (RAW == '&') {
5655 NEXT;
5656 name = xmlParseName(ctxt);
5657 if (name == NULL) {
5658 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5660 ctxt->sax->error(ctxt->userData,
5661 "xmlParseEntityRef: no name\n");
5662 ctxt->wellFormed = 0;
5663 ctxt->disableSAX = 1;
5664 } else {
5665 if (RAW == ';') {
5666 NEXT;
5667 /*
5668 * Ask first SAX for entity resolution, otherwise try the
5669 * predefined set.
5670 */
5671 if (ctxt->sax != NULL) {
5672 if (ctxt->sax->getEntity != NULL)
5673 ent = ctxt->sax->getEntity(ctxt->userData, name);
5674 if (ent == NULL)
5675 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005676 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5677 ent = getEntity(ctxt, name);
5678 }
Owen Taylor3473f882001-02-23 17:55:21 +00005679 }
5680 /*
5681 * [ WFC: Entity Declared ]
5682 * In a document without any DTD, a document with only an
5683 * internal DTD subset which contains no parameter entity
5684 * references, or a document with "standalone='yes'", the
5685 * Name given in the entity reference must match that in an
5686 * entity declaration, except that well-formed documents
5687 * need not declare any of the following entities: amp, lt,
5688 * gt, apos, quot.
5689 * The declaration of a parameter entity must precede any
5690 * reference to it.
5691 * Similarly, the declaration of a general entity must
5692 * precede any reference to it which appears in a default
5693 * value in an attribute-list declaration. Note that if
5694 * entities are declared in the external subset or in
5695 * external parameter entities, a non-validating processor
5696 * is not obligated to read and process their declarations;
5697 * for such documents, the rule that an entity must be
5698 * declared is a well-formedness constraint only if
5699 * standalone='yes'.
5700 */
5701 if (ent == NULL) {
5702 if ((ctxt->standalone == 1) ||
5703 ((ctxt->hasExternalSubset == 0) &&
5704 (ctxt->hasPErefs == 0))) {
5705 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5707 ctxt->sax->error(ctxt->userData,
5708 "Entity '%s' not defined\n", name);
5709 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005710 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005711 ctxt->disableSAX = 1;
5712 } else {
5713 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005715 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005716 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005717 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005718 }
5719 }
5720
5721 /*
5722 * [ WFC: Parsed Entity ]
5723 * An entity reference must not contain the name of an
5724 * unparsed entity
5725 */
5726 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5727 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5729 ctxt->sax->error(ctxt->userData,
5730 "Entity reference to unparsed entity %s\n", name);
5731 ctxt->wellFormed = 0;
5732 ctxt->disableSAX = 1;
5733 }
5734
5735 /*
5736 * [ WFC: No External Entity References ]
5737 * Attribute values cannot contain direct or indirect
5738 * entity references to external entities.
5739 */
5740 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5741 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5742 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5744 ctxt->sax->error(ctxt->userData,
5745 "Attribute references external entity '%s'\n", name);
5746 ctxt->wellFormed = 0;
5747 ctxt->disableSAX = 1;
5748 }
5749 /*
5750 * [ WFC: No < in Attribute Values ]
5751 * The replacement text of any entity referred to directly or
5752 * indirectly in an attribute value (other than "&lt;") must
5753 * not contain a <.
5754 */
5755 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5756 (ent != NULL) &&
5757 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5758 (ent->content != NULL) &&
5759 (xmlStrchr(ent->content, '<'))) {
5760 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "'<' in entity '%s' is not allowed in attributes values\n", name);
5764 ctxt->wellFormed = 0;
5765 ctxt->disableSAX = 1;
5766 }
5767
5768 /*
5769 * Internal check, no parameter entities here ...
5770 */
5771 else {
5772 switch (ent->etype) {
5773 case XML_INTERNAL_PARAMETER_ENTITY:
5774 case XML_EXTERNAL_PARAMETER_ENTITY:
5775 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5777 ctxt->sax->error(ctxt->userData,
5778 "Attempt to reference the parameter entity '%s'\n", name);
5779 ctxt->wellFormed = 0;
5780 ctxt->disableSAX = 1;
5781 break;
5782 default:
5783 break;
5784 }
5785 }
5786
5787 /*
5788 * [ WFC: No Recursion ]
5789 * A parsed entity must not contain a recursive reference
5790 * to itself, either directly or indirectly.
5791 * Done somewhere else
5792 */
5793
5794 } else {
5795 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5797 ctxt->sax->error(ctxt->userData,
5798 "xmlParseEntityRef: expecting ';'\n");
5799 ctxt->wellFormed = 0;
5800 ctxt->disableSAX = 1;
5801 }
5802 xmlFree(name);
5803 }
5804 }
5805 return(ent);
5806}
5807
5808/**
5809 * xmlParseStringEntityRef:
5810 * @ctxt: an XML parser context
5811 * @str: a pointer to an index in the string
5812 *
5813 * parse ENTITY references declarations, but this version parses it from
5814 * a string value.
5815 *
5816 * [68] EntityRef ::= '&' Name ';'
5817 *
5818 * [ WFC: Entity Declared ]
5819 * In a document without any DTD, a document with only an internal DTD
5820 * subset which contains no parameter entity references, or a document
5821 * with "standalone='yes'", the Name given in the entity reference
5822 * must match that in an entity declaration, except that well-formed
5823 * documents need not declare any of the following entities: amp, lt,
5824 * gt, apos, quot. The declaration of a parameter entity must precede
5825 * any reference to it. Similarly, the declaration of a general entity
5826 * must precede any reference to it which appears in a default value in an
5827 * attribute-list declaration. Note that if entities are declared in the
5828 * external subset or in external parameter entities, a non-validating
5829 * processor is not obligated to read and process their declarations;
5830 * for such documents, the rule that an entity must be declared is a
5831 * well-formedness constraint only if standalone='yes'.
5832 *
5833 * [ WFC: Parsed Entity ]
5834 * An entity reference must not contain the name of an unparsed entity
5835 *
5836 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5837 * is updated to the current location in the string.
5838 */
5839xmlEntityPtr
5840xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5841 xmlChar *name;
5842 const xmlChar *ptr;
5843 xmlChar cur;
5844 xmlEntityPtr ent = NULL;
5845
5846 if ((str == NULL) || (*str == NULL))
5847 return(NULL);
5848 ptr = *str;
5849 cur = *ptr;
5850 if (cur == '&') {
5851 ptr++;
5852 cur = *ptr;
5853 name = xmlParseStringName(ctxt, &ptr);
5854 if (name == NULL) {
5855 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5857 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005858 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005859 ctxt->wellFormed = 0;
5860 ctxt->disableSAX = 1;
5861 } else {
5862 if (*ptr == ';') {
5863 ptr++;
5864 /*
5865 * Ask first SAX for entity resolution, otherwise try the
5866 * predefined set.
5867 */
5868 if (ctxt->sax != NULL) {
5869 if (ctxt->sax->getEntity != NULL)
5870 ent = ctxt->sax->getEntity(ctxt->userData, name);
5871 if (ent == NULL)
5872 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005873 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5874 ent = getEntity(ctxt, name);
5875 }
Owen Taylor3473f882001-02-23 17:55:21 +00005876 }
5877 /*
5878 * [ WFC: Entity Declared ]
5879 * In a document without any DTD, a document with only an
5880 * internal DTD subset which contains no parameter entity
5881 * references, or a document with "standalone='yes'", the
5882 * Name given in the entity reference must match that in an
5883 * entity declaration, except that well-formed documents
5884 * need not declare any of the following entities: amp, lt,
5885 * gt, apos, quot.
5886 * The declaration of a parameter entity must precede any
5887 * reference to it.
5888 * Similarly, the declaration of a general entity must
5889 * precede any reference to it which appears in a default
5890 * value in an attribute-list declaration. Note that if
5891 * entities are declared in the external subset or in
5892 * external parameter entities, a non-validating processor
5893 * is not obligated to read and process their declarations;
5894 * for such documents, the rule that an entity must be
5895 * declared is a well-formedness constraint only if
5896 * standalone='yes'.
5897 */
5898 if (ent == NULL) {
5899 if ((ctxt->standalone == 1) ||
5900 ((ctxt->hasExternalSubset == 0) &&
5901 (ctxt->hasPErefs == 0))) {
5902 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5904 ctxt->sax->error(ctxt->userData,
5905 "Entity '%s' not defined\n", name);
5906 ctxt->wellFormed = 0;
5907 ctxt->disableSAX = 1;
5908 } else {
5909 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5910 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5911 ctxt->sax->warning(ctxt->userData,
5912 "Entity '%s' not defined\n", name);
5913 }
5914 }
5915
5916 /*
5917 * [ WFC: Parsed Entity ]
5918 * An entity reference must not contain the name of an
5919 * unparsed entity
5920 */
5921 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5922 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5924 ctxt->sax->error(ctxt->userData,
5925 "Entity reference to unparsed entity %s\n", name);
5926 ctxt->wellFormed = 0;
5927 ctxt->disableSAX = 1;
5928 }
5929
5930 /*
5931 * [ WFC: No External Entity References ]
5932 * Attribute values cannot contain direct or indirect
5933 * entity references to external entities.
5934 */
5935 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5936 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5937 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5939 ctxt->sax->error(ctxt->userData,
5940 "Attribute references external entity '%s'\n", name);
5941 ctxt->wellFormed = 0;
5942 ctxt->disableSAX = 1;
5943 }
5944 /*
5945 * [ WFC: No < in Attribute Values ]
5946 * The replacement text of any entity referred to directly or
5947 * indirectly in an attribute value (other than "&lt;") must
5948 * not contain a <.
5949 */
5950 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5951 (ent != NULL) &&
5952 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5953 (ent->content != NULL) &&
5954 (xmlStrchr(ent->content, '<'))) {
5955 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5957 ctxt->sax->error(ctxt->userData,
5958 "'<' in entity '%s' is not allowed in attributes values\n", name);
5959 ctxt->wellFormed = 0;
5960 ctxt->disableSAX = 1;
5961 }
5962
5963 /*
5964 * Internal check, no parameter entities here ...
5965 */
5966 else {
5967 switch (ent->etype) {
5968 case XML_INTERNAL_PARAMETER_ENTITY:
5969 case XML_EXTERNAL_PARAMETER_ENTITY:
5970 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5972 ctxt->sax->error(ctxt->userData,
5973 "Attempt to reference the parameter entity '%s'\n", name);
5974 ctxt->wellFormed = 0;
5975 ctxt->disableSAX = 1;
5976 break;
5977 default:
5978 break;
5979 }
5980 }
5981
5982 /*
5983 * [ WFC: No Recursion ]
5984 * A parsed entity must not contain a recursive reference
5985 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005986 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005987 */
5988
5989 } else {
5990 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5992 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005993 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005994 ctxt->wellFormed = 0;
5995 ctxt->disableSAX = 1;
5996 }
5997 xmlFree(name);
5998 }
5999 }
6000 *str = ptr;
6001 return(ent);
6002}
6003
6004/**
6005 * xmlParsePEReference:
6006 * @ctxt: an XML parser context
6007 *
6008 * parse PEReference declarations
 * The entity content is handled directly by pushing its content as
 * a new input stream.
6011 *
6012 * [69] PEReference ::= '%' Name ';'
6013 *
6014 * [ WFC: No Recursion ]
6015 * A parsed entity must not contain a recursive
6016 * reference to itself, either directly or indirectly.
6017 *
6018 * [ WFC: Entity Declared ]
6019 * In a document without any DTD, a document with only an internal DTD
6020 * subset which contains no parameter entity references, or a document
6021 * with "standalone='yes'", ... ... The declaration of a parameter
6022 * entity must precede any reference to it...
6023 *
6024 * [ VC: Entity Declared ]
6025 * In a document with an external subset or external parameter entities
6026 * with "standalone='no'", ... ... The declaration of a parameter entity
6027 * must precede any reference to it...
6028 *
6029 * [ WFC: In DTD ]
6030 * Parameter-entity references may only appear in the DTD.
6031 * NOTE: misleading but this is handled.
6032 */
6033void
6034xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6035 xmlChar *name;
6036 xmlEntityPtr entity = NULL;
6037 xmlParserInputPtr input;
6038
6039 if (RAW == '%') {
6040 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006041 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006042 if (name == NULL) {
6043 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6045 ctxt->sax->error(ctxt->userData,
6046 "xmlParsePEReference: no name\n");
6047 ctxt->wellFormed = 0;
6048 ctxt->disableSAX = 1;
6049 } else {
6050 if (RAW == ';') {
6051 NEXT;
6052 if ((ctxt->sax != NULL) &&
6053 (ctxt->sax->getParameterEntity != NULL))
6054 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6055 name);
6056 if (entity == NULL) {
6057 /*
6058 * [ WFC: Entity Declared ]
6059 * In a document without any DTD, a document with only an
6060 * internal DTD subset which contains no parameter entity
6061 * references, or a document with "standalone='yes'", ...
6062 * ... The declaration of a parameter entity must precede
6063 * any reference to it...
6064 */
6065 if ((ctxt->standalone == 1) ||
6066 ((ctxt->hasExternalSubset == 0) &&
6067 (ctxt->hasPErefs == 0))) {
6068 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6069 if ((!ctxt->disableSAX) &&
6070 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6071 ctxt->sax->error(ctxt->userData,
6072 "PEReference: %%%s; not found\n", name);
6073 ctxt->wellFormed = 0;
6074 ctxt->disableSAX = 1;
6075 } else {
6076 /*
6077 * [ VC: Entity Declared ]
6078 * In a document with an external subset or external
6079 * parameter entities with "standalone='no'", ...
6080 * ... The declaration of a parameter entity must precede
6081 * any reference to it...
6082 */
6083 if ((!ctxt->disableSAX) &&
6084 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6085 ctxt->sax->warning(ctxt->userData,
6086 "PEReference: %%%s; not found\n", name);
6087 ctxt->valid = 0;
6088 }
6089 } else {
6090 /*
6091 * Internal checking in case the entity quest barfed
6092 */
6093 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6094 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6095 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6096 ctxt->sax->warning(ctxt->userData,
6097 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006098 } else if (ctxt->input->free != deallocblankswrapper) {
6099 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6100 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 } else {
6102 /*
6103 * TODO !!!
6104 * handle the extra spaces added before and after
6105 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6106 */
6107 input = xmlNewEntityInputStream(ctxt, entity);
6108 xmlPushInput(ctxt, input);
6109 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6110 (RAW == '<') && (NXT(1) == '?') &&
6111 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6112 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6113 xmlParseTextDecl(ctxt);
6114 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6115 /*
6116 * The XML REC instructs us to stop parsing
6117 * right here
6118 */
6119 ctxt->instate = XML_PARSER_EOF;
6120 xmlFree(name);
6121 return;
6122 }
6123 }
Owen Taylor3473f882001-02-23 17:55:21 +00006124 }
6125 }
6126 ctxt->hasPErefs = 1;
6127 } else {
6128 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6130 ctxt->sax->error(ctxt->userData,
6131 "xmlParsePEReference: expecting ';'\n");
6132 ctxt->wellFormed = 0;
6133 ctxt->disableSAX = 1;
6134 }
6135 xmlFree(name);
6136 }
6137 }
6138}
6139
6140/**
6141 * xmlParseStringPEReference:
6142 * @ctxt: an XML parser context
6143 * @str: a pointer to an index in the string
6144 *
6145 * parse PEReference declarations
6146 *
6147 * [69] PEReference ::= '%' Name ';'
6148 *
6149 * [ WFC: No Recursion ]
6150 * A parsed entity must not contain a recursive
6151 * reference to itself, either directly or indirectly.
6152 *
6153 * [ WFC: Entity Declared ]
6154 * In a document without any DTD, a document with only an internal DTD
6155 * subset which contains no parameter entity references, or a document
6156 * with "standalone='yes'", ... ... The declaration of a parameter
6157 * entity must precede any reference to it...
6158 *
6159 * [ VC: Entity Declared ]
6160 * In a document with an external subset or external parameter entities
6161 * with "standalone='no'", ... ... The declaration of a parameter entity
6162 * must precede any reference to it...
6163 *
6164 * [ WFC: In DTD ]
6165 * Parameter-entity references may only appear in the DTD.
6166 * NOTE: misleading but this is handled.
6167 *
 * Returns the xmlEntityPtr of the parameter entity if found, or NULL otherwise.
6169 * str is updated to the current value of the index
6170 */
6171xmlEntityPtr
6172xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6173 const xmlChar *ptr;
6174 xmlChar cur;
6175 xmlChar *name;
6176 xmlEntityPtr entity = NULL;
6177
6178 if ((str == NULL) || (*str == NULL)) return(NULL);
6179 ptr = *str;
6180 cur = *ptr;
6181 if (cur == '%') {
6182 ptr++;
6183 cur = *ptr;
6184 name = xmlParseStringName(ctxt, &ptr);
6185 if (name == NULL) {
6186 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6188 ctxt->sax->error(ctxt->userData,
6189 "xmlParseStringPEReference: no name\n");
6190 ctxt->wellFormed = 0;
6191 ctxt->disableSAX = 1;
6192 } else {
6193 cur = *ptr;
6194 if (cur == ';') {
6195 ptr++;
6196 cur = *ptr;
6197 if ((ctxt->sax != NULL) &&
6198 (ctxt->sax->getParameterEntity != NULL))
6199 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6200 name);
6201 if (entity == NULL) {
6202 /*
6203 * [ WFC: Entity Declared ]
6204 * In a document without any DTD, a document with only an
6205 * internal DTD subset which contains no parameter entity
6206 * references, or a document with "standalone='yes'", ...
6207 * ... The declaration of a parameter entity must precede
6208 * any reference to it...
6209 */
6210 if ((ctxt->standalone == 1) ||
6211 ((ctxt->hasExternalSubset == 0) &&
6212 (ctxt->hasPErefs == 0))) {
6213 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6215 ctxt->sax->error(ctxt->userData,
6216 "PEReference: %%%s; not found\n", name);
6217 ctxt->wellFormed = 0;
6218 ctxt->disableSAX = 1;
6219 } else {
6220 /*
6221 * [ VC: Entity Declared ]
6222 * In a document with an external subset or external
6223 * parameter entities with "standalone='no'", ...
6224 * ... The declaration of a parameter entity must
6225 * precede any reference to it...
6226 */
6227 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6228 ctxt->sax->warning(ctxt->userData,
6229 "PEReference: %%%s; not found\n", name);
6230 ctxt->valid = 0;
6231 }
6232 } else {
6233 /*
6234 * Internal checking in case the entity quest barfed
6235 */
6236 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6237 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6238 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6239 ctxt->sax->warning(ctxt->userData,
6240 "Internal: %%%s; is not a parameter entity\n", name);
6241 }
6242 }
6243 ctxt->hasPErefs = 1;
6244 } else {
6245 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6247 ctxt->sax->error(ctxt->userData,
6248 "xmlParseStringPEReference: expecting ';'\n");
6249 ctxt->wellFormed = 0;
6250 ctxt->disableSAX = 1;
6251 }
6252 xmlFree(name);
6253 }
6254 }
6255 *str = ptr;
6256 return(entity);
6257}
6258
6259/**
6260 * xmlParseDocTypeDecl:
6261 * @ctxt: an XML parser context
6262 *
6263 * parse a DOCTYPE declaration
6264 *
6265 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6266 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6267 *
6268 * [ VC: Root Element Type ]
6269 * The Name in the document type declaration must match the element
6270 * type of the root element.
6271 */
6272
6273void
6274xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6275 xmlChar *name = NULL;
6276 xmlChar *ExternalID = NULL;
6277 xmlChar *URI = NULL;
6278
6279 /*
6280 * We know that '<!DOCTYPE' has been detected.
6281 */
6282 SKIP(9);
6283
6284 SKIP_BLANKS;
6285
6286 /*
6287 * Parse the DOCTYPE name.
6288 */
6289 name = xmlParseName(ctxt);
6290 if (name == NULL) {
6291 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6293 ctxt->sax->error(ctxt->userData,
6294 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6295 ctxt->wellFormed = 0;
6296 ctxt->disableSAX = 1;
6297 }
6298 ctxt->intSubName = name;
6299
6300 SKIP_BLANKS;
6301
6302 /*
6303 * Check for SystemID and ExternalID
6304 */
6305 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6306
6307 if ((URI != NULL) || (ExternalID != NULL)) {
6308 ctxt->hasExternalSubset = 1;
6309 }
6310 ctxt->extSubURI = URI;
6311 ctxt->extSubSystem = ExternalID;
6312
6313 SKIP_BLANKS;
6314
6315 /*
6316 * Create and update the internal subset.
6317 */
6318 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6319 (!ctxt->disableSAX))
6320 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6321
6322 /*
6323 * Is there any internal subset declarations ?
6324 * they are handled separately in xmlParseInternalSubset()
6325 */
6326 if (RAW == '[')
6327 return;
6328
6329 /*
6330 * We should be at the end of the DOCTYPE declaration.
6331 */
6332 if (RAW != '>') {
6333 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006335 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006336 ctxt->wellFormed = 0;
6337 ctxt->disableSAX = 1;
6338 }
6339 NEXT;
6340}
6341
6342/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006343 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006344 * @ctxt: an XML parser context
6345 *
6346 * parse the internal subset declaration
6347 *
6348 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6349 */
6350
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006351static void
Owen Taylor3473f882001-02-23 17:55:21 +00006352xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6353 /*
6354 * Is there any DTD definition ?
6355 */
6356 if (RAW == '[') {
6357 ctxt->instate = XML_PARSER_DTD;
6358 NEXT;
6359 /*
6360 * Parse the succession of Markup declarations and
6361 * PEReferences.
6362 * Subsequence (markupdecl | PEReference | S)*
6363 */
6364 while (RAW != ']') {
6365 const xmlChar *check = CUR_PTR;
6366 int cons = ctxt->input->consumed;
6367
6368 SKIP_BLANKS;
6369 xmlParseMarkupDecl(ctxt);
6370 xmlParsePEReference(ctxt);
6371
6372 /*
6373 * Pop-up of finished entities.
6374 */
6375 while ((RAW == 0) && (ctxt->inputNr > 1))
6376 xmlPopInput(ctxt);
6377
6378 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6379 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6381 ctxt->sax->error(ctxt->userData,
6382 "xmlParseInternalSubset: error detected in Markup declaration\n");
6383 ctxt->wellFormed = 0;
6384 ctxt->disableSAX = 1;
6385 break;
6386 }
6387 }
6388 if (RAW == ']') {
6389 NEXT;
6390 SKIP_BLANKS;
6391 }
6392 }
6393
6394 /*
6395 * We should be at the end of the DOCTYPE declaration.
6396 */
6397 if (RAW != '>') {
6398 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006400 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006401 ctxt->wellFormed = 0;
6402 ctxt->disableSAX = 1;
6403 }
6404 NEXT;
6405}
6406
6407/**
6408 * xmlParseAttribute:
6409 * @ctxt: an XML parser context
6410 * @value: a xmlChar ** used to store the value of the attribute
6411 *
6412 * parse an attribute
6413 *
6414 * [41] Attribute ::= Name Eq AttValue
6415 *
6416 * [ WFC: No External Entity References ]
6417 * Attribute values cannot contain direct or indirect entity references
6418 * to external entities.
6419 *
6420 * [ WFC: No < in Attribute Values ]
6421 * The replacement text of any entity referred to directly or indirectly in
6422 * an attribute value (other than "&lt;") must not contain a <.
6423 *
6424 * [ VC: Attribute Value Type ]
6425 * The attribute must have been declared; the value must be of the type
6426 * declared for it.
6427 *
6428 * [25] Eq ::= S? '=' S?
6429 *
6430 * With namespace:
6431 *
6432 * [NS 11] Attribute ::= QName Eq AttValue
6433 *
6434 * Also the case QName == xmlns:??? is handled independently as a namespace
6435 * definition.
6436 *
6437 * Returns the attribute name, and the value in *value.
6438 */
6439
6440xmlChar *
6441xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6442 xmlChar *name, *val;
6443
6444 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006445 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006446 name = xmlParseName(ctxt);
6447 if (name == NULL) {
6448 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6450 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6451 ctxt->wellFormed = 0;
6452 ctxt->disableSAX = 1;
6453 return(NULL);
6454 }
6455
6456 /*
6457 * read the value
6458 */
6459 SKIP_BLANKS;
6460 if (RAW == '=') {
6461 NEXT;
6462 SKIP_BLANKS;
6463 val = xmlParseAttValue(ctxt);
6464 ctxt->instate = XML_PARSER_CONTENT;
6465 } else {
6466 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6468 ctxt->sax->error(ctxt->userData,
6469 "Specification mandate value for attribute %s\n", name);
6470 ctxt->wellFormed = 0;
6471 ctxt->disableSAX = 1;
6472 xmlFree(name);
6473 return(NULL);
6474 }
6475
6476 /*
6477 * Check that xml:lang conforms to the specification
6478 * No more registered as an error, just generate a warning now
6479 * since this was deprecated in XML second edition
6480 */
6481 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6482 if (!xmlCheckLanguageID(val)) {
6483 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6484 ctxt->sax->warning(ctxt->userData,
6485 "Malformed value for xml:lang : %s\n", val);
6486 }
6487 }
6488
6489 /*
6490 * Check that xml:space conforms to the specification
6491 */
6492 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6493 if (xmlStrEqual(val, BAD_CAST "default"))
6494 *(ctxt->space) = 0;
6495 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6496 *(ctxt->space) = 1;
6497 else {
6498 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6500 ctxt->sax->error(ctxt->userData,
6501"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6502 val);
6503 ctxt->wellFormed = 0;
6504 ctxt->disableSAX = 1;
6505 }
6506 }
6507
6508 *value = val;
6509 return(name);
6510}
6511
6512/**
6513 * xmlParseStartTag:
6514 * @ctxt: an XML parser context
6515 *
 * parse the start of a tag, either for the element rule or for
 * EmptyElement. In both cases we don't parse the tag closing chars.
6518 *
6519 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6520 *
6521 * [ WFC: Unique Att Spec ]
6522 * No attribute name may appear more than once in the same start-tag or
6523 * empty-element tag.
6524 *
6525 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6526 *
6527 * [ WFC: Unique Att Spec ]
6528 * No attribute name may appear more than once in the same start-tag or
6529 * empty-element tag.
6530 *
6531 * With namespace:
6532 *
6533 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6534 *
6535 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6536 *
6537 * Returns the element name parsed
6538 */
6539
6540xmlChar *
6541xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6542 xmlChar *name;
6543 xmlChar *attname;
6544 xmlChar *attvalue;
6545 const xmlChar **atts = NULL;
6546 int nbatts = 0;
6547 int maxatts = 0;
6548 int i;
6549
6550 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006551 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006552
6553 name = xmlParseName(ctxt);
6554 if (name == NULL) {
6555 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6557 ctxt->sax->error(ctxt->userData,
6558 "xmlParseStartTag: invalid element name\n");
6559 ctxt->wellFormed = 0;
6560 ctxt->disableSAX = 1;
6561 return(NULL);
6562 }
6563
6564 /*
6565 * Now parse the attributes, it ends up with the ending
6566 *
6567 * (S Attribute)* S?
6568 */
6569 SKIP_BLANKS;
6570 GROW;
6571
Daniel Veillard21a0f912001-02-25 19:54:14 +00006572 while ((RAW != '>') &&
6573 ((RAW != '/') || (NXT(1) != '>')) &&
6574 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006575 const xmlChar *q = CUR_PTR;
6576 int cons = ctxt->input->consumed;
6577
6578 attname = xmlParseAttribute(ctxt, &attvalue);
6579 if ((attname != NULL) && (attvalue != NULL)) {
6580 /*
6581 * [ WFC: Unique Att Spec ]
6582 * No attribute name may appear more than once in the same
6583 * start-tag or empty-element tag.
6584 */
6585 for (i = 0; i < nbatts;i += 2) {
6586 if (xmlStrEqual(atts[i], attname)) {
6587 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6589 ctxt->sax->error(ctxt->userData,
6590 "Attribute %s redefined\n",
6591 attname);
6592 ctxt->wellFormed = 0;
6593 ctxt->disableSAX = 1;
6594 xmlFree(attname);
6595 xmlFree(attvalue);
6596 goto failed;
6597 }
6598 }
6599
6600 /*
6601 * Add the pair to atts
6602 */
6603 if (atts == NULL) {
6604 maxatts = 10;
6605 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6606 if (atts == NULL) {
6607 xmlGenericError(xmlGenericErrorContext,
6608 "malloc of %ld byte failed\n",
6609 maxatts * (long)sizeof(xmlChar *));
6610 return(NULL);
6611 }
6612 } else if (nbatts + 4 > maxatts) {
6613 maxatts *= 2;
6614 atts = (const xmlChar **) xmlRealloc((void *) atts,
6615 maxatts * sizeof(xmlChar *));
6616 if (atts == NULL) {
6617 xmlGenericError(xmlGenericErrorContext,
6618 "realloc of %ld byte failed\n",
6619 maxatts * (long)sizeof(xmlChar *));
6620 return(NULL);
6621 }
6622 }
6623 atts[nbatts++] = attname;
6624 atts[nbatts++] = attvalue;
6625 atts[nbatts] = NULL;
6626 atts[nbatts + 1] = NULL;
6627 } else {
6628 if (attname != NULL)
6629 xmlFree(attname);
6630 if (attvalue != NULL)
6631 xmlFree(attvalue);
6632 }
6633
6634failed:
6635
6636 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6637 break;
6638 if (!IS_BLANK(RAW)) {
6639 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6641 ctxt->sax->error(ctxt->userData,
6642 "attributes construct error\n");
6643 ctxt->wellFormed = 0;
6644 ctxt->disableSAX = 1;
6645 }
6646 SKIP_BLANKS;
6647 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6648 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6650 ctxt->sax->error(ctxt->userData,
6651 "xmlParseStartTag: problem parsing attributes\n");
6652 ctxt->wellFormed = 0;
6653 ctxt->disableSAX = 1;
6654 break;
6655 }
6656 GROW;
6657 }
6658
6659 /*
6660 * SAX: Start of Element !
6661 */
6662 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6663 (!ctxt->disableSAX))
6664 ctxt->sax->startElement(ctxt->userData, name, atts);
6665
6666 if (atts != NULL) {
6667 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6668 xmlFree((void *) atts);
6669 }
6670 return(name);
6671}
6672
6673/**
6674 * xmlParseEndTag:
6675 * @ctxt: an XML parser context
6676 *
6677 * parse an end of tag
6678 *
6679 * [42] ETag ::= '</' Name S? '>'
6680 *
6681 * With namespace
6682 *
6683 * [NS 9] ETag ::= '</' QName S? '>'
6684 */
6685
6686void
6687xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6688 xmlChar *name;
6689 xmlChar *oldname;
6690
6691 GROW;
6692 if ((RAW != '<') || (NXT(1) != '/')) {
6693 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6695 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6696 ctxt->wellFormed = 0;
6697 ctxt->disableSAX = 1;
6698 return;
6699 }
6700 SKIP(2);
6701
Daniel Veillard46de64e2002-05-29 08:21:33 +00006702 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006703
6704 /*
6705 * We should definitely be at the ending "S? '>'" part
6706 */
6707 GROW;
6708 SKIP_BLANKS;
6709 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6710 ctxt->errNo = XML_ERR_GT_REQUIRED;
6711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6712 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6713 ctxt->wellFormed = 0;
6714 ctxt->disableSAX = 1;
6715 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006716 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006717
6718 /*
6719 * [ WFC: Element Type Match ]
6720 * The Name in an element's end-tag must match the element type in the
6721 * start-tag.
6722 *
6723 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006724 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006725 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006727 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006728 ctxt->sax->error(ctxt->userData,
6729 "Opening and ending tag mismatch: %s and %s\n",
6730 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006731 xmlFree(name);
6732 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006733 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006734 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006735 }
6736
6737 }
6738 ctxt->wellFormed = 0;
6739 ctxt->disableSAX = 1;
6740 }
6741
6742 /*
6743 * SAX: End of Tag
6744 */
6745 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6746 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006747 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006748
Owen Taylor3473f882001-02-23 17:55:21 +00006749 oldname = namePop(ctxt);
6750 spacePop(ctxt);
6751 if (oldname != NULL) {
6752#ifdef DEBUG_STACK
6753 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6754#endif
6755 xmlFree(oldname);
6756 }
6757 return;
6758}
6759
6760/**
6761 * xmlParseCDSect:
6762 * @ctxt: an XML parser context
6763 *
6764 * Parse escaped pure raw content.
6765 *
6766 * [18] CDSect ::= CDStart CData CDEnd
6767 *
6768 * [19] CDStart ::= '<![CDATA['
6769 *
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
6771 *
6772 * [21] CDEnd ::= ']]>'
6773 */
6774void
6775xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6776 xmlChar *buf = NULL;
6777 int len = 0;
6778 int size = XML_PARSER_BUFFER_SIZE;
6779 int r, rl;
6780 int s, sl;
6781 int cur, l;
6782 int count = 0;
6783
6784 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6785 (NXT(2) == '[') && (NXT(3) == 'C') &&
6786 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6787 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6788 (NXT(8) == '[')) {
6789 SKIP(9);
6790 } else
6791 return;
6792
6793 ctxt->instate = XML_PARSER_CDATA_SECTION;
6794 r = CUR_CHAR(rl);
6795 if (!IS_CHAR(r)) {
6796 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6798 ctxt->sax->error(ctxt->userData,
6799 "CData section not finished\n");
6800 ctxt->wellFormed = 0;
6801 ctxt->disableSAX = 1;
6802 ctxt->instate = XML_PARSER_CONTENT;
6803 return;
6804 }
6805 NEXTL(rl);
6806 s = CUR_CHAR(sl);
6807 if (!IS_CHAR(s)) {
6808 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6810 ctxt->sax->error(ctxt->userData,
6811 "CData section not finished\n");
6812 ctxt->wellFormed = 0;
6813 ctxt->disableSAX = 1;
6814 ctxt->instate = XML_PARSER_CONTENT;
6815 return;
6816 }
6817 NEXTL(sl);
6818 cur = CUR_CHAR(l);
6819 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6820 if (buf == NULL) {
6821 xmlGenericError(xmlGenericErrorContext,
6822 "malloc of %d byte failed\n", size);
6823 return;
6824 }
6825 while (IS_CHAR(cur) &&
6826 ((r != ']') || (s != ']') || (cur != '>'))) {
6827 if (len + 5 >= size) {
6828 size *= 2;
6829 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6830 if (buf == NULL) {
6831 xmlGenericError(xmlGenericErrorContext,
6832 "realloc of %d byte failed\n", size);
6833 return;
6834 }
6835 }
6836 COPY_BUF(rl,buf,len,r);
6837 r = s;
6838 rl = sl;
6839 s = cur;
6840 sl = l;
6841 count++;
6842 if (count > 50) {
6843 GROW;
6844 count = 0;
6845 }
6846 NEXTL(l);
6847 cur = CUR_CHAR(l);
6848 }
6849 buf[len] = 0;
6850 ctxt->instate = XML_PARSER_CONTENT;
6851 if (cur != '>') {
6852 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6854 ctxt->sax->error(ctxt->userData,
6855 "CData section not finished\n%.50s\n", buf);
6856 ctxt->wellFormed = 0;
6857 ctxt->disableSAX = 1;
6858 xmlFree(buf);
6859 return;
6860 }
6861 NEXTL(l);
6862
6863 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006864 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006865 */
6866 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6867 if (ctxt->sax->cdataBlock != NULL)
6868 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006869 else if (ctxt->sax->characters != NULL)
6870 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006871 }
6872 xmlFree(buf);
6873}
6874
6875/**
6876 * xmlParseContent:
6877 * @ctxt: an XML parser context
6878 *
6879 * Parse a content:
6880 *
6881 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6882 */
6883
6884void
6885xmlParseContent(xmlParserCtxtPtr ctxt) {
6886 GROW;
Daniel Veillard561b7f82002-03-20 21:55:57 +00006887 while (((RAW != 0) || (ctxt->token != 0)) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006888 ((RAW != '<') || (NXT(1) != '/'))) {
6889 const xmlChar *test = CUR_PTR;
6890 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006891 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006892 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006893
6894 /*
6895 * Handle possible processed charrefs.
6896 */
6897 if (ctxt->token != 0) {
6898 xmlParseCharData(ctxt, 0);
6899 }
6900 /*
6901 * First case : a Processing Instruction.
6902 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006903 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006904 xmlParsePI(ctxt);
6905 }
6906
6907 /*
6908 * Second case : a CDSection
6909 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006910 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006911 (NXT(2) == '[') && (NXT(3) == 'C') &&
6912 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6913 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6914 (NXT(8) == '[')) {
6915 xmlParseCDSect(ctxt);
6916 }
6917
6918 /*
6919 * Third case : a comment
6920 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006921 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006922 (NXT(2) == '-') && (NXT(3) == '-')) {
6923 xmlParseComment(ctxt);
6924 ctxt->instate = XML_PARSER_CONTENT;
6925 }
6926
6927 /*
6928 * Fourth case : a sub-element.
6929 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006930 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006931 xmlParseElement(ctxt);
6932 }
6933
6934 /*
6935 * Fifth case : a reference. If if has not been resolved,
6936 * parsing returns it's Name, create the node
6937 */
6938
Daniel Veillard21a0f912001-02-25 19:54:14 +00006939 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006940 xmlParseReference(ctxt);
6941 }
6942
6943 /*
6944 * Last case, text. Note that References are handled directly.
6945 */
6946 else {
6947 xmlParseCharData(ctxt, 0);
6948 }
6949
6950 GROW;
6951 /*
6952 * Pop-up of finished entities.
6953 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006954 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006955 xmlPopInput(ctxt);
6956 SHRINK;
6957
6958 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6959 (tok == ctxt->token)) {
6960 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6962 ctxt->sax->error(ctxt->userData,
6963 "detected an error in element content\n");
6964 ctxt->wellFormed = 0;
6965 ctxt->disableSAX = 1;
6966 ctxt->instate = XML_PARSER_EOF;
6967 break;
6968 }
6969 }
6970}
6971
6972/**
6973 * xmlParseElement:
6974 * @ctxt: an XML parser context
6975 *
6976 * parse an XML element, this is highly recursive
6977 *
6978 * [39] element ::= EmptyElemTag | STag content ETag
6979 *
6980 * [ WFC: Element Type Match ]
6981 * The Name in an element's end-tag must match the element type in the
6982 * start-tag.
6983 *
6984 * [ VC: Element Valid ]
6985 * An element is valid if there is a declaration matching elementdecl
6986 * where the Name matches the element type and one of the following holds:
6987 * - The declaration matches EMPTY and the element has no content.
6988 * - The declaration matches children and the sequence of child elements
6989 * belongs to the language generated by the regular expression in the
6990 * content model, with optional white space (characters matching the
6991 * nonterminal S) between each pair of child elements.
6992 * - The declaration matches Mixed and the content consists of character
6993 * data and child elements whose types match names in the content model.
6994 * - The declaration matches ANY, and the types of any child elements have
6995 * been declared.
6996 */
6997
6998void
6999xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007000 xmlChar *name;
7001 xmlChar *oldname;
7002 xmlParserNodeInfo node_info;
7003 xmlNodePtr ret;
7004
7005 /* Capture start position */
7006 if (ctxt->record_info) {
7007 node_info.begin_pos = ctxt->input->consumed +
7008 (CUR_PTR - ctxt->input->base);
7009 node_info.begin_line = ctxt->input->line;
7010 }
7011
7012 if (ctxt->spaceNr == 0)
7013 spacePush(ctxt, -1);
7014 else
7015 spacePush(ctxt, *ctxt->space);
7016
7017 name = xmlParseStartTag(ctxt);
7018 if (name == NULL) {
7019 spacePop(ctxt);
7020 return;
7021 }
7022 namePush(ctxt, name);
7023 ret = ctxt->node;
7024
7025 /*
7026 * [ VC: Root Element Type ]
7027 * The Name in the document type declaration must match the element
7028 * type of the root element.
7029 */
7030 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7031 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7032 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7033
7034 /*
7035 * Check for an Empty Element.
7036 */
7037 if ((RAW == '/') && (NXT(1) == '>')) {
7038 SKIP(2);
7039 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7040 (!ctxt->disableSAX))
7041 ctxt->sax->endElement(ctxt->userData, name);
7042 oldname = namePop(ctxt);
7043 spacePop(ctxt);
7044 if (oldname != NULL) {
7045#ifdef DEBUG_STACK
7046 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7047#endif
7048 xmlFree(oldname);
7049 }
7050 if ( ret != NULL && ctxt->record_info ) {
7051 node_info.end_pos = ctxt->input->consumed +
7052 (CUR_PTR - ctxt->input->base);
7053 node_info.end_line = ctxt->input->line;
7054 node_info.node = ret;
7055 xmlParserAddNodeInfo(ctxt, &node_info);
7056 }
7057 return;
7058 }
7059 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007060 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007061 } else {
7062 ctxt->errNo = XML_ERR_GT_REQUIRED;
7063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7064 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007065 "Couldn't find end of Start Tag %s\n",
7066 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007067 ctxt->wellFormed = 0;
7068 ctxt->disableSAX = 1;
7069
7070 /*
7071 * end of parsing of this node.
7072 */
7073 nodePop(ctxt);
7074 oldname = namePop(ctxt);
7075 spacePop(ctxt);
7076 if (oldname != NULL) {
7077#ifdef DEBUG_STACK
7078 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7079#endif
7080 xmlFree(oldname);
7081 }
7082
7083 /*
7084 * Capture end position and add node
7085 */
7086 if ( ret != NULL && ctxt->record_info ) {
7087 node_info.end_pos = ctxt->input->consumed +
7088 (CUR_PTR - ctxt->input->base);
7089 node_info.end_line = ctxt->input->line;
7090 node_info.node = ret;
7091 xmlParserAddNodeInfo(ctxt, &node_info);
7092 }
7093 return;
7094 }
7095
7096 /*
7097 * Parse the content of the element:
7098 */
7099 xmlParseContent(ctxt);
7100 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007101 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7103 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007104 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007105 ctxt->wellFormed = 0;
7106 ctxt->disableSAX = 1;
7107
7108 /*
7109 * end of parsing of this node.
7110 */
7111 nodePop(ctxt);
7112 oldname = namePop(ctxt);
7113 spacePop(ctxt);
7114 if (oldname != NULL) {
7115#ifdef DEBUG_STACK
7116 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7117#endif
7118 xmlFree(oldname);
7119 }
7120 return;
7121 }
7122
7123 /*
7124 * parse the end of tag: '</' should be here.
7125 */
7126 xmlParseEndTag(ctxt);
7127
7128 /*
7129 * Capture end position and add node
7130 */
7131 if ( ret != NULL && ctxt->record_info ) {
7132 node_info.end_pos = ctxt->input->consumed +
7133 (CUR_PTR - ctxt->input->base);
7134 node_info.end_line = ctxt->input->line;
7135 node_info.node = ret;
7136 xmlParserAddNodeInfo(ctxt, &node_info);
7137 }
7138}
7139
7140/**
7141 * xmlParseVersionNum:
7142 * @ctxt: an XML parser context
7143 *
7144 * parse the XML version value.
7145 *
7146 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7147 *
7148 * Returns the string giving the XML version number, or NULL
7149 */
7150xmlChar *
7151xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7152 xmlChar *buf = NULL;
7153 int len = 0;
7154 int size = 10;
7155 xmlChar cur;
7156
7157 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7158 if (buf == NULL) {
7159 xmlGenericError(xmlGenericErrorContext,
7160 "malloc of %d byte failed\n", size);
7161 return(NULL);
7162 }
7163 cur = CUR;
7164 while (((cur >= 'a') && (cur <= 'z')) ||
7165 ((cur >= 'A') && (cur <= 'Z')) ||
7166 ((cur >= '0') && (cur <= '9')) ||
7167 (cur == '_') || (cur == '.') ||
7168 (cur == ':') || (cur == '-')) {
7169 if (len + 1 >= size) {
7170 size *= 2;
7171 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7172 if (buf == NULL) {
7173 xmlGenericError(xmlGenericErrorContext,
7174 "realloc of %d byte failed\n", size);
7175 return(NULL);
7176 }
7177 }
7178 buf[len++] = cur;
7179 NEXT;
7180 cur=CUR;
7181 }
7182 buf[len] = 0;
7183 return(buf);
7184}
7185
7186/**
7187 * xmlParseVersionInfo:
7188 * @ctxt: an XML parser context
7189 *
7190 * parse the XML version.
7191 *
7192 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7193 *
7194 * [25] Eq ::= S? '=' S?
7195 *
7196 * Returns the version string, e.g. "1.0"
7197 */
7198
7199xmlChar *
7200xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7201 xmlChar *version = NULL;
7202 const xmlChar *q;
7203
7204 if ((RAW == 'v') && (NXT(1) == 'e') &&
7205 (NXT(2) == 'r') && (NXT(3) == 's') &&
7206 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7207 (NXT(6) == 'n')) {
7208 SKIP(7);
7209 SKIP_BLANKS;
7210 if (RAW != '=') {
7211 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7213 ctxt->sax->error(ctxt->userData,
7214 "xmlParseVersionInfo : expected '='\n");
7215 ctxt->wellFormed = 0;
7216 ctxt->disableSAX = 1;
7217 return(NULL);
7218 }
7219 NEXT;
7220 SKIP_BLANKS;
7221 if (RAW == '"') {
7222 NEXT;
7223 q = CUR_PTR;
7224 version = xmlParseVersionNum(ctxt);
7225 if (RAW != '"') {
7226 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7228 ctxt->sax->error(ctxt->userData,
7229 "String not closed\n%.50s\n", q);
7230 ctxt->wellFormed = 0;
7231 ctxt->disableSAX = 1;
7232 } else
7233 NEXT;
7234 } else if (RAW == '\''){
7235 NEXT;
7236 q = CUR_PTR;
7237 version = xmlParseVersionNum(ctxt);
7238 if (RAW != '\'') {
7239 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7241 ctxt->sax->error(ctxt->userData,
7242 "String not closed\n%.50s\n", q);
7243 ctxt->wellFormed = 0;
7244 ctxt->disableSAX = 1;
7245 } else
7246 NEXT;
7247 } else {
7248 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7250 ctxt->sax->error(ctxt->userData,
7251 "xmlParseVersionInfo : expected ' or \"\n");
7252 ctxt->wellFormed = 0;
7253 ctxt->disableSAX = 1;
7254 }
7255 }
7256 return(version);
7257}
7258
7259/**
7260 * xmlParseEncName:
7261 * @ctxt: an XML parser context
7262 *
7263 * parse the XML encoding name
7264 *
7265 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7266 *
7267 * Returns the encoding name value or NULL
7268 */
7269xmlChar *
7270xmlParseEncName(xmlParserCtxtPtr ctxt) {
7271 xmlChar *buf = NULL;
7272 int len = 0;
7273 int size = 10;
7274 xmlChar cur;
7275
7276 cur = CUR;
7277 if (((cur >= 'a') && (cur <= 'z')) ||
7278 ((cur >= 'A') && (cur <= 'Z'))) {
7279 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7280 if (buf == NULL) {
7281 xmlGenericError(xmlGenericErrorContext,
7282 "malloc of %d byte failed\n", size);
7283 return(NULL);
7284 }
7285
7286 buf[len++] = cur;
7287 NEXT;
7288 cur = CUR;
7289 while (((cur >= 'a') && (cur <= 'z')) ||
7290 ((cur >= 'A') && (cur <= 'Z')) ||
7291 ((cur >= '0') && (cur <= '9')) ||
7292 (cur == '.') || (cur == '_') ||
7293 (cur == '-')) {
7294 if (len + 1 >= size) {
7295 size *= 2;
7296 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7297 if (buf == NULL) {
7298 xmlGenericError(xmlGenericErrorContext,
7299 "realloc of %d byte failed\n", size);
7300 return(NULL);
7301 }
7302 }
7303 buf[len++] = cur;
7304 NEXT;
7305 cur = CUR;
7306 if (cur == 0) {
7307 SHRINK;
7308 GROW;
7309 cur = CUR;
7310 }
7311 }
7312 buf[len] = 0;
7313 } else {
7314 ctxt->errNo = XML_ERR_ENCODING_NAME;
7315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7316 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7317 ctxt->wellFormed = 0;
7318 ctxt->disableSAX = 1;
7319 }
7320 return(buf);
7321}
7322
7323/**
7324 * xmlParseEncodingDecl:
7325 * @ctxt: an XML parser context
7326 *
7327 * parse the XML encoding declaration
7328 *
7329 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7330 *
7331 * this setups the conversion filters.
7332 *
7333 * Returns the encoding value or NULL
7334 */
7335
7336xmlChar *
7337xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7338 xmlChar *encoding = NULL;
7339 const xmlChar *q;
7340
7341 SKIP_BLANKS;
7342 if ((RAW == 'e') && (NXT(1) == 'n') &&
7343 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7344 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7345 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7346 SKIP(8);
7347 SKIP_BLANKS;
7348 if (RAW != '=') {
7349 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7351 ctxt->sax->error(ctxt->userData,
7352 "xmlParseEncodingDecl : expected '='\n");
7353 ctxt->wellFormed = 0;
7354 ctxt->disableSAX = 1;
7355 return(NULL);
7356 }
7357 NEXT;
7358 SKIP_BLANKS;
7359 if (RAW == '"') {
7360 NEXT;
7361 q = CUR_PTR;
7362 encoding = xmlParseEncName(ctxt);
7363 if (RAW != '"') {
7364 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7366 ctxt->sax->error(ctxt->userData,
7367 "String not closed\n%.50s\n", q);
7368 ctxt->wellFormed = 0;
7369 ctxt->disableSAX = 1;
7370 } else
7371 NEXT;
7372 } else if (RAW == '\''){
7373 NEXT;
7374 q = CUR_PTR;
7375 encoding = xmlParseEncName(ctxt);
7376 if (RAW != '\'') {
7377 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7379 ctxt->sax->error(ctxt->userData,
7380 "String not closed\n%.50s\n", q);
7381 ctxt->wellFormed = 0;
7382 ctxt->disableSAX = 1;
7383 } else
7384 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007385 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007386 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7388 ctxt->sax->error(ctxt->userData,
7389 "xmlParseEncodingDecl : expected ' or \"\n");
7390 ctxt->wellFormed = 0;
7391 ctxt->disableSAX = 1;
7392 }
7393 if (encoding != NULL) {
7394 xmlCharEncoding enc;
7395 xmlCharEncodingHandlerPtr handler;
7396
7397 if (ctxt->input->encoding != NULL)
7398 xmlFree((xmlChar *) ctxt->input->encoding);
7399 ctxt->input->encoding = encoding;
7400
7401 enc = xmlParseCharEncoding((const char *) encoding);
7402 /*
7403 * registered set of known encodings
7404 */
7405 if (enc != XML_CHAR_ENCODING_ERROR) {
7406 xmlSwitchEncoding(ctxt, enc);
7407 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007408 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007409 xmlFree(encoding);
7410 return(NULL);
7411 }
7412 } else {
7413 /*
7414 * fallback for unknown encodings
7415 */
7416 handler = xmlFindCharEncodingHandler((const char *) encoding);
7417 if (handler != NULL) {
7418 xmlSwitchToEncoding(ctxt, handler);
7419 } else {
7420 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7422 ctxt->sax->error(ctxt->userData,
7423 "Unsupported encoding %s\n", encoding);
7424 return(NULL);
7425 }
7426 }
7427 }
7428 }
7429 return(encoding);
7430}
7431
7432/**
7433 * xmlParseSDDecl:
7434 * @ctxt: an XML parser context
7435 *
7436 * parse the XML standalone declaration
7437 *
7438 * [32] SDDecl ::= S 'standalone' Eq
7439 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7440 *
7441 * [ VC: Standalone Document Declaration ]
7442 * TODO The standalone document declaration must have the value "no"
7443 * if any external markup declarations contain declarations of:
7444 * - attributes with default values, if elements to which these
7445 * attributes apply appear in the document without specifications
7446 * of values for these attributes, or
7447 * - entities (other than amp, lt, gt, apos, quot), if references
7448 * to those entities appear in the document, or
7449 * - attributes with values subject to normalization, where the
7450 * attribute appears in the document with a value which will change
7451 * as a result of normalization, or
7452 * - element types with element content, if white space occurs directly
7453 * within any instance of those types.
7454 *
7455 * Returns 1 if standalone, 0 otherwise
7456 */
7457
7458int
7459xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7460 int standalone = -1;
7461
7462 SKIP_BLANKS;
7463 if ((RAW == 's') && (NXT(1) == 't') &&
7464 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7465 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7466 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7467 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7468 SKIP(10);
7469 SKIP_BLANKS;
7470 if (RAW != '=') {
7471 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7473 ctxt->sax->error(ctxt->userData,
7474 "XML standalone declaration : expected '='\n");
7475 ctxt->wellFormed = 0;
7476 ctxt->disableSAX = 1;
7477 return(standalone);
7478 }
7479 NEXT;
7480 SKIP_BLANKS;
7481 if (RAW == '\''){
7482 NEXT;
7483 if ((RAW == 'n') && (NXT(1) == 'o')) {
7484 standalone = 0;
7485 SKIP(2);
7486 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7487 (NXT(2) == 's')) {
7488 standalone = 1;
7489 SKIP(3);
7490 } else {
7491 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7493 ctxt->sax->error(ctxt->userData,
7494 "standalone accepts only 'yes' or 'no'\n");
7495 ctxt->wellFormed = 0;
7496 ctxt->disableSAX = 1;
7497 }
7498 if (RAW != '\'') {
7499 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7501 ctxt->sax->error(ctxt->userData, "String not closed\n");
7502 ctxt->wellFormed = 0;
7503 ctxt->disableSAX = 1;
7504 } else
7505 NEXT;
7506 } else if (RAW == '"'){
7507 NEXT;
7508 if ((RAW == 'n') && (NXT(1) == 'o')) {
7509 standalone = 0;
7510 SKIP(2);
7511 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7512 (NXT(2) == 's')) {
7513 standalone = 1;
7514 SKIP(3);
7515 } else {
7516 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7518 ctxt->sax->error(ctxt->userData,
7519 "standalone accepts only 'yes' or 'no'\n");
7520 ctxt->wellFormed = 0;
7521 ctxt->disableSAX = 1;
7522 }
7523 if (RAW != '"') {
7524 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7526 ctxt->sax->error(ctxt->userData, "String not closed\n");
7527 ctxt->wellFormed = 0;
7528 ctxt->disableSAX = 1;
7529 } else
7530 NEXT;
7531 } else {
7532 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7534 ctxt->sax->error(ctxt->userData,
7535 "Standalone value not found\n");
7536 ctxt->wellFormed = 0;
7537 ctxt->disableSAX = 1;
7538 }
7539 }
7540 return(standalone);
7541}
7542
7543/**
7544 * xmlParseXMLDecl:
7545 * @ctxt: an XML parser context
7546 *
7547 * parse an XML declaration header
7548 *
7549 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7550 */
7551
7552void
7553xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7554 xmlChar *version;
7555
7556 /*
7557 * We know that '<?xml' is here.
7558 */
7559 SKIP(5);
7560
7561 if (!IS_BLANK(RAW)) {
7562 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7564 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7565 ctxt->wellFormed = 0;
7566 ctxt->disableSAX = 1;
7567 }
7568 SKIP_BLANKS;
7569
7570 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007571 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007572 */
7573 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007574 if (version == NULL) {
7575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7576 ctxt->sax->error(ctxt->userData,
7577 "Malformed declaration expecting version\n");
7578 ctxt->wellFormed = 0;
7579 ctxt->disableSAX = 1;
7580 } else {
7581 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7582 /*
7583 * TODO: Blueberry should be detected here
7584 */
7585 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7586 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7587 version);
7588 }
7589 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007590 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007591 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007592 }
Owen Taylor3473f882001-02-23 17:55:21 +00007593
7594 /*
7595 * We may have the encoding declaration
7596 */
7597 if (!IS_BLANK(RAW)) {
7598 if ((RAW == '?') && (NXT(1) == '>')) {
7599 SKIP(2);
7600 return;
7601 }
7602 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7604 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7605 ctxt->wellFormed = 0;
7606 ctxt->disableSAX = 1;
7607 }
7608 xmlParseEncodingDecl(ctxt);
7609 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7610 /*
7611 * The XML REC instructs us to stop parsing right here
7612 */
7613 return;
7614 }
7615
7616 /*
7617 * We may have the standalone status.
7618 */
7619 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7620 if ((RAW == '?') && (NXT(1) == '>')) {
7621 SKIP(2);
7622 return;
7623 }
7624 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7626 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7627 ctxt->wellFormed = 0;
7628 ctxt->disableSAX = 1;
7629 }
7630 SKIP_BLANKS;
7631 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7632
7633 SKIP_BLANKS;
7634 if ((RAW == '?') && (NXT(1) == '>')) {
7635 SKIP(2);
7636 } else if (RAW == '>') {
7637 /* Deprecated old WD ... */
7638 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7640 ctxt->sax->error(ctxt->userData,
7641 "XML declaration must end-up with '?>'\n");
7642 ctxt->wellFormed = 0;
7643 ctxt->disableSAX = 1;
7644 NEXT;
7645 } else {
7646 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7648 ctxt->sax->error(ctxt->userData,
7649 "parsing XML declaration: '?>' expected\n");
7650 ctxt->wellFormed = 0;
7651 ctxt->disableSAX = 1;
7652 MOVETO_ENDTAG(CUR_PTR);
7653 NEXT;
7654 }
7655}
7656
7657/**
7658 * xmlParseMisc:
7659 * @ctxt: an XML parser context
7660 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007661 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007662 *
7663 * [27] Misc ::= Comment | PI | S
7664 */
7665
7666void
7667xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007668 while (((RAW == '<') && (NXT(1) == '?')) ||
7669 ((RAW == '<') && (NXT(1) == '!') &&
7670 (NXT(2) == '-') && (NXT(3) == '-')) ||
7671 IS_BLANK(CUR)) {
7672 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007673 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007674 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007675 NEXT;
7676 } else
7677 xmlParseComment(ctxt);
7678 }
7679}
7680
7681/**
7682 * xmlParseDocument:
7683 * @ctxt: an XML parser context
7684 *
7685 * parse an XML document (and build a tree if using the standard SAX
7686 * interface).
7687 *
7688 * [1] document ::= prolog element Misc*
7689 *
7690 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7691 *
7692 * Returns 0, -1 in case of error. the parser context is augmented
7693 * as a result of the parsing.
7694 */
7695
7696int
7697xmlParseDocument(xmlParserCtxtPtr ctxt) {
7698 xmlChar start[4];
7699 xmlCharEncoding enc;
7700
7701 xmlInitParser();
7702
7703 GROW;
7704
7705 /*
7706 * SAX: beginning of the document processing.
7707 */
7708 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7709 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7710
Daniel Veillard50f34372001-08-03 12:06:36 +00007711 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007712 /*
7713 * Get the 4 first bytes and decode the charset
7714 * if enc != XML_CHAR_ENCODING_NONE
7715 * plug some encoding conversion routines.
7716 */
7717 start[0] = RAW;
7718 start[1] = NXT(1);
7719 start[2] = NXT(2);
7720 start[3] = NXT(3);
7721 enc = xmlDetectCharEncoding(start, 4);
7722 if (enc != XML_CHAR_ENCODING_NONE) {
7723 xmlSwitchEncoding(ctxt, enc);
7724 }
Owen Taylor3473f882001-02-23 17:55:21 +00007725 }
7726
7727
7728 if (CUR == 0) {
7729 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7731 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7732 ctxt->wellFormed = 0;
7733 ctxt->disableSAX = 1;
7734 }
7735
7736 /*
7737 * Check for the XMLDecl in the Prolog.
7738 */
7739 GROW;
7740 if ((RAW == '<') && (NXT(1) == '?') &&
7741 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7742 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7743
7744 /*
7745 * Note that we will switch encoding on the fly.
7746 */
7747 xmlParseXMLDecl(ctxt);
7748 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7749 /*
7750 * The XML REC instructs us to stop parsing right here
7751 */
7752 return(-1);
7753 }
7754 ctxt->standalone = ctxt->input->standalone;
7755 SKIP_BLANKS;
7756 } else {
7757 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7758 }
7759 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7760 ctxt->sax->startDocument(ctxt->userData);
7761
7762 /*
7763 * The Misc part of the Prolog
7764 */
7765 GROW;
7766 xmlParseMisc(ctxt);
7767
7768 /*
7769 * Then possibly doc type declaration(s) and more Misc
7770 * (doctypedecl Misc*)?
7771 */
7772 GROW;
7773 if ((RAW == '<') && (NXT(1) == '!') &&
7774 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7775 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7776 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7777 (NXT(8) == 'E')) {
7778
7779 ctxt->inSubset = 1;
7780 xmlParseDocTypeDecl(ctxt);
7781 if (RAW == '[') {
7782 ctxt->instate = XML_PARSER_DTD;
7783 xmlParseInternalSubset(ctxt);
7784 }
7785
7786 /*
7787 * Create and update the external subset.
7788 */
7789 ctxt->inSubset = 2;
7790 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7791 (!ctxt->disableSAX))
7792 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7793 ctxt->extSubSystem, ctxt->extSubURI);
7794 ctxt->inSubset = 0;
7795
7796
7797 ctxt->instate = XML_PARSER_PROLOG;
7798 xmlParseMisc(ctxt);
7799 }
7800
7801 /*
7802 * Time to start parsing the tree itself
7803 */
7804 GROW;
7805 if (RAW != '<') {
7806 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7808 ctxt->sax->error(ctxt->userData,
7809 "Start tag expected, '<' not found\n");
7810 ctxt->wellFormed = 0;
7811 ctxt->disableSAX = 1;
7812 ctxt->instate = XML_PARSER_EOF;
7813 } else {
7814 ctxt->instate = XML_PARSER_CONTENT;
7815 xmlParseElement(ctxt);
7816 ctxt->instate = XML_PARSER_EPILOG;
7817
7818
7819 /*
7820 * The Misc part at the end
7821 */
7822 xmlParseMisc(ctxt);
7823
Daniel Veillard561b7f82002-03-20 21:55:57 +00007824 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007825 ctxt->errNo = XML_ERR_DOCUMENT_END;
7826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7827 ctxt->sax->error(ctxt->userData,
7828 "Extra content at the end of the document\n");
7829 ctxt->wellFormed = 0;
7830 ctxt->disableSAX = 1;
7831 }
7832 ctxt->instate = XML_PARSER_EOF;
7833 }
7834
7835 /*
7836 * SAX: end of the document processing.
7837 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007838 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007839 ctxt->sax->endDocument(ctxt->userData);
7840
Daniel Veillard5997aca2002-03-18 18:36:20 +00007841 /*
7842 * Remove locally kept entity definitions if the tree was not built
7843 */
7844 if ((ctxt->myDoc != NULL) &&
7845 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7846 xmlFreeDoc(ctxt->myDoc);
7847 ctxt->myDoc = NULL;
7848 }
7849
Daniel Veillardc7612992002-02-17 22:47:37 +00007850 if (! ctxt->wellFormed) {
7851 ctxt->valid = 0;
7852 return(-1);
7853 }
Owen Taylor3473f882001-02-23 17:55:21 +00007854 return(0);
7855}
7856
7857/**
7858 * xmlParseExtParsedEnt:
7859 * @ctxt: an XML parser context
7860 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007861 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007862 * An external general parsed entity is well-formed if it matches the
7863 * production labeled extParsedEnt.
7864 *
7865 * [78] extParsedEnt ::= TextDecl? content
7866 *
7867 * Returns 0, -1 in case of error. the parser context is augmented
7868 * as a result of the parsing.
7869 */
7870
7871int
7872xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7873 xmlChar start[4];
7874 xmlCharEncoding enc;
7875
7876 xmlDefaultSAXHandlerInit();
7877
7878 GROW;
7879
7880 /*
7881 * SAX: beginning of the document processing.
7882 */
7883 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7884 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7885
7886 /*
7887 * Get the 4 first bytes and decode the charset
7888 * if enc != XML_CHAR_ENCODING_NONE
7889 * plug some encoding conversion routines.
7890 */
7891 start[0] = RAW;
7892 start[1] = NXT(1);
7893 start[2] = NXT(2);
7894 start[3] = NXT(3);
7895 enc = xmlDetectCharEncoding(start, 4);
7896 if (enc != XML_CHAR_ENCODING_NONE) {
7897 xmlSwitchEncoding(ctxt, enc);
7898 }
7899
7900
7901 if (CUR == 0) {
7902 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7904 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7905 ctxt->wellFormed = 0;
7906 ctxt->disableSAX = 1;
7907 }
7908
7909 /*
7910 * Check for the XMLDecl in the Prolog.
7911 */
7912 GROW;
7913 if ((RAW == '<') && (NXT(1) == '?') &&
7914 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7915 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7916
7917 /*
7918 * Note that we will switch encoding on the fly.
7919 */
7920 xmlParseXMLDecl(ctxt);
7921 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7922 /*
7923 * The XML REC instructs us to stop parsing right here
7924 */
7925 return(-1);
7926 }
7927 SKIP_BLANKS;
7928 } else {
7929 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7930 }
7931 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7932 ctxt->sax->startDocument(ctxt->userData);
7933
7934 /*
7935 * Doing validity checking on chunk doesn't make sense
7936 */
7937 ctxt->instate = XML_PARSER_CONTENT;
7938 ctxt->validate = 0;
7939 ctxt->loadsubset = 0;
7940 ctxt->depth = 0;
7941
7942 xmlParseContent(ctxt);
7943
7944 if ((RAW == '<') && (NXT(1) == '/')) {
7945 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7947 ctxt->sax->error(ctxt->userData,
7948 "chunk is not well balanced\n");
7949 ctxt->wellFormed = 0;
7950 ctxt->disableSAX = 1;
7951 } else if (RAW != 0) {
7952 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7954 ctxt->sax->error(ctxt->userData,
7955 "extra content at the end of well balanced chunk\n");
7956 ctxt->wellFormed = 0;
7957 ctxt->disableSAX = 1;
7958 }
7959
7960 /*
7961 * SAX: end of the document processing.
7962 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007963 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007964 ctxt->sax->endDocument(ctxt->userData);
7965
7966 if (! ctxt->wellFormed) return(-1);
7967 return(0);
7968}
7969
7970/************************************************************************
7971 * *
7972 * Progressive parsing interfaces *
7973 * *
7974 ************************************************************************/
7975
7976/**
7977 * xmlParseLookupSequence:
7978 * @ctxt: an XML parser context
7979 * @first: the first char to lookup
7980 * @next: the next char to lookup or zero
7981 * @third: the next char to lookup or zero
7982 *
7983 * Try to find if a sequence (first, next, third) or just (first next) or
7984 * (first) is available in the input stream.
7985 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7986 * to avoid rescanning sequences of bytes, it DOES change the state of the
7987 * parser, do not use liberally.
7988 *
7989 * Returns the index to the current parsing point if the full sequence
7990 * is available, -1 otherwise.
7991 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007992static int
Owen Taylor3473f882001-02-23 17:55:21 +00007993xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7994 xmlChar next, xmlChar third) {
7995 int base, len;
7996 xmlParserInputPtr in;
7997 const xmlChar *buf;
7998
7999 in = ctxt->input;
8000 if (in == NULL) return(-1);
8001 base = in->cur - in->base;
8002 if (base < 0) return(-1);
8003 if (ctxt->checkIndex > base)
8004 base = ctxt->checkIndex;
8005 if (in->buf == NULL) {
8006 buf = in->base;
8007 len = in->length;
8008 } else {
8009 buf = in->buf->buffer->content;
8010 len = in->buf->buffer->use;
8011 }
8012 /* take into account the sequence length */
8013 if (third) len -= 2;
8014 else if (next) len --;
8015 for (;base < len;base++) {
8016 if (buf[base] == first) {
8017 if (third != 0) {
8018 if ((buf[base + 1] != next) ||
8019 (buf[base + 2] != third)) continue;
8020 } else if (next != 0) {
8021 if (buf[base + 1] != next) continue;
8022 }
8023 ctxt->checkIndex = 0;
8024#ifdef DEBUG_PUSH
8025 if (next == 0)
8026 xmlGenericError(xmlGenericErrorContext,
8027 "PP: lookup '%c' found at %d\n",
8028 first, base);
8029 else if (third == 0)
8030 xmlGenericError(xmlGenericErrorContext,
8031 "PP: lookup '%c%c' found at %d\n",
8032 first, next, base);
8033 else
8034 xmlGenericError(xmlGenericErrorContext,
8035 "PP: lookup '%c%c%c' found at %d\n",
8036 first, next, third, base);
8037#endif
8038 return(base - (in->cur - in->base));
8039 }
8040 }
8041 ctxt->checkIndex = base;
8042#ifdef DEBUG_PUSH
8043 if (next == 0)
8044 xmlGenericError(xmlGenericErrorContext,
8045 "PP: lookup '%c' failed\n", first);
8046 else if (third == 0)
8047 xmlGenericError(xmlGenericErrorContext,
8048 "PP: lookup '%c%c' failed\n", first, next);
8049 else
8050 xmlGenericError(xmlGenericErrorContext,
8051 "PP: lookup '%c%c%c' failed\n", first, next, third);
8052#endif
8053 return(-1);
8054}
8055
8056/**
8057 * xmlParseTryOrFinish:
8058 * @ctxt: an XML parser context
8059 * @terminate: last chunk indicator
8060 *
8061 * Try to progress on parsing
8062 *
8063 * Returns zero if no parsing was possible
8064 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008065static int
Owen Taylor3473f882001-02-23 17:55:21 +00008066xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8067 int ret = 0;
8068 int avail;
8069 xmlChar cur, next;
8070
8071#ifdef DEBUG_PUSH
8072 switch (ctxt->instate) {
8073 case XML_PARSER_EOF:
8074 xmlGenericError(xmlGenericErrorContext,
8075 "PP: try EOF\n"); break;
8076 case XML_PARSER_START:
8077 xmlGenericError(xmlGenericErrorContext,
8078 "PP: try START\n"); break;
8079 case XML_PARSER_MISC:
8080 xmlGenericError(xmlGenericErrorContext,
8081 "PP: try MISC\n");break;
8082 case XML_PARSER_COMMENT:
8083 xmlGenericError(xmlGenericErrorContext,
8084 "PP: try COMMENT\n");break;
8085 case XML_PARSER_PROLOG:
8086 xmlGenericError(xmlGenericErrorContext,
8087 "PP: try PROLOG\n");break;
8088 case XML_PARSER_START_TAG:
8089 xmlGenericError(xmlGenericErrorContext,
8090 "PP: try START_TAG\n");break;
8091 case XML_PARSER_CONTENT:
8092 xmlGenericError(xmlGenericErrorContext,
8093 "PP: try CONTENT\n");break;
8094 case XML_PARSER_CDATA_SECTION:
8095 xmlGenericError(xmlGenericErrorContext,
8096 "PP: try CDATA_SECTION\n");break;
8097 case XML_PARSER_END_TAG:
8098 xmlGenericError(xmlGenericErrorContext,
8099 "PP: try END_TAG\n");break;
8100 case XML_PARSER_ENTITY_DECL:
8101 xmlGenericError(xmlGenericErrorContext,
8102 "PP: try ENTITY_DECL\n");break;
8103 case XML_PARSER_ENTITY_VALUE:
8104 xmlGenericError(xmlGenericErrorContext,
8105 "PP: try ENTITY_VALUE\n");break;
8106 case XML_PARSER_ATTRIBUTE_VALUE:
8107 xmlGenericError(xmlGenericErrorContext,
8108 "PP: try ATTRIBUTE_VALUE\n");break;
8109 case XML_PARSER_DTD:
8110 xmlGenericError(xmlGenericErrorContext,
8111 "PP: try DTD\n");break;
8112 case XML_PARSER_EPILOG:
8113 xmlGenericError(xmlGenericErrorContext,
8114 "PP: try EPILOG\n");break;
8115 case XML_PARSER_PI:
8116 xmlGenericError(xmlGenericErrorContext,
8117 "PP: try PI\n");break;
8118 case XML_PARSER_IGNORE:
8119 xmlGenericError(xmlGenericErrorContext,
8120 "PP: try IGNORE\n");break;
8121 }
8122#endif
8123
8124 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008125 SHRINK;
8126
Owen Taylor3473f882001-02-23 17:55:21 +00008127 /*
8128 * Pop-up of finished entities.
8129 */
8130 while ((RAW == 0) && (ctxt->inputNr > 1))
8131 xmlPopInput(ctxt);
8132
8133 if (ctxt->input ==NULL) break;
8134 if (ctxt->input->buf == NULL)
8135 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008136 else {
8137 /*
8138 * If we are operating on converted input, try to flush
8139 * remainng chars to avoid them stalling in the non-converted
8140 * buffer.
8141 */
8142 if ((ctxt->input->buf->raw != NULL) &&
8143 (ctxt->input->buf->raw->use > 0)) {
8144 int base = ctxt->input->base -
8145 ctxt->input->buf->buffer->content;
8146 int current = ctxt->input->cur - ctxt->input->base;
8147
8148 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8149 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8150 ctxt->input->cur = ctxt->input->base + current;
8151 ctxt->input->end =
8152 &ctxt->input->buf->buffer->content[
8153 ctxt->input->buf->buffer->use];
8154 }
8155 avail = ctxt->input->buf->buffer->use -
8156 (ctxt->input->cur - ctxt->input->base);
8157 }
Owen Taylor3473f882001-02-23 17:55:21 +00008158 if (avail < 1)
8159 goto done;
8160 switch (ctxt->instate) {
8161 case XML_PARSER_EOF:
8162 /*
8163 * Document parsing is done !
8164 */
8165 goto done;
8166 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008167 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8168 xmlChar start[4];
8169 xmlCharEncoding enc;
8170
8171 /*
8172 * Very first chars read from the document flow.
8173 */
8174 if (avail < 4)
8175 goto done;
8176
8177 /*
8178 * Get the 4 first bytes and decode the charset
8179 * if enc != XML_CHAR_ENCODING_NONE
8180 * plug some encoding conversion routines.
8181 */
8182 start[0] = RAW;
8183 start[1] = NXT(1);
8184 start[2] = NXT(2);
8185 start[3] = NXT(3);
8186 enc = xmlDetectCharEncoding(start, 4);
8187 if (enc != XML_CHAR_ENCODING_NONE) {
8188 xmlSwitchEncoding(ctxt, enc);
8189 }
8190 break;
8191 }
Owen Taylor3473f882001-02-23 17:55:21 +00008192
8193 cur = ctxt->input->cur[0];
8194 next = ctxt->input->cur[1];
8195 if (cur == 0) {
8196 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8197 ctxt->sax->setDocumentLocator(ctxt->userData,
8198 &xmlDefaultSAXLocator);
8199 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8201 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8202 ctxt->wellFormed = 0;
8203 ctxt->disableSAX = 1;
8204 ctxt->instate = XML_PARSER_EOF;
8205#ifdef DEBUG_PUSH
8206 xmlGenericError(xmlGenericErrorContext,
8207 "PP: entering EOF\n");
8208#endif
8209 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8210 ctxt->sax->endDocument(ctxt->userData);
8211 goto done;
8212 }
8213 if ((cur == '<') && (next == '?')) {
8214 /* PI or XML decl */
8215 if (avail < 5) return(ret);
8216 if ((!terminate) &&
8217 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8218 return(ret);
8219 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8220 ctxt->sax->setDocumentLocator(ctxt->userData,
8221 &xmlDefaultSAXLocator);
8222 if ((ctxt->input->cur[2] == 'x') &&
8223 (ctxt->input->cur[3] == 'm') &&
8224 (ctxt->input->cur[4] == 'l') &&
8225 (IS_BLANK(ctxt->input->cur[5]))) {
8226 ret += 5;
8227#ifdef DEBUG_PUSH
8228 xmlGenericError(xmlGenericErrorContext,
8229 "PP: Parsing XML Decl\n");
8230#endif
8231 xmlParseXMLDecl(ctxt);
8232 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8233 /*
8234 * The XML REC instructs us to stop parsing right
8235 * here
8236 */
8237 ctxt->instate = XML_PARSER_EOF;
8238 return(0);
8239 }
8240 ctxt->standalone = ctxt->input->standalone;
8241 if ((ctxt->encoding == NULL) &&
8242 (ctxt->input->encoding != NULL))
8243 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8244 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8245 (!ctxt->disableSAX))
8246 ctxt->sax->startDocument(ctxt->userData);
8247 ctxt->instate = XML_PARSER_MISC;
8248#ifdef DEBUG_PUSH
8249 xmlGenericError(xmlGenericErrorContext,
8250 "PP: entering MISC\n");
8251#endif
8252 } else {
8253 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8254 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8255 (!ctxt->disableSAX))
8256 ctxt->sax->startDocument(ctxt->userData);
8257 ctxt->instate = XML_PARSER_MISC;
8258#ifdef DEBUG_PUSH
8259 xmlGenericError(xmlGenericErrorContext,
8260 "PP: entering MISC\n");
8261#endif
8262 }
8263 } else {
8264 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8265 ctxt->sax->setDocumentLocator(ctxt->userData,
8266 &xmlDefaultSAXLocator);
8267 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8268 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8269 (!ctxt->disableSAX))
8270 ctxt->sax->startDocument(ctxt->userData);
8271 ctxt->instate = XML_PARSER_MISC;
8272#ifdef DEBUG_PUSH
8273 xmlGenericError(xmlGenericErrorContext,
8274 "PP: entering MISC\n");
8275#endif
8276 }
8277 break;
8278 case XML_PARSER_MISC:
8279 SKIP_BLANKS;
8280 if (ctxt->input->buf == NULL)
8281 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8282 else
8283 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8284 if (avail < 2)
8285 goto done;
8286 cur = ctxt->input->cur[0];
8287 next = ctxt->input->cur[1];
8288 if ((cur == '<') && (next == '?')) {
8289 if ((!terminate) &&
8290 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8291 goto done;
8292#ifdef DEBUG_PUSH
8293 xmlGenericError(xmlGenericErrorContext,
8294 "PP: Parsing PI\n");
8295#endif
8296 xmlParsePI(ctxt);
8297 } else if ((cur == '<') && (next == '!') &&
8298 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8299 if ((!terminate) &&
8300 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8301 goto done;
8302#ifdef DEBUG_PUSH
8303 xmlGenericError(xmlGenericErrorContext,
8304 "PP: Parsing Comment\n");
8305#endif
8306 xmlParseComment(ctxt);
8307 ctxt->instate = XML_PARSER_MISC;
8308 } else if ((cur == '<') && (next == '!') &&
8309 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8310 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8311 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8312 (ctxt->input->cur[8] == 'E')) {
8313 if ((!terminate) &&
8314 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8315 goto done;
8316#ifdef DEBUG_PUSH
8317 xmlGenericError(xmlGenericErrorContext,
8318 "PP: Parsing internal subset\n");
8319#endif
8320 ctxt->inSubset = 1;
8321 xmlParseDocTypeDecl(ctxt);
8322 if (RAW == '[') {
8323 ctxt->instate = XML_PARSER_DTD;
8324#ifdef DEBUG_PUSH
8325 xmlGenericError(xmlGenericErrorContext,
8326 "PP: entering DTD\n");
8327#endif
8328 } else {
8329 /*
8330 * Create and update the external subset.
8331 */
8332 ctxt->inSubset = 2;
8333 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8334 (ctxt->sax->externalSubset != NULL))
8335 ctxt->sax->externalSubset(ctxt->userData,
8336 ctxt->intSubName, ctxt->extSubSystem,
8337 ctxt->extSubURI);
8338 ctxt->inSubset = 0;
8339 ctxt->instate = XML_PARSER_PROLOG;
8340#ifdef DEBUG_PUSH
8341 xmlGenericError(xmlGenericErrorContext,
8342 "PP: entering PROLOG\n");
8343#endif
8344 }
8345 } else if ((cur == '<') && (next == '!') &&
8346 (avail < 9)) {
8347 goto done;
8348 } else {
8349 ctxt->instate = XML_PARSER_START_TAG;
8350#ifdef DEBUG_PUSH
8351 xmlGenericError(xmlGenericErrorContext,
8352 "PP: entering START_TAG\n");
8353#endif
8354 }
8355 break;
8356 case XML_PARSER_IGNORE:
8357 xmlGenericError(xmlGenericErrorContext,
8358 "PP: internal error, state == IGNORE");
8359 ctxt->instate = XML_PARSER_DTD;
8360#ifdef DEBUG_PUSH
8361 xmlGenericError(xmlGenericErrorContext,
8362 "PP: entering DTD\n");
8363#endif
8364 break;
8365 case XML_PARSER_PROLOG:
8366 SKIP_BLANKS;
8367 if (ctxt->input->buf == NULL)
8368 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8369 else
8370 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8371 if (avail < 2)
8372 goto done;
8373 cur = ctxt->input->cur[0];
8374 next = ctxt->input->cur[1];
8375 if ((cur == '<') && (next == '?')) {
8376 if ((!terminate) &&
8377 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8378 goto done;
8379#ifdef DEBUG_PUSH
8380 xmlGenericError(xmlGenericErrorContext,
8381 "PP: Parsing PI\n");
8382#endif
8383 xmlParsePI(ctxt);
8384 } else if ((cur == '<') && (next == '!') &&
8385 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8386 if ((!terminate) &&
8387 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8388 goto done;
8389#ifdef DEBUG_PUSH
8390 xmlGenericError(xmlGenericErrorContext,
8391 "PP: Parsing Comment\n");
8392#endif
8393 xmlParseComment(ctxt);
8394 ctxt->instate = XML_PARSER_PROLOG;
8395 } else if ((cur == '<') && (next == '!') &&
8396 (avail < 4)) {
8397 goto done;
8398 } else {
8399 ctxt->instate = XML_PARSER_START_TAG;
8400#ifdef DEBUG_PUSH
8401 xmlGenericError(xmlGenericErrorContext,
8402 "PP: entering START_TAG\n");
8403#endif
8404 }
8405 break;
8406 case XML_PARSER_EPILOG:
8407 SKIP_BLANKS;
8408 if (ctxt->input->buf == NULL)
8409 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8410 else
8411 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8412 if (avail < 2)
8413 goto done;
8414 cur = ctxt->input->cur[0];
8415 next = ctxt->input->cur[1];
8416 if ((cur == '<') && (next == '?')) {
8417 if ((!terminate) &&
8418 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8419 goto done;
8420#ifdef DEBUG_PUSH
8421 xmlGenericError(xmlGenericErrorContext,
8422 "PP: Parsing PI\n");
8423#endif
8424 xmlParsePI(ctxt);
8425 ctxt->instate = XML_PARSER_EPILOG;
8426 } else if ((cur == '<') && (next == '!') &&
8427 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8428 if ((!terminate) &&
8429 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8430 goto done;
8431#ifdef DEBUG_PUSH
8432 xmlGenericError(xmlGenericErrorContext,
8433 "PP: Parsing Comment\n");
8434#endif
8435 xmlParseComment(ctxt);
8436 ctxt->instate = XML_PARSER_EPILOG;
8437 } else if ((cur == '<') && (next == '!') &&
8438 (avail < 4)) {
8439 goto done;
8440 } else {
8441 ctxt->errNo = XML_ERR_DOCUMENT_END;
8442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8443 ctxt->sax->error(ctxt->userData,
8444 "Extra content at the end of the document\n");
8445 ctxt->wellFormed = 0;
8446 ctxt->disableSAX = 1;
8447 ctxt->instate = XML_PARSER_EOF;
8448#ifdef DEBUG_PUSH
8449 xmlGenericError(xmlGenericErrorContext,
8450 "PP: entering EOF\n");
8451#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008452 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008453 ctxt->sax->endDocument(ctxt->userData);
8454 goto done;
8455 }
8456 break;
8457 case XML_PARSER_START_TAG: {
8458 xmlChar *name, *oldname;
8459
8460 if ((avail < 2) && (ctxt->inputNr == 1))
8461 goto done;
8462 cur = ctxt->input->cur[0];
8463 if (cur != '<') {
8464 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8466 ctxt->sax->error(ctxt->userData,
8467 "Start tag expect, '<' not found\n");
8468 ctxt->wellFormed = 0;
8469 ctxt->disableSAX = 1;
8470 ctxt->instate = XML_PARSER_EOF;
8471#ifdef DEBUG_PUSH
8472 xmlGenericError(xmlGenericErrorContext,
8473 "PP: entering EOF\n");
8474#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008475 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008476 ctxt->sax->endDocument(ctxt->userData);
8477 goto done;
8478 }
8479 if ((!terminate) &&
8480 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8481 goto done;
8482 if (ctxt->spaceNr == 0)
8483 spacePush(ctxt, -1);
8484 else
8485 spacePush(ctxt, *ctxt->space);
8486 name = xmlParseStartTag(ctxt);
8487 if (name == NULL) {
8488 spacePop(ctxt);
8489 ctxt->instate = XML_PARSER_EOF;
8490#ifdef DEBUG_PUSH
8491 xmlGenericError(xmlGenericErrorContext,
8492 "PP: entering EOF\n");
8493#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008494 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008495 ctxt->sax->endDocument(ctxt->userData);
8496 goto done;
8497 }
8498 namePush(ctxt, xmlStrdup(name));
8499
8500 /*
8501 * [ VC: Root Element Type ]
8502 * The Name in the document type declaration must match
8503 * the element type of the root element.
8504 */
8505 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8506 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8507 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8508
8509 /*
8510 * Check for an Empty Element.
8511 */
8512 if ((RAW == '/') && (NXT(1) == '>')) {
8513 SKIP(2);
8514 if ((ctxt->sax != NULL) &&
8515 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8516 ctxt->sax->endElement(ctxt->userData, name);
8517 xmlFree(name);
8518 oldname = namePop(ctxt);
8519 spacePop(ctxt);
8520 if (oldname != NULL) {
8521#ifdef DEBUG_STACK
8522 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8523#endif
8524 xmlFree(oldname);
8525 }
8526 if (ctxt->name == NULL) {
8527 ctxt->instate = XML_PARSER_EPILOG;
8528#ifdef DEBUG_PUSH
8529 xmlGenericError(xmlGenericErrorContext,
8530 "PP: entering EPILOG\n");
8531#endif
8532 } else {
8533 ctxt->instate = XML_PARSER_CONTENT;
8534#ifdef DEBUG_PUSH
8535 xmlGenericError(xmlGenericErrorContext,
8536 "PP: entering CONTENT\n");
8537#endif
8538 }
8539 break;
8540 }
8541 if (RAW == '>') {
8542 NEXT;
8543 } else {
8544 ctxt->errNo = XML_ERR_GT_REQUIRED;
8545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8546 ctxt->sax->error(ctxt->userData,
8547 "Couldn't find end of Start Tag %s\n",
8548 name);
8549 ctxt->wellFormed = 0;
8550 ctxt->disableSAX = 1;
8551
8552 /*
8553 * end of parsing of this node.
8554 */
8555 nodePop(ctxt);
8556 oldname = namePop(ctxt);
8557 spacePop(ctxt);
8558 if (oldname != NULL) {
8559#ifdef DEBUG_STACK
8560 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8561#endif
8562 xmlFree(oldname);
8563 }
8564 }
8565 xmlFree(name);
8566 ctxt->instate = XML_PARSER_CONTENT;
8567#ifdef DEBUG_PUSH
8568 xmlGenericError(xmlGenericErrorContext,
8569 "PP: entering CONTENT\n");
8570#endif
8571 break;
8572 }
8573 case XML_PARSER_CONTENT: {
8574 const xmlChar *test;
8575 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008576 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008577
8578 /*
8579 * Handle preparsed entities and charRef
8580 */
8581 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008582 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008583
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008584 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008585 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8586 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008587 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008588 ctxt->token = 0;
8589 }
8590 if ((avail < 2) && (ctxt->inputNr == 1))
8591 goto done;
8592 cur = ctxt->input->cur[0];
8593 next = ctxt->input->cur[1];
8594
8595 test = CUR_PTR;
8596 cons = ctxt->input->consumed;
8597 tok = ctxt->token;
8598 if ((cur == '<') && (next == '?')) {
8599 if ((!terminate) &&
8600 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8601 goto done;
8602#ifdef DEBUG_PUSH
8603 xmlGenericError(xmlGenericErrorContext,
8604 "PP: Parsing PI\n");
8605#endif
8606 xmlParsePI(ctxt);
8607 } else if ((cur == '<') && (next == '!') &&
8608 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8609 if ((!terminate) &&
8610 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8611 goto done;
8612#ifdef DEBUG_PUSH
8613 xmlGenericError(xmlGenericErrorContext,
8614 "PP: Parsing Comment\n");
8615#endif
8616 xmlParseComment(ctxt);
8617 ctxt->instate = XML_PARSER_CONTENT;
8618 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8619 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8620 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8621 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8622 (ctxt->input->cur[8] == '[')) {
8623 SKIP(9);
8624 ctxt->instate = XML_PARSER_CDATA_SECTION;
8625#ifdef DEBUG_PUSH
8626 xmlGenericError(xmlGenericErrorContext,
8627 "PP: entering CDATA_SECTION\n");
8628#endif
8629 break;
8630 } else if ((cur == '<') && (next == '!') &&
8631 (avail < 9)) {
8632 goto done;
8633 } else if ((cur == '<') && (next == '/')) {
8634 ctxt->instate = XML_PARSER_END_TAG;
8635#ifdef DEBUG_PUSH
8636 xmlGenericError(xmlGenericErrorContext,
8637 "PP: entering END_TAG\n");
8638#endif
8639 break;
8640 } else if (cur == '<') {
8641 ctxt->instate = XML_PARSER_START_TAG;
8642#ifdef DEBUG_PUSH
8643 xmlGenericError(xmlGenericErrorContext,
8644 "PP: entering START_TAG\n");
8645#endif
8646 break;
8647 } else if (cur == '&') {
8648 if ((!terminate) &&
8649 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8650 goto done;
8651#ifdef DEBUG_PUSH
8652 xmlGenericError(xmlGenericErrorContext,
8653 "PP: Parsing Reference\n");
8654#endif
8655 xmlParseReference(ctxt);
8656 } else {
8657 /* TODO Avoid the extra copy, handle directly !!! */
8658 /*
8659 * Goal of the following test is:
8660 * - minimize calls to the SAX 'character' callback
8661 * when they are mergeable
8662 * - handle an problem for isBlank when we only parse
8663 * a sequence of blank chars and the next one is
8664 * not available to check against '<' presence.
8665 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008666 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008667 * of the parser.
8668 */
8669 if ((ctxt->inputNr == 1) &&
8670 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8671 if ((!terminate) &&
8672 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8673 goto done;
8674 }
8675 ctxt->checkIndex = 0;
8676#ifdef DEBUG_PUSH
8677 xmlGenericError(xmlGenericErrorContext,
8678 "PP: Parsing char data\n");
8679#endif
8680 xmlParseCharData(ctxt, 0);
8681 }
8682 /*
8683 * Pop-up of finished entities.
8684 */
8685 while ((RAW == 0) && (ctxt->inputNr > 1))
8686 xmlPopInput(ctxt);
8687 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8688 (tok == ctxt->token)) {
8689 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8691 ctxt->sax->error(ctxt->userData,
8692 "detected an error in element content\n");
8693 ctxt->wellFormed = 0;
8694 ctxt->disableSAX = 1;
8695 ctxt->instate = XML_PARSER_EOF;
8696 break;
8697 }
8698 break;
8699 }
8700 case XML_PARSER_CDATA_SECTION: {
8701 /*
8702 * The Push mode need to have the SAX callback for
8703 * cdataBlock merge back contiguous callbacks.
8704 */
8705 int base;
8706
8707 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8708 if (base < 0) {
8709 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8710 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8711 if (ctxt->sax->cdataBlock != NULL)
8712 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8713 XML_PARSER_BIG_BUFFER_SIZE);
8714 }
8715 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8716 ctxt->checkIndex = 0;
8717 }
8718 goto done;
8719 } else {
8720 if ((ctxt->sax != NULL) && (base > 0) &&
8721 (!ctxt->disableSAX)) {
8722 if (ctxt->sax->cdataBlock != NULL)
8723 ctxt->sax->cdataBlock(ctxt->userData,
8724 ctxt->input->cur, base);
8725 }
8726 SKIP(base + 3);
8727 ctxt->checkIndex = 0;
8728 ctxt->instate = XML_PARSER_CONTENT;
8729#ifdef DEBUG_PUSH
8730 xmlGenericError(xmlGenericErrorContext,
8731 "PP: entering CONTENT\n");
8732#endif
8733 }
8734 break;
8735 }
8736 case XML_PARSER_END_TAG:
8737 if (avail < 2)
8738 goto done;
8739 if ((!terminate) &&
8740 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8741 goto done;
8742 xmlParseEndTag(ctxt);
8743 if (ctxt->name == NULL) {
8744 ctxt->instate = XML_PARSER_EPILOG;
8745#ifdef DEBUG_PUSH
8746 xmlGenericError(xmlGenericErrorContext,
8747 "PP: entering EPILOG\n");
8748#endif
8749 } else {
8750 ctxt->instate = XML_PARSER_CONTENT;
8751#ifdef DEBUG_PUSH
8752 xmlGenericError(xmlGenericErrorContext,
8753 "PP: entering CONTENT\n");
8754#endif
8755 }
8756 break;
8757 case XML_PARSER_DTD: {
8758 /*
8759 * Sorry but progressive parsing of the internal subset
8760 * is not expected to be supported. We first check that
8761 * the full content of the internal subset is available and
8762 * the parsing is launched only at that point.
8763 * Internal subset ends up with "']' S? '>'" in an unescaped
8764 * section and not in a ']]>' sequence which are conditional
8765 * sections (whoever argued to keep that crap in XML deserve
8766 * a place in hell !).
8767 */
8768 int base, i;
8769 xmlChar *buf;
8770 xmlChar quote = 0;
8771
8772 base = ctxt->input->cur - ctxt->input->base;
8773 if (base < 0) return(0);
8774 if (ctxt->checkIndex > base)
8775 base = ctxt->checkIndex;
8776 buf = ctxt->input->buf->buffer->content;
8777 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8778 base++) {
8779 if (quote != 0) {
8780 if (buf[base] == quote)
8781 quote = 0;
8782 continue;
8783 }
8784 if (buf[base] == '"') {
8785 quote = '"';
8786 continue;
8787 }
8788 if (buf[base] == '\'') {
8789 quote = '\'';
8790 continue;
8791 }
8792 if (buf[base] == ']') {
8793 if ((unsigned int) base +1 >=
8794 ctxt->input->buf->buffer->use)
8795 break;
8796 if (buf[base + 1] == ']') {
8797 /* conditional crap, skip both ']' ! */
8798 base++;
8799 continue;
8800 }
8801 for (i = 0;
8802 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8803 i++) {
8804 if (buf[base + i] == '>')
8805 goto found_end_int_subset;
8806 }
8807 break;
8808 }
8809 }
8810 /*
8811 * We didn't found the end of the Internal subset
8812 */
8813 if (quote == 0)
8814 ctxt->checkIndex = base;
8815#ifdef DEBUG_PUSH
8816 if (next == 0)
8817 xmlGenericError(xmlGenericErrorContext,
8818 "PP: lookup of int subset end filed\n");
8819#endif
8820 goto done;
8821
8822found_end_int_subset:
8823 xmlParseInternalSubset(ctxt);
8824 ctxt->inSubset = 2;
8825 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8826 (ctxt->sax->externalSubset != NULL))
8827 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8828 ctxt->extSubSystem, ctxt->extSubURI);
8829 ctxt->inSubset = 0;
8830 ctxt->instate = XML_PARSER_PROLOG;
8831 ctxt->checkIndex = 0;
8832#ifdef DEBUG_PUSH
8833 xmlGenericError(xmlGenericErrorContext,
8834 "PP: entering PROLOG\n");
8835#endif
8836 break;
8837 }
8838 case XML_PARSER_COMMENT:
8839 xmlGenericError(xmlGenericErrorContext,
8840 "PP: internal error, state == COMMENT\n");
8841 ctxt->instate = XML_PARSER_CONTENT;
8842#ifdef DEBUG_PUSH
8843 xmlGenericError(xmlGenericErrorContext,
8844 "PP: entering CONTENT\n");
8845#endif
8846 break;
8847 case XML_PARSER_PI:
8848 xmlGenericError(xmlGenericErrorContext,
8849 "PP: internal error, state == PI\n");
8850 ctxt->instate = XML_PARSER_CONTENT;
8851#ifdef DEBUG_PUSH
8852 xmlGenericError(xmlGenericErrorContext,
8853 "PP: entering CONTENT\n");
8854#endif
8855 break;
8856 case XML_PARSER_ENTITY_DECL:
8857 xmlGenericError(xmlGenericErrorContext,
8858 "PP: internal error, state == ENTITY_DECL\n");
8859 ctxt->instate = XML_PARSER_DTD;
8860#ifdef DEBUG_PUSH
8861 xmlGenericError(xmlGenericErrorContext,
8862 "PP: entering DTD\n");
8863#endif
8864 break;
8865 case XML_PARSER_ENTITY_VALUE:
8866 xmlGenericError(xmlGenericErrorContext,
8867 "PP: internal error, state == ENTITY_VALUE\n");
8868 ctxt->instate = XML_PARSER_CONTENT;
8869#ifdef DEBUG_PUSH
8870 xmlGenericError(xmlGenericErrorContext,
8871 "PP: entering DTD\n");
8872#endif
8873 break;
8874 case XML_PARSER_ATTRIBUTE_VALUE:
8875 xmlGenericError(xmlGenericErrorContext,
8876 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8877 ctxt->instate = XML_PARSER_START_TAG;
8878#ifdef DEBUG_PUSH
8879 xmlGenericError(xmlGenericErrorContext,
8880 "PP: entering START_TAG\n");
8881#endif
8882 break;
8883 case XML_PARSER_SYSTEM_LITERAL:
8884 xmlGenericError(xmlGenericErrorContext,
8885 "PP: internal error, state == SYSTEM_LITERAL\n");
8886 ctxt->instate = XML_PARSER_START_TAG;
8887#ifdef DEBUG_PUSH
8888 xmlGenericError(xmlGenericErrorContext,
8889 "PP: entering START_TAG\n");
8890#endif
8891 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008892 case XML_PARSER_PUBLIC_LITERAL:
8893 xmlGenericError(xmlGenericErrorContext,
8894 "PP: internal error, state == PUBLIC_LITERAL\n");
8895 ctxt->instate = XML_PARSER_START_TAG;
8896#ifdef DEBUG_PUSH
8897 xmlGenericError(xmlGenericErrorContext,
8898 "PP: entering START_TAG\n");
8899#endif
8900 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008901 }
8902 }
8903done:
8904#ifdef DEBUG_PUSH
8905 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8906#endif
8907 return(ret);
8908}
8909
8910/**
Owen Taylor3473f882001-02-23 17:55:21 +00008911 * xmlParseChunk:
8912 * @ctxt: an XML parser context
8913 * @chunk: an char array
8914 * @size: the size in byte of the chunk
8915 * @terminate: last chunk indicator
8916 *
8917 * Parse a Chunk of memory
8918 *
8919 * Returns zero if no error, the xmlParserErrors otherwise.
8920 */
8921int
8922xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8923 int terminate) {
8924 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8925 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8926 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8927 int cur = ctxt->input->cur - ctxt->input->base;
8928
8929 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8930 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8931 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008932 ctxt->input->end =
8933 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008934#ifdef DEBUG_PUSH
8935 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8936#endif
8937
8938 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8939 xmlParseTryOrFinish(ctxt, terminate);
8940 } else if (ctxt->instate != XML_PARSER_EOF) {
8941 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8942 xmlParserInputBufferPtr in = ctxt->input->buf;
8943 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8944 (in->raw != NULL)) {
8945 int nbchars;
8946
8947 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8948 if (nbchars < 0) {
8949 xmlGenericError(xmlGenericErrorContext,
8950 "xmlParseChunk: encoder error\n");
8951 return(XML_ERR_INVALID_ENCODING);
8952 }
8953 }
8954 }
8955 }
8956 xmlParseTryOrFinish(ctxt, terminate);
8957 if (terminate) {
8958 /*
8959 * Check for termination
8960 */
8961 if ((ctxt->instate != XML_PARSER_EOF) &&
8962 (ctxt->instate != XML_PARSER_EPILOG)) {
8963 ctxt->errNo = XML_ERR_DOCUMENT_END;
8964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8965 ctxt->sax->error(ctxt->userData,
8966 "Extra content at the end of the document\n");
8967 ctxt->wellFormed = 0;
8968 ctxt->disableSAX = 1;
8969 }
8970 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008971 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008972 ctxt->sax->endDocument(ctxt->userData);
8973 }
8974 ctxt->instate = XML_PARSER_EOF;
8975 }
8976 return((xmlParserErrors) ctxt->errNo);
8977}
8978
8979/************************************************************************
8980 * *
8981 * I/O front end functions to the parser *
8982 * *
8983 ************************************************************************/
8984
8985/**
8986 * xmlStopParser:
8987 * @ctxt: an XML parser context
8988 *
8989 * Blocks further parser processing
8990 */
8991void
8992xmlStopParser(xmlParserCtxtPtr ctxt) {
8993 ctxt->instate = XML_PARSER_EOF;
8994 if (ctxt->input != NULL)
8995 ctxt->input->cur = BAD_CAST"";
8996}
8997
8998/**
8999 * xmlCreatePushParserCtxt:
9000 * @sax: a SAX handler
9001 * @user_data: The user data returned on SAX callbacks
9002 * @chunk: a pointer to an array of chars
9003 * @size: number of chars in the array
9004 * @filename: an optional file name or URI
9005 *
9006 * Create a parser context for using the XML parser in push mode
9007 * To allow content encoding detection, @size should be >= 4
9008 * The value of @filename is used for fetching external entities
9009 * and error/warning reports.
9010 *
9011 * Returns the new parser context or NULL
9012 */
9013xmlParserCtxtPtr
9014xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9015 const char *chunk, int size, const char *filename) {
9016 xmlParserCtxtPtr ctxt;
9017 xmlParserInputPtr inputStream;
9018 xmlParserInputBufferPtr buf;
9019 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9020
9021 /*
9022 * plug some encoding conversion routines
9023 */
9024 if ((chunk != NULL) && (size >= 4))
9025 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9026
9027 buf = xmlAllocParserInputBuffer(enc);
9028 if (buf == NULL) return(NULL);
9029
9030 ctxt = xmlNewParserCtxt();
9031 if (ctxt == NULL) {
9032 xmlFree(buf);
9033 return(NULL);
9034 }
9035 if (sax != NULL) {
9036 if (ctxt->sax != &xmlDefaultSAXHandler)
9037 xmlFree(ctxt->sax);
9038 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9039 if (ctxt->sax == NULL) {
9040 xmlFree(buf);
9041 xmlFree(ctxt);
9042 return(NULL);
9043 }
9044 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9045 if (user_data != NULL)
9046 ctxt->userData = user_data;
9047 }
9048 if (filename == NULL) {
9049 ctxt->directory = NULL;
9050 } else {
9051 ctxt->directory = xmlParserGetDirectory(filename);
9052 }
9053
9054 inputStream = xmlNewInputStream(ctxt);
9055 if (inputStream == NULL) {
9056 xmlFreeParserCtxt(ctxt);
9057 return(NULL);
9058 }
9059
9060 if (filename == NULL)
9061 inputStream->filename = NULL;
9062 else
9063 inputStream->filename = xmlMemStrdup(filename);
9064 inputStream->buf = buf;
9065 inputStream->base = inputStream->buf->buffer->content;
9066 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009067 inputStream->end =
9068 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009069
9070 inputPush(ctxt, inputStream);
9071
9072 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9073 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009074 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9075 int cur = ctxt->input->cur - ctxt->input->base;
9076
Owen Taylor3473f882001-02-23 17:55:21 +00009077 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009078
9079 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9080 ctxt->input->cur = ctxt->input->base + cur;
9081 ctxt->input->end =
9082 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009083#ifdef DEBUG_PUSH
9084 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9085#endif
9086 }
9087
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009088 if (enc != XML_CHAR_ENCODING_NONE) {
9089 xmlSwitchEncoding(ctxt, enc);
9090 }
9091
Owen Taylor3473f882001-02-23 17:55:21 +00009092 return(ctxt);
9093}
9094
9095/**
9096 * xmlCreateIOParserCtxt:
9097 * @sax: a SAX handler
9098 * @user_data: The user data returned on SAX callbacks
9099 * @ioread: an I/O read function
9100 * @ioclose: an I/O close function
9101 * @ioctx: an I/O handler
9102 * @enc: the charset encoding if known
9103 *
9104 * Create a parser context for using the XML parser with an existing
9105 * I/O stream
9106 *
9107 * Returns the new parser context or NULL
9108 */
9109xmlParserCtxtPtr
9110xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9111 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9112 void *ioctx, xmlCharEncoding enc) {
9113 xmlParserCtxtPtr ctxt;
9114 xmlParserInputPtr inputStream;
9115 xmlParserInputBufferPtr buf;
9116
9117 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9118 if (buf == NULL) return(NULL);
9119
9120 ctxt = xmlNewParserCtxt();
9121 if (ctxt == NULL) {
9122 xmlFree(buf);
9123 return(NULL);
9124 }
9125 if (sax != NULL) {
9126 if (ctxt->sax != &xmlDefaultSAXHandler)
9127 xmlFree(ctxt->sax);
9128 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9129 if (ctxt->sax == NULL) {
9130 xmlFree(buf);
9131 xmlFree(ctxt);
9132 return(NULL);
9133 }
9134 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9135 if (user_data != NULL)
9136 ctxt->userData = user_data;
9137 }
9138
9139 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9140 if (inputStream == NULL) {
9141 xmlFreeParserCtxt(ctxt);
9142 return(NULL);
9143 }
9144 inputPush(ctxt, inputStream);
9145
9146 return(ctxt);
9147}
9148
9149/************************************************************************
9150 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009151 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009152 * *
9153 ************************************************************************/
9154
9155/**
9156 * xmlIOParseDTD:
9157 * @sax: the SAX handler block or NULL
9158 * @input: an Input Buffer
9159 * @enc: the charset encoding if known
9160 *
9161 * Load and parse a DTD
9162 *
9163 * Returns the resulting xmlDtdPtr or NULL in case of error.
9164 * @input will be freed at parsing end.
9165 */
9166
9167xmlDtdPtr
9168xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9169 xmlCharEncoding enc) {
9170 xmlDtdPtr ret = NULL;
9171 xmlParserCtxtPtr ctxt;
9172 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009173 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009174
9175 if (input == NULL)
9176 return(NULL);
9177
9178 ctxt = xmlNewParserCtxt();
9179 if (ctxt == NULL) {
9180 return(NULL);
9181 }
9182
9183 /*
9184 * Set-up the SAX context
9185 */
9186 if (sax != NULL) {
9187 if (ctxt->sax != NULL)
9188 xmlFree(ctxt->sax);
9189 ctxt->sax = sax;
9190 ctxt->userData = NULL;
9191 }
9192
9193 /*
9194 * generate a parser input from the I/O handler
9195 */
9196
9197 pinput = xmlNewIOInputStream(ctxt, input, enc);
9198 if (pinput == NULL) {
9199 if (sax != NULL) ctxt->sax = NULL;
9200 xmlFreeParserCtxt(ctxt);
9201 return(NULL);
9202 }
9203
9204 /*
9205 * plug some encoding conversion routines here.
9206 */
9207 xmlPushInput(ctxt, pinput);
9208
9209 pinput->filename = NULL;
9210 pinput->line = 1;
9211 pinput->col = 1;
9212 pinput->base = ctxt->input->cur;
9213 pinput->cur = ctxt->input->cur;
9214 pinput->free = NULL;
9215
9216 /*
9217 * let's parse that entity knowing it's an external subset.
9218 */
9219 ctxt->inSubset = 2;
9220 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9221 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9222 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009223
9224 if (enc == XML_CHAR_ENCODING_NONE) {
9225 /*
9226 * Get the 4 first bytes and decode the charset
9227 * if enc != XML_CHAR_ENCODING_NONE
9228 * plug some encoding conversion routines.
9229 */
9230 start[0] = RAW;
9231 start[1] = NXT(1);
9232 start[2] = NXT(2);
9233 start[3] = NXT(3);
9234 enc = xmlDetectCharEncoding(start, 4);
9235 if (enc != XML_CHAR_ENCODING_NONE) {
9236 xmlSwitchEncoding(ctxt, enc);
9237 }
9238 }
9239
Owen Taylor3473f882001-02-23 17:55:21 +00009240 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9241
9242 if (ctxt->myDoc != NULL) {
9243 if (ctxt->wellFormed) {
9244 ret = ctxt->myDoc->extSubset;
9245 ctxt->myDoc->extSubset = NULL;
9246 } else {
9247 ret = NULL;
9248 }
9249 xmlFreeDoc(ctxt->myDoc);
9250 ctxt->myDoc = NULL;
9251 }
9252 if (sax != NULL) ctxt->sax = NULL;
9253 xmlFreeParserCtxt(ctxt);
9254
9255 return(ret);
9256}
9257
9258/**
9259 * xmlSAXParseDTD:
9260 * @sax: the SAX handler block
9261 * @ExternalID: a NAME* containing the External ID of the DTD
9262 * @SystemID: a NAME* containing the URL to the DTD
9263 *
9264 * Load and parse an external subset.
9265 *
9266 * Returns the resulting xmlDtdPtr or NULL in case of error.
9267 */
9268
9269xmlDtdPtr
9270xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9271 const xmlChar *SystemID) {
9272 xmlDtdPtr ret = NULL;
9273 xmlParserCtxtPtr ctxt;
9274 xmlParserInputPtr input = NULL;
9275 xmlCharEncoding enc;
9276
9277 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9278
9279 ctxt = xmlNewParserCtxt();
9280 if (ctxt == NULL) {
9281 return(NULL);
9282 }
9283
9284 /*
9285 * Set-up the SAX context
9286 */
9287 if (sax != NULL) {
9288 if (ctxt->sax != NULL)
9289 xmlFree(ctxt->sax);
9290 ctxt->sax = sax;
9291 ctxt->userData = NULL;
9292 }
9293
9294 /*
9295 * Ask the Entity resolver to load the damn thing
9296 */
9297
9298 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9299 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9300 if (input == NULL) {
9301 if (sax != NULL) ctxt->sax = NULL;
9302 xmlFreeParserCtxt(ctxt);
9303 return(NULL);
9304 }
9305
9306 /*
9307 * plug some encoding conversion routines here.
9308 */
9309 xmlPushInput(ctxt, input);
9310 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9311 xmlSwitchEncoding(ctxt, enc);
9312
9313 if (input->filename == NULL)
9314 input->filename = (char *) xmlStrdup(SystemID);
9315 input->line = 1;
9316 input->col = 1;
9317 input->base = ctxt->input->cur;
9318 input->cur = ctxt->input->cur;
9319 input->free = NULL;
9320
9321 /*
9322 * let's parse that entity knowing it's an external subset.
9323 */
9324 ctxt->inSubset = 2;
9325 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9326 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9327 ExternalID, SystemID);
9328 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9329
9330 if (ctxt->myDoc != NULL) {
9331 if (ctxt->wellFormed) {
9332 ret = ctxt->myDoc->extSubset;
9333 ctxt->myDoc->extSubset = NULL;
9334 } else {
9335 ret = NULL;
9336 }
9337 xmlFreeDoc(ctxt->myDoc);
9338 ctxt->myDoc = NULL;
9339 }
9340 if (sax != NULL) ctxt->sax = NULL;
9341 xmlFreeParserCtxt(ctxt);
9342
9343 return(ret);
9344}
9345
9346/**
9347 * xmlParseDTD:
9348 * @ExternalID: a NAME* containing the External ID of the DTD
9349 * @SystemID: a NAME* containing the URL to the DTD
9350 *
9351 * Load and parse an external subset.
9352 *
9353 * Returns the resulting xmlDtdPtr or NULL in case of error.
9354 */
9355
9356xmlDtdPtr
9357xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9358 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9359}
9360
9361/************************************************************************
9362 * *
9363 * Front ends when parsing an Entity *
9364 * *
9365 ************************************************************************/
9366
9367/**
Owen Taylor3473f882001-02-23 17:55:21 +00009368 * xmlParseCtxtExternalEntity:
9369 * @ctx: the existing parsing context
9370 * @URL: the URL for the entity to load
9371 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009372 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009373 *
9374 * Parse an external general entity within an existing parsing context
9375 * An external general parsed entity is well-formed if it matches the
9376 * production labeled extParsedEnt.
9377 *
9378 * [78] extParsedEnt ::= TextDecl? content
9379 *
9380 * Returns 0 if the entity is well formed, -1 in case of args problem and
9381 * the parser error code otherwise
9382 */
9383
9384int
9385xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009386 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009387 xmlParserCtxtPtr ctxt;
9388 xmlDocPtr newDoc;
9389 xmlSAXHandlerPtr oldsax = NULL;
9390 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009391 xmlChar start[4];
9392 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009393
9394 if (ctx->depth > 40) {
9395 return(XML_ERR_ENTITY_LOOP);
9396 }
9397
Daniel Veillardcda96922001-08-21 10:56:31 +00009398 if (lst != NULL)
9399 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009400 if ((URL == NULL) && (ID == NULL))
9401 return(-1);
9402 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9403 return(-1);
9404
9405
9406 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9407 if (ctxt == NULL) return(-1);
9408 ctxt->userData = ctxt;
9409 oldsax = ctxt->sax;
9410 ctxt->sax = ctx->sax;
9411 newDoc = xmlNewDoc(BAD_CAST "1.0");
9412 if (newDoc == NULL) {
9413 xmlFreeParserCtxt(ctxt);
9414 return(-1);
9415 }
9416 if (ctx->myDoc != NULL) {
9417 newDoc->intSubset = ctx->myDoc->intSubset;
9418 newDoc->extSubset = ctx->myDoc->extSubset;
9419 }
9420 if (ctx->myDoc->URL != NULL) {
9421 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9422 }
9423 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9424 if (newDoc->children == NULL) {
9425 ctxt->sax = oldsax;
9426 xmlFreeParserCtxt(ctxt);
9427 newDoc->intSubset = NULL;
9428 newDoc->extSubset = NULL;
9429 xmlFreeDoc(newDoc);
9430 return(-1);
9431 }
9432 nodePush(ctxt, newDoc->children);
9433 if (ctx->myDoc == NULL) {
9434 ctxt->myDoc = newDoc;
9435 } else {
9436 ctxt->myDoc = ctx->myDoc;
9437 newDoc->children->doc = ctx->myDoc;
9438 }
9439
Daniel Veillard87a764e2001-06-20 17:41:10 +00009440 /*
9441 * Get the 4 first bytes and decode the charset
9442 * if enc != XML_CHAR_ENCODING_NONE
9443 * plug some encoding conversion routines.
9444 */
9445 GROW
9446 start[0] = RAW;
9447 start[1] = NXT(1);
9448 start[2] = NXT(2);
9449 start[3] = NXT(3);
9450 enc = xmlDetectCharEncoding(start, 4);
9451 if (enc != XML_CHAR_ENCODING_NONE) {
9452 xmlSwitchEncoding(ctxt, enc);
9453 }
9454
Owen Taylor3473f882001-02-23 17:55:21 +00009455 /*
9456 * Parse a possible text declaration first
9457 */
Owen Taylor3473f882001-02-23 17:55:21 +00009458 if ((RAW == '<') && (NXT(1) == '?') &&
9459 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9460 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9461 xmlParseTextDecl(ctxt);
9462 }
9463
9464 /*
9465 * Doing validity checking on chunk doesn't make sense
9466 */
9467 ctxt->instate = XML_PARSER_CONTENT;
9468 ctxt->validate = ctx->validate;
9469 ctxt->loadsubset = ctx->loadsubset;
9470 ctxt->depth = ctx->depth + 1;
9471 ctxt->replaceEntities = ctx->replaceEntities;
9472 if (ctxt->validate) {
9473 ctxt->vctxt.error = ctx->vctxt.error;
9474 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009475 } else {
9476 ctxt->vctxt.error = NULL;
9477 ctxt->vctxt.warning = NULL;
9478 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009479 ctxt->vctxt.nodeTab = NULL;
9480 ctxt->vctxt.nodeNr = 0;
9481 ctxt->vctxt.nodeMax = 0;
9482 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009483
9484 xmlParseContent(ctxt);
9485
9486 if ((RAW == '<') && (NXT(1) == '/')) {
9487 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9489 ctxt->sax->error(ctxt->userData,
9490 "chunk is not well balanced\n");
9491 ctxt->wellFormed = 0;
9492 ctxt->disableSAX = 1;
9493 } else if (RAW != 0) {
9494 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9496 ctxt->sax->error(ctxt->userData,
9497 "extra content at the end of well balanced chunk\n");
9498 ctxt->wellFormed = 0;
9499 ctxt->disableSAX = 1;
9500 }
9501 if (ctxt->node != newDoc->children) {
9502 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9504 ctxt->sax->error(ctxt->userData,
9505 "chunk is not well balanced\n");
9506 ctxt->wellFormed = 0;
9507 ctxt->disableSAX = 1;
9508 }
9509
9510 if (!ctxt->wellFormed) {
9511 if (ctxt->errNo == 0)
9512 ret = 1;
9513 else
9514 ret = ctxt->errNo;
9515 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009516 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009517 xmlNodePtr cur;
9518
9519 /*
9520 * Return the newly created nodeset after unlinking it from
9521 * they pseudo parent.
9522 */
9523 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009524 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009525 while (cur != NULL) {
9526 cur->parent = NULL;
9527 cur = cur->next;
9528 }
9529 newDoc->children->children = NULL;
9530 }
9531 ret = 0;
9532 }
9533 ctxt->sax = oldsax;
9534 xmlFreeParserCtxt(ctxt);
9535 newDoc->intSubset = NULL;
9536 newDoc->extSubset = NULL;
9537 xmlFreeDoc(newDoc);
9538
9539 return(ret);
9540}
9541
9542/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009543 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009544 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009545 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009546 * @sax: the SAX handler bloc (possibly NULL)
9547 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9548 * @depth: Used for loop detection, use 0
9549 * @URL: the URL for the entity to load
9550 * @ID: the System ID for the entity to load
9551 * @list: the return value for the set of parsed nodes
9552 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009553 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009554 *
9555 * Returns 0 if the entity is well formed, -1 in case of args problem and
9556 * the parser error code otherwise
9557 */
9558
Daniel Veillard257d9102001-05-08 10:41:44 +00009559static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009560xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9561 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009562 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009563 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009564 xmlParserCtxtPtr ctxt;
9565 xmlDocPtr newDoc;
9566 xmlSAXHandlerPtr oldsax = NULL;
9567 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009568 xmlChar start[4];
9569 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009570
9571 if (depth > 40) {
9572 return(XML_ERR_ENTITY_LOOP);
9573 }
9574
9575
9576
9577 if (list != NULL)
9578 *list = NULL;
9579 if ((URL == NULL) && (ID == NULL))
9580 return(-1);
9581 if (doc == NULL) /* @@ relax but check for dereferences */
9582 return(-1);
9583
9584
9585 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9586 if (ctxt == NULL) return(-1);
9587 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009588 if (oldctxt != NULL) {
9589 ctxt->_private = oldctxt->_private;
9590 ctxt->loadsubset = oldctxt->loadsubset;
9591 ctxt->validate = oldctxt->validate;
9592 ctxt->external = oldctxt->external;
9593 } else {
9594 /*
9595 * Doing validity checking on chunk without context
9596 * doesn't make sense
9597 */
9598 ctxt->_private = NULL;
9599 ctxt->validate = 0;
9600 ctxt->external = 2;
9601 ctxt->loadsubset = 0;
9602 }
Owen Taylor3473f882001-02-23 17:55:21 +00009603 if (sax != NULL) {
9604 oldsax = ctxt->sax;
9605 ctxt->sax = sax;
9606 if (user_data != NULL)
9607 ctxt->userData = user_data;
9608 }
9609 newDoc = xmlNewDoc(BAD_CAST "1.0");
9610 if (newDoc == NULL) {
9611 xmlFreeParserCtxt(ctxt);
9612 return(-1);
9613 }
9614 if (doc != NULL) {
9615 newDoc->intSubset = doc->intSubset;
9616 newDoc->extSubset = doc->extSubset;
9617 }
9618 if (doc->URL != NULL) {
9619 newDoc->URL = xmlStrdup(doc->URL);
9620 }
9621 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9622 if (newDoc->children == NULL) {
9623 if (sax != NULL)
9624 ctxt->sax = oldsax;
9625 xmlFreeParserCtxt(ctxt);
9626 newDoc->intSubset = NULL;
9627 newDoc->extSubset = NULL;
9628 xmlFreeDoc(newDoc);
9629 return(-1);
9630 }
9631 nodePush(ctxt, newDoc->children);
9632 if (doc == NULL) {
9633 ctxt->myDoc = newDoc;
9634 } else {
9635 ctxt->myDoc = doc;
9636 newDoc->children->doc = doc;
9637 }
9638
Daniel Veillard87a764e2001-06-20 17:41:10 +00009639 /*
9640 * Get the 4 first bytes and decode the charset
9641 * if enc != XML_CHAR_ENCODING_NONE
9642 * plug some encoding conversion routines.
9643 */
9644 GROW;
9645 start[0] = RAW;
9646 start[1] = NXT(1);
9647 start[2] = NXT(2);
9648 start[3] = NXT(3);
9649 enc = xmlDetectCharEncoding(start, 4);
9650 if (enc != XML_CHAR_ENCODING_NONE) {
9651 xmlSwitchEncoding(ctxt, enc);
9652 }
9653
Owen Taylor3473f882001-02-23 17:55:21 +00009654 /*
9655 * Parse a possible text declaration first
9656 */
Owen Taylor3473f882001-02-23 17:55:21 +00009657 if ((RAW == '<') && (NXT(1) == '?') &&
9658 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9659 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9660 xmlParseTextDecl(ctxt);
9661 }
9662
Owen Taylor3473f882001-02-23 17:55:21 +00009663 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009664 ctxt->depth = depth;
9665
9666 xmlParseContent(ctxt);
9667
Daniel Veillard561b7f82002-03-20 21:55:57 +00009668 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009669 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9671 ctxt->sax->error(ctxt->userData,
9672 "chunk is not well balanced\n");
9673 ctxt->wellFormed = 0;
9674 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009675 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009676 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9678 ctxt->sax->error(ctxt->userData,
9679 "extra content at the end of well balanced chunk\n");
9680 ctxt->wellFormed = 0;
9681 ctxt->disableSAX = 1;
9682 }
9683 if (ctxt->node != newDoc->children) {
9684 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9686 ctxt->sax->error(ctxt->userData,
9687 "chunk is not well balanced\n");
9688 ctxt->wellFormed = 0;
9689 ctxt->disableSAX = 1;
9690 }
9691
9692 if (!ctxt->wellFormed) {
9693 if (ctxt->errNo == 0)
9694 ret = 1;
9695 else
9696 ret = ctxt->errNo;
9697 } else {
9698 if (list != NULL) {
9699 xmlNodePtr cur;
9700
9701 /*
9702 * Return the newly created nodeset after unlinking it from
9703 * they pseudo parent.
9704 */
9705 cur = newDoc->children->children;
9706 *list = cur;
9707 while (cur != NULL) {
9708 cur->parent = NULL;
9709 cur = cur->next;
9710 }
9711 newDoc->children->children = NULL;
9712 }
9713 ret = 0;
9714 }
9715 if (sax != NULL)
9716 ctxt->sax = oldsax;
9717 xmlFreeParserCtxt(ctxt);
9718 newDoc->intSubset = NULL;
9719 newDoc->extSubset = NULL;
9720 xmlFreeDoc(newDoc);
9721
9722 return(ret);
9723}
9724
9725/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009726 * xmlParseExternalEntity:
9727 * @doc: the document the chunk pertains to
9728 * @sax: the SAX handler bloc (possibly NULL)
9729 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9730 * @depth: Used for loop detection, use 0
9731 * @URL: the URL for the entity to load
9732 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009733 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009734 *
9735 * Parse an external general entity
9736 * An external general parsed entity is well-formed if it matches the
9737 * production labeled extParsedEnt.
9738 *
9739 * [78] extParsedEnt ::= TextDecl? content
9740 *
9741 * Returns 0 if the entity is well formed, -1 in case of args problem and
9742 * the parser error code otherwise
9743 */
9744
9745int
9746xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009747 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009748 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009749 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009750}
9751
9752/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009753 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009754 * @doc: the document the chunk pertains to
9755 * @sax: the SAX handler bloc (possibly NULL)
9756 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9757 * @depth: Used for loop detection, use 0
9758 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009759 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009760 *
9761 * Parse a well-balanced chunk of an XML document
9762 * called by the parser
9763 * The allowed sequence for the Well Balanced Chunk is the one defined by
9764 * the content production in the XML grammar:
9765 *
9766 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9767 *
9768 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9769 * the parser error code otherwise
9770 */
9771
9772int
9773xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009774 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009775 xmlParserCtxtPtr ctxt;
9776 xmlDocPtr newDoc;
9777 xmlSAXHandlerPtr oldsax = NULL;
9778 int size;
9779 int ret = 0;
9780
9781 if (depth > 40) {
9782 return(XML_ERR_ENTITY_LOOP);
9783 }
9784
9785
Daniel Veillardcda96922001-08-21 10:56:31 +00009786 if (lst != NULL)
9787 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009788 if (string == NULL)
9789 return(-1);
9790
9791 size = xmlStrlen(string);
9792
9793 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9794 if (ctxt == NULL) return(-1);
9795 ctxt->userData = ctxt;
9796 if (sax != NULL) {
9797 oldsax = ctxt->sax;
9798 ctxt->sax = sax;
9799 if (user_data != NULL)
9800 ctxt->userData = user_data;
9801 }
9802 newDoc = xmlNewDoc(BAD_CAST "1.0");
9803 if (newDoc == NULL) {
9804 xmlFreeParserCtxt(ctxt);
9805 return(-1);
9806 }
9807 if (doc != NULL) {
9808 newDoc->intSubset = doc->intSubset;
9809 newDoc->extSubset = doc->extSubset;
9810 }
9811 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9812 if (newDoc->children == NULL) {
9813 if (sax != NULL)
9814 ctxt->sax = oldsax;
9815 xmlFreeParserCtxt(ctxt);
9816 newDoc->intSubset = NULL;
9817 newDoc->extSubset = NULL;
9818 xmlFreeDoc(newDoc);
9819 return(-1);
9820 }
9821 nodePush(ctxt, newDoc->children);
9822 if (doc == NULL) {
9823 ctxt->myDoc = newDoc;
9824 } else {
9825 ctxt->myDoc = doc;
9826 newDoc->children->doc = doc;
9827 }
9828 ctxt->instate = XML_PARSER_CONTENT;
9829 ctxt->depth = depth;
9830
9831 /*
9832 * Doing validity checking on chunk doesn't make sense
9833 */
9834 ctxt->validate = 0;
9835 ctxt->loadsubset = 0;
9836
9837 xmlParseContent(ctxt);
9838
9839 if ((RAW == '<') && (NXT(1) == '/')) {
9840 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9842 ctxt->sax->error(ctxt->userData,
9843 "chunk is not well balanced\n");
9844 ctxt->wellFormed = 0;
9845 ctxt->disableSAX = 1;
9846 } else if (RAW != 0) {
9847 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9849 ctxt->sax->error(ctxt->userData,
9850 "extra content at the end of well balanced chunk\n");
9851 ctxt->wellFormed = 0;
9852 ctxt->disableSAX = 1;
9853 }
9854 if (ctxt->node != newDoc->children) {
9855 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9857 ctxt->sax->error(ctxt->userData,
9858 "chunk is not well balanced\n");
9859 ctxt->wellFormed = 0;
9860 ctxt->disableSAX = 1;
9861 }
9862
9863 if (!ctxt->wellFormed) {
9864 if (ctxt->errNo == 0)
9865 ret = 1;
9866 else
9867 ret = ctxt->errNo;
9868 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009869 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009870 xmlNodePtr cur;
9871
9872 /*
9873 * Return the newly created nodeset after unlinking it from
9874 * they pseudo parent.
9875 */
9876 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009877 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009878 while (cur != NULL) {
9879 cur->parent = NULL;
9880 cur = cur->next;
9881 }
9882 newDoc->children->children = NULL;
9883 }
9884 ret = 0;
9885 }
9886 if (sax != NULL)
9887 ctxt->sax = oldsax;
9888 xmlFreeParserCtxt(ctxt);
9889 newDoc->intSubset = NULL;
9890 newDoc->extSubset = NULL;
9891 xmlFreeDoc(newDoc);
9892
9893 return(ret);
9894}
9895
9896/**
9897 * xmlSAXParseEntity:
9898 * @sax: the SAX handler block
9899 * @filename: the filename
9900 *
9901 * parse an XML external entity out of context and build a tree.
9902 * It use the given SAX function block to handle the parsing callback.
9903 * If sax is NULL, fallback to the default DOM tree building routines.
9904 *
9905 * [78] extParsedEnt ::= TextDecl? content
9906 *
9907 * This correspond to a "Well Balanced" chunk
9908 *
9909 * Returns the resulting document tree
9910 */
9911
9912xmlDocPtr
9913xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9914 xmlDocPtr ret;
9915 xmlParserCtxtPtr ctxt;
9916 char *directory = NULL;
9917
9918 ctxt = xmlCreateFileParserCtxt(filename);
9919 if (ctxt == NULL) {
9920 return(NULL);
9921 }
9922 if (sax != NULL) {
9923 if (ctxt->sax != NULL)
9924 xmlFree(ctxt->sax);
9925 ctxt->sax = sax;
9926 ctxt->userData = NULL;
9927 }
9928
9929 if ((ctxt->directory == NULL) && (directory == NULL))
9930 directory = xmlParserGetDirectory(filename);
9931
9932 xmlParseExtParsedEnt(ctxt);
9933
9934 if (ctxt->wellFormed)
9935 ret = ctxt->myDoc;
9936 else {
9937 ret = NULL;
9938 xmlFreeDoc(ctxt->myDoc);
9939 ctxt->myDoc = NULL;
9940 }
9941 if (sax != NULL)
9942 ctxt->sax = NULL;
9943 xmlFreeParserCtxt(ctxt);
9944
9945 return(ret);
9946}
9947
9948/**
9949 * xmlParseEntity:
9950 * @filename: the filename
9951 *
9952 * parse an XML external entity out of context and build a tree.
9953 *
9954 * [78] extParsedEnt ::= TextDecl? content
9955 *
9956 * This correspond to a "Well Balanced" chunk
9957 *
9958 * Returns the resulting document tree
9959 */
9960
9961xmlDocPtr
9962xmlParseEntity(const char *filename) {
9963 return(xmlSAXParseEntity(NULL, filename));
9964}
9965
9966/**
9967 * xmlCreateEntityParserCtxt:
9968 * @URL: the entity URL
9969 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009970 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009971 *
9972 * Create a parser context for an external entity
9973 * Automatic support for ZLIB/Compress compressed document is provided
9974 * by default if found at compile-time.
9975 *
9976 * Returns the new parser context or NULL
9977 */
9978xmlParserCtxtPtr
9979xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9980 const xmlChar *base) {
9981 xmlParserCtxtPtr ctxt;
9982 xmlParserInputPtr inputStream;
9983 char *directory = NULL;
9984 xmlChar *uri;
9985
9986 ctxt = xmlNewParserCtxt();
9987 if (ctxt == NULL) {
9988 return(NULL);
9989 }
9990
9991 uri = xmlBuildURI(URL, base);
9992
9993 if (uri == NULL) {
9994 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9995 if (inputStream == NULL) {
9996 xmlFreeParserCtxt(ctxt);
9997 return(NULL);
9998 }
9999
10000 inputPush(ctxt, inputStream);
10001
10002 if ((ctxt->directory == NULL) && (directory == NULL))
10003 directory = xmlParserGetDirectory((char *)URL);
10004 if ((ctxt->directory == NULL) && (directory != NULL))
10005 ctxt->directory = directory;
10006 } else {
10007 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10008 if (inputStream == NULL) {
10009 xmlFree(uri);
10010 xmlFreeParserCtxt(ctxt);
10011 return(NULL);
10012 }
10013
10014 inputPush(ctxt, inputStream);
10015
10016 if ((ctxt->directory == NULL) && (directory == NULL))
10017 directory = xmlParserGetDirectory((char *)uri);
10018 if ((ctxt->directory == NULL) && (directory != NULL))
10019 ctxt->directory = directory;
10020 xmlFree(uri);
10021 }
10022
10023 return(ctxt);
10024}
10025
10026/************************************************************************
10027 * *
10028 * Front ends when parsing from a file *
10029 * *
10030 ************************************************************************/
10031
10032/**
10033 * xmlCreateFileParserCtxt:
10034 * @filename: the filename
10035 *
10036 * Create a parser context for a file content.
10037 * Automatic support for ZLIB/Compress compressed document is provided
10038 * by default if found at compile-time.
10039 *
10040 * Returns the new parser context or NULL
10041 */
10042xmlParserCtxtPtr
10043xmlCreateFileParserCtxt(const char *filename)
10044{
10045 xmlParserCtxtPtr ctxt;
10046 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010047 char *directory = NULL;
10048
Owen Taylor3473f882001-02-23 17:55:21 +000010049 ctxt = xmlNewParserCtxt();
10050 if (ctxt == NULL) {
10051 if (xmlDefaultSAXHandler.error != NULL) {
10052 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10053 }
10054 return(NULL);
10055 }
10056
Daniel Veillard9f7b84b2001-08-23 15:31:19 +000010057 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010058 if (inputStream == NULL) {
10059 xmlFreeParserCtxt(ctxt);
10060 return(NULL);
10061 }
10062
Owen Taylor3473f882001-02-23 17:55:21 +000010063 inputPush(ctxt, inputStream);
10064 if ((ctxt->directory == NULL) && (directory == NULL))
10065 directory = xmlParserGetDirectory(filename);
10066 if ((ctxt->directory == NULL) && (directory != NULL))
10067 ctxt->directory = directory;
10068
10069 return(ctxt);
10070}
10071
10072/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010073 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010074 * @sax: the SAX handler block
10075 * @filename: the filename
10076 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10077 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010078 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010079 *
10080 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10081 * compressed document is provided by default if found at compile-time.
10082 * It use the given SAX function block to handle the parsing callback.
10083 * If sax is NULL, fallback to the default DOM tree building routines.
10084 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010085 * User data (void *) is stored within the parser context in the
10086 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010087 *
Owen Taylor3473f882001-02-23 17:55:21 +000010088 * Returns the resulting document tree
10089 */
10090
10091xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010092xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10093 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010094 xmlDocPtr ret;
10095 xmlParserCtxtPtr ctxt;
10096 char *directory = NULL;
10097
Daniel Veillard635ef722001-10-29 11:48:19 +000010098 xmlInitParser();
10099
Owen Taylor3473f882001-02-23 17:55:21 +000010100 ctxt = xmlCreateFileParserCtxt(filename);
10101 if (ctxt == NULL) {
10102 return(NULL);
10103 }
10104 if (sax != NULL) {
10105 if (ctxt->sax != NULL)
10106 xmlFree(ctxt->sax);
10107 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010108 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010109 if (data!=NULL) {
10110 ctxt->_private=data;
10111 }
Owen Taylor3473f882001-02-23 17:55:21 +000010112
10113 if ((ctxt->directory == NULL) && (directory == NULL))
10114 directory = xmlParserGetDirectory(filename);
10115 if ((ctxt->directory == NULL) && (directory != NULL))
10116 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10117
10118 xmlParseDocument(ctxt);
10119
10120 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10121 else {
10122 ret = NULL;
10123 xmlFreeDoc(ctxt->myDoc);
10124 ctxt->myDoc = NULL;
10125 }
10126 if (sax != NULL)
10127 ctxt->sax = NULL;
10128 xmlFreeParserCtxt(ctxt);
10129
10130 return(ret);
10131}
10132
10133/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010134 * xmlSAXParseFile:
10135 * @sax: the SAX handler block
10136 * @filename: the filename
10137 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10138 * documents
10139 *
10140 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10141 * compressed document is provided by default if found at compile-time.
10142 * It use the given SAX function block to handle the parsing callback.
10143 * If sax is NULL, fallback to the default DOM tree building routines.
10144 *
10145 * Returns the resulting document tree
10146 */
10147
10148xmlDocPtr
10149xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10150 int recovery) {
10151 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10152}
10153
10154/**
Owen Taylor3473f882001-02-23 17:55:21 +000010155 * xmlRecoverDoc:
10156 * @cur: a pointer to an array of xmlChar
10157 *
10158 * parse an XML in-memory document and build a tree.
10159 * In the case the document is not Well Formed, a tree is built anyway
10160 *
10161 * Returns the resulting document tree
10162 */
10163
10164xmlDocPtr
10165xmlRecoverDoc(xmlChar *cur) {
10166 return(xmlSAXParseDoc(NULL, cur, 1));
10167}
10168
10169/**
10170 * xmlParseFile:
10171 * @filename: the filename
10172 *
10173 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10174 * compressed document is provided by default if found at compile-time.
10175 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010176 * Returns the resulting document tree if the file was wellformed,
10177 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010178 */
10179
10180xmlDocPtr
10181xmlParseFile(const char *filename) {
10182 return(xmlSAXParseFile(NULL, filename, 0));
10183}
10184
10185/**
10186 * xmlRecoverFile:
10187 * @filename: the filename
10188 *
10189 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10190 * compressed document is provided by default if found at compile-time.
10191 * In the case the document is not Well Formed, a tree is built anyway
10192 *
10193 * Returns the resulting document tree
10194 */
10195
10196xmlDocPtr
10197xmlRecoverFile(const char *filename) {
10198 return(xmlSAXParseFile(NULL, filename, 1));
10199}
10200
10201
10202/**
10203 * xmlSetupParserForBuffer:
10204 * @ctxt: an XML parser context
10205 * @buffer: a xmlChar * buffer
10206 * @filename: a file name
10207 *
10208 * Setup the parser context to parse a new buffer; Clears any prior
10209 * contents from the parser context. The buffer parameter must not be
10210 * NULL, but the filename parameter can be
10211 */
10212void
10213xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10214 const char* filename)
10215{
10216 xmlParserInputPtr input;
10217
10218 input = xmlNewInputStream(ctxt);
10219 if (input == NULL) {
10220 perror("malloc");
10221 xmlFree(ctxt);
10222 return;
10223 }
10224
10225 xmlClearParserCtxt(ctxt);
10226 if (filename != NULL)
10227 input->filename = xmlMemStrdup(filename);
10228 input->base = buffer;
10229 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010230 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010231 inputPush(ctxt, input);
10232}
10233
10234/**
10235 * xmlSAXUserParseFile:
10236 * @sax: a SAX handler
10237 * @user_data: The user data returned on SAX callbacks
10238 * @filename: a file name
10239 *
10240 * parse an XML file and call the given SAX handler routines.
10241 * Automatic support for ZLIB/Compress compressed document is provided
10242 *
10243 * Returns 0 in case of success or a error number otherwise
10244 */
10245int
10246xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10247 const char *filename) {
10248 int ret = 0;
10249 xmlParserCtxtPtr ctxt;
10250
10251 ctxt = xmlCreateFileParserCtxt(filename);
10252 if (ctxt == NULL) return -1;
10253 if (ctxt->sax != &xmlDefaultSAXHandler)
10254 xmlFree(ctxt->sax);
10255 ctxt->sax = sax;
10256 if (user_data != NULL)
10257 ctxt->userData = user_data;
10258
10259 xmlParseDocument(ctxt);
10260
10261 if (ctxt->wellFormed)
10262 ret = 0;
10263 else {
10264 if (ctxt->errNo != 0)
10265 ret = ctxt->errNo;
10266 else
10267 ret = -1;
10268 }
10269 if (sax != NULL)
10270 ctxt->sax = NULL;
10271 xmlFreeParserCtxt(ctxt);
10272
10273 return ret;
10274}
10275
10276/************************************************************************
10277 * *
10278 * Front ends when parsing from memory *
10279 * *
10280 ************************************************************************/
10281
10282/**
10283 * xmlCreateMemoryParserCtxt:
10284 * @buffer: a pointer to a char array
10285 * @size: the size of the array
10286 *
10287 * Create a parser context for an XML in-memory document.
10288 *
10289 * Returns the new parser context or NULL
10290 */
10291xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010292xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010293 xmlParserCtxtPtr ctxt;
10294 xmlParserInputPtr input;
10295 xmlParserInputBufferPtr buf;
10296
10297 if (buffer == NULL)
10298 return(NULL);
10299 if (size <= 0)
10300 return(NULL);
10301
10302 ctxt = xmlNewParserCtxt();
10303 if (ctxt == NULL)
10304 return(NULL);
10305
10306 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10307 if (buf == NULL) return(NULL);
10308
10309 input = xmlNewInputStream(ctxt);
10310 if (input == NULL) {
10311 xmlFreeParserCtxt(ctxt);
10312 return(NULL);
10313 }
10314
10315 input->filename = NULL;
10316 input->buf = buf;
10317 input->base = input->buf->buffer->content;
10318 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010319 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010320
10321 inputPush(ctxt, input);
10322 return(ctxt);
10323}
10324
10325/**
10326 * xmlSAXParseMemory:
10327 * @sax: the SAX handler block
10328 * @buffer: an pointer to a char array
10329 * @size: the size of the array
10330 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10331 * documents
10332 *
10333 * parse an XML in-memory block and use the given SAX function block
10334 * to handle the parsing callback. If sax is NULL, fallback to the default
10335 * DOM tree building routines.
10336 *
10337 * Returns the resulting document tree
10338 */
10339xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010340xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10341 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010342 xmlDocPtr ret;
10343 xmlParserCtxtPtr ctxt;
10344
10345 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10346 if (ctxt == NULL) return(NULL);
10347 if (sax != NULL) {
10348 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010349 }
10350
10351 xmlParseDocument(ctxt);
10352
10353 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10354 else {
10355 ret = NULL;
10356 xmlFreeDoc(ctxt->myDoc);
10357 ctxt->myDoc = NULL;
10358 }
10359 if (sax != NULL)
10360 ctxt->sax = NULL;
10361 xmlFreeParserCtxt(ctxt);
10362
10363 return(ret);
10364}
10365
10366/**
10367 * xmlParseMemory:
10368 * @buffer: an pointer to a char array
10369 * @size: the size of the array
10370 *
10371 * parse an XML in-memory block and build a tree.
10372 *
10373 * Returns the resulting document tree
10374 */
10375
Daniel Veillard50822cb2001-07-26 20:05:51 +000010376xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010377 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10378}
10379
10380/**
10381 * xmlRecoverMemory:
10382 * @buffer: an pointer to a char array
10383 * @size: the size of the array
10384 *
10385 * parse an XML in-memory block and build a tree.
10386 * In the case the document is not Well Formed, a tree is built anyway
10387 *
10388 * Returns the resulting document tree
10389 */
10390
Daniel Veillard50822cb2001-07-26 20:05:51 +000010391xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010392 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10393}
10394
10395/**
10396 * xmlSAXUserParseMemory:
10397 * @sax: a SAX handler
10398 * @user_data: The user data returned on SAX callbacks
10399 * @buffer: an in-memory XML document input
10400 * @size: the length of the XML document in bytes
10401 *
10402 * A better SAX parsing routine.
10403 * parse an XML in-memory buffer and call the given SAX handler routines.
10404 *
10405 * Returns 0 in case of success or a error number otherwise
10406 */
10407int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010408 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010409 int ret = 0;
10410 xmlParserCtxtPtr ctxt;
10411 xmlSAXHandlerPtr oldsax = NULL;
10412
10413 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10414 if (ctxt == NULL) return -1;
10415 if (sax != NULL) {
10416 oldsax = ctxt->sax;
10417 ctxt->sax = sax;
10418 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010419 if (user_data != NULL)
10420 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010421
10422 xmlParseDocument(ctxt);
10423
10424 if (ctxt->wellFormed)
10425 ret = 0;
10426 else {
10427 if (ctxt->errNo != 0)
10428 ret = ctxt->errNo;
10429 else
10430 ret = -1;
10431 }
10432 if (sax != NULL) {
10433 ctxt->sax = oldsax;
10434 }
10435 xmlFreeParserCtxt(ctxt);
10436
10437 return ret;
10438}
10439
10440/**
10441 * xmlCreateDocParserCtxt:
10442 * @cur: a pointer to an array of xmlChar
10443 *
10444 * Creates a parser context for an XML in-memory document.
10445 *
10446 * Returns the new parser context or NULL
10447 */
10448xmlParserCtxtPtr
10449xmlCreateDocParserCtxt(xmlChar *cur) {
10450 int len;
10451
10452 if (cur == NULL)
10453 return(NULL);
10454 len = xmlStrlen(cur);
10455 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10456}
10457
10458/**
10459 * xmlSAXParseDoc:
10460 * @sax: the SAX handler block
10461 * @cur: a pointer to an array of xmlChar
10462 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10463 * documents
10464 *
10465 * parse an XML in-memory document and build a tree.
10466 * It use the given SAX function block to handle the parsing callback.
10467 * If sax is NULL, fallback to the default DOM tree building routines.
10468 *
10469 * Returns the resulting document tree
10470 */
10471
10472xmlDocPtr
10473xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10474 xmlDocPtr ret;
10475 xmlParserCtxtPtr ctxt;
10476
10477 if (cur == NULL) return(NULL);
10478
10479
10480 ctxt = xmlCreateDocParserCtxt(cur);
10481 if (ctxt == NULL) return(NULL);
10482 if (sax != NULL) {
10483 ctxt->sax = sax;
10484 ctxt->userData = NULL;
10485 }
10486
10487 xmlParseDocument(ctxt);
10488 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10489 else {
10490 ret = NULL;
10491 xmlFreeDoc(ctxt->myDoc);
10492 ctxt->myDoc = NULL;
10493 }
10494 if (sax != NULL)
10495 ctxt->sax = NULL;
10496 xmlFreeParserCtxt(ctxt);
10497
10498 return(ret);
10499}
10500
10501/**
10502 * xmlParseDoc:
10503 * @cur: a pointer to an array of xmlChar
10504 *
10505 * parse an XML in-memory document and build a tree.
10506 *
10507 * Returns the resulting document tree
10508 */
10509
10510xmlDocPtr
10511xmlParseDoc(xmlChar *cur) {
10512 return(xmlSAXParseDoc(NULL, cur, 0));
10513}
10514
Daniel Veillard8107a222002-01-13 14:10:10 +000010515/************************************************************************
10516 * *
10517 * Specific function to keep track of entities references *
10518 * and used by the XSLT debugger *
10519 * *
10520 ************************************************************************/
10521
10522static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10523
10524/**
10525 * xmlAddEntityReference:
10526 * @ent : A valid entity
10527 * @firstNode : A valid first node for children of entity
10528 * @lastNode : A valid last node of children entity
10529 *
10530 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10531 */
10532static void
10533xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10534 xmlNodePtr lastNode)
10535{
10536 if (xmlEntityRefFunc != NULL) {
10537 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10538 }
10539}
10540
10541
10542/**
10543 * xmlSetEntityReferenceFunc:
10544 * @func : A valid function
10545 *
10546 * Set the function to call call back when a xml reference has been made
10547 */
10548void
10549xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10550{
10551 xmlEntityRefFunc = func;
10552}
Owen Taylor3473f882001-02-23 17:55:21 +000010553
10554/************************************************************************
10555 * *
10556 * Miscellaneous *
10557 * *
10558 ************************************************************************/
10559
10560#ifdef LIBXML_XPATH_ENABLED
10561#include <libxml/xpath.h>
10562#endif
10563
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010564extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010565static int xmlParserInitialized = 0;
10566
10567/**
10568 * xmlInitParser:
10569 *
10570 * Initialization function for the XML parser.
10571 * This is not reentrant. Call once before processing in case of
10572 * use in multithreaded programs.
10573 */
10574
10575void
10576xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010577 if (xmlParserInitialized != 0)
10578 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010579
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010580 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10581 (xmlGenericError == NULL))
10582 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010583 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010584 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010585 xmlInitCharEncodingHandlers();
10586 xmlInitializePredefinedEntities();
10587 xmlDefaultSAXHandlerInit();
10588 xmlRegisterDefaultInputCallbacks();
10589 xmlRegisterDefaultOutputCallbacks();
10590#ifdef LIBXML_HTML_ENABLED
10591 htmlInitAutoClose();
10592 htmlDefaultSAXHandlerInit();
10593#endif
10594#ifdef LIBXML_XPATH_ENABLED
10595 xmlXPathInit();
10596#endif
10597 xmlParserInitialized = 1;
10598}
10599
10600/**
10601 * xmlCleanupParser:
10602 *
10603 * Cleanup function for the XML parser. It tries to reclaim all
10604 * parsing related global memory allocated for the parser processing.
10605 * It doesn't deallocate any document related memory. Calling this
10606 * function should not prevent reusing the parser.
10607 */
10608
10609void
10610xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010611 xmlCleanupCharEncodingHandlers();
10612 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010613#ifdef LIBXML_CATALOG_ENABLED
10614 xmlCatalogCleanup();
10615#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010616 xmlCleanupThreads();
10617 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010618}