blob: 38658f9bbec1fbdfc12b26945c29e05a0f995442 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * array of pointers are const: this is a read-only lookup table.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Owen Taylor3473f882001-02-23 17:55:21 +0000108/************************************************************************
109 * *
110 * Parser stacks related functions and macros *
111 * *
112 ************************************************************************/
113
114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
115 const xmlChar ** str);
116
117/*
118 * Generic function for accessing stacks in the Parser Context
119 */
120
121#define PUSH_AND_POP(scope, type, name) \
122scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
123 if (ctxt->name##Nr >= ctxt->name##Max) { \
124 ctxt->name##Max *= 2; \
125 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
126 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
127 if (ctxt->name##Tab == NULL) { \
128 xmlGenericError(xmlGenericErrorContext, \
129 "realloc failed !\n"); \
130 return(0); \
131 } \
132 } \
133 ctxt->name##Tab[ctxt->name##Nr] = value; \
134 ctxt->name = value; \
135 return(ctxt->name##Nr++); \
136} \
137scope type name##Pop(xmlParserCtxtPtr ctxt) { \
138 type ret; \
139 if (ctxt->name##Nr <= 0) return(0); \
140 ctxt->name##Nr--; \
141 if (ctxt->name##Nr > 0) \
142 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
143 else \
144 ctxt->name = NULL; \
145 ret = ctxt->name##Tab[ctxt->name##Nr]; \
146 ctxt->name##Tab[ctxt->name##Nr] = 0; \
147 return(ret); \
148} \
149
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000150/**
151 * inputPop:
152 * @ctxt: an XML parser context
153 *
154 * Pops the top parser input from the input stack
155 *
156 * Returns the input just removed
157 */
158/**
159 * inputPush:
160 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 *
163 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 *
165 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166 */
167/**
168 * namePop:
169 * @ctxt: an XML parser context
170 *
171 * Pops the top element name from the name stack
172 *
173 * Returns the name just removed
174 */
175/**
176 * namePush:
177 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 *
180 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 *
182 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 */
184/**
185 * nodePop:
186 * @ctxt: an XML parser context
187 *
188 * Pops the top element node from the node stack
189 *
190 * Returns the node just removed
191 */
192/**
193 * nodePush:
194 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000196 *
197 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 *
199 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000200 */
Owen Taylor3473f882001-02-23 17:55:21 +0000201/*
202 * Those macros actually generate the functions
203 */
204PUSH_AND_POP(extern, xmlParserInputPtr, input)
205PUSH_AND_POP(extern, xmlNodePtr, node)
206PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
270
/* Byte currently pointed at by the active input (RAW and CUR are identical) */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
/* The current read pointer itself */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the read pointer and the character counter by val, then
 * hand a '%' over to xmlParserHandlePEReference() and, when the byte
 * reached is 0 and the input cannot be grown, pop the input.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if (*ctxt->input->cur == 0) {                                       \
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)          \
            xmlPopInput(ctxt);                                          \
    }                                                                   \
  } while (0)
283
Daniel Veillard46de64e2002-05-29 08:21:33 +0000284#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
285 xmlSHRINK (ctxt);
286
287static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
288 xmlParserInputShrink(ctxt->input);
289 if ((*ctxt->input->cur == 0) &&
290 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
291 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000292 }
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard46de64e2002-05-29 08:21:33 +0000294#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
295 xmlGROW (ctxt);
296
297static void xmlGROW (xmlParserCtxtPtr ctxt) {
298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
299 if ((*ctxt->input->cur == 0) &&
300 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
301 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303
/* Skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one xmlChar and try to grow the input when the
 * byte reached is 0. Expands to a brace block, not to do { } while (0).
 */
#define NEXT1 {                                                         \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Update the line/column position from the current byte, advance the
 * read pointer by l xmlChars, then hand a '%' over to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) != '\n') {                                  \
        ctxt->input->col++;                                             \
    } else {                                                            \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    }                                                                   \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)

/* Current character of the context, resp. of the string s; l receives the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Store character v at b[i] and advance i: directly when l is 1,
 * through xmlCopyCharMultiByte() otherwise.
 * Expands to a bare if/else statement.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000329
330/**
331 * xmlSkipBlankChars:
332 * @ctxt: the XML parser context
333 *
334 * skip all blanks character found at that point in the input streams.
335 * It pops up finished entities in the process if allowable at that point.
336 *
337 * Returns the number of space chars skipped
338 */
339
340int
341xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000343
344 /*
345 * It's Okay to use CUR/NEXT here since all the blanks are on
346 * the ASCII range.
347 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
349 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000352 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000353 cur = ctxt->input->cur;
354 while (IS_BLANK(*cur)) {
355 if (*cur == '\n') {
356 ctxt->input->line++; ctxt->input->col = 1;
357 }
358 cur++;
359 res++;
360 if (*cur == 0) {
361 ctxt->input->cur = cur;
362 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
363 cur = ctxt->input->cur;
364 }
365 }
366 ctxt->input->cur = cur;
367 } else {
368 int cur;
369 do {
370 cur = CUR;
371 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
372 NEXT;
373 cur = CUR;
374 res++;
375 }
376 while ((cur == 0) && (ctxt->inputNr > 1) &&
377 (ctxt->instate != XML_PARSER_COMMENT)) {
378 xmlPopInput(ctxt);
379 cur = CUR;
380 }
381 /*
382 * Need to handle support of entities branching here
383 */
384 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
385 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
386 }
Owen Taylor3473f882001-02-23 17:55:21 +0000387 return(res);
388}
389
390/************************************************************************
391 * *
392 * Commodity functions to handle entities *
393 * *
394 ************************************************************************/
395
396/**
397 * xmlPopInput:
398 * @ctxt: an XML parser context
399 *
400 * xmlPopInput: the current input pointed by ctxt->input came to an end
401 * pop it and return the next char.
402 *
403 * Returns the current xmlChar in the parser context
404 */
405xmlChar
406xmlPopInput(xmlParserCtxtPtr ctxt) {
407 if (ctxt->inputNr == 1) return(0); /* End of main Input */
408 if (xmlParserDebugEntities)
409 xmlGenericError(xmlGenericErrorContext,
410 "Popping input %d\n", ctxt->inputNr);
411 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000412 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000413 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
414 return(xmlPopInput(ctxt));
415 return(CUR);
416}
417
418/**
419 * xmlPushInput:
420 * @ctxt: an XML parser context
421 * @input: an XML parser input fragment (entity, XML fragment ...).
422 *
423 * xmlPushInput: switch to a new input stream which is stacked on top
424 * of the previous one(s).
425 */
426void
427xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
428 if (input == NULL) return;
429
430 if (xmlParserDebugEntities) {
431 if ((ctxt->input != NULL) && (ctxt->input->filename))
432 xmlGenericError(xmlGenericErrorContext,
433 "%s(%d): ", ctxt->input->filename,
434 ctxt->input->line);
435 xmlGenericError(xmlGenericErrorContext,
436 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
437 }
438 inputPush(ctxt, input);
439 GROW;
440}
441
442/**
443 * xmlParseCharRef:
444 * @ctxt: an XML parser context
445 *
446 * parse Reference declarations
447 *
448 * [66] CharRef ::= '&#' [0-9]+ ';' |
449 * '&#x' [0-9a-fA-F]+ ';'
450 *
451 * [ WFC: Legal Character ]
452 * Characters referred to using character references must match the
453 * production for Char.
454 *
455 * Returns the value parsed (as an int), 0 in case of error
456 */
457int
458xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000459 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000460 int count = 0;
461
Owen Taylor3473f882001-02-23 17:55:21 +0000462 /*
463 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
464 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000465 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000466 (NXT(2) == 'x')) {
467 SKIP(3);
468 GROW;
469 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000470 if (count++ > 20) {
471 count = 0;
472 GROW;
473 }
474 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000475 val = val * 16 + (CUR - '0');
476 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
477 val = val * 16 + (CUR - 'a') + 10;
478 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
479 val = val * 16 + (CUR - 'A') + 10;
480 else {
481 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
483 ctxt->sax->error(ctxt->userData,
484 "xmlParseCharRef: invalid hexadecimal value\n");
485 ctxt->wellFormed = 0;
486 ctxt->disableSAX = 1;
487 val = 0;
488 break;
489 }
490 NEXT;
491 count++;
492 }
493 if (RAW == ';') {
494 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
495 ctxt->nbChars ++;
496 ctxt->input->cur++;
497 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000498 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000499 SKIP(2);
500 GROW;
501 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000502 if (count++ > 20) {
503 count = 0;
504 GROW;
505 }
506 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000507 val = val * 10 + (CUR - '0');
508 else {
509 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
511 ctxt->sax->error(ctxt->userData,
512 "xmlParseCharRef: invalid decimal value\n");
513 ctxt->wellFormed = 0;
514 ctxt->disableSAX = 1;
515 val = 0;
516 break;
517 }
518 NEXT;
519 count++;
520 }
521 if (RAW == ';') {
522 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
523 ctxt->nbChars ++;
524 ctxt->input->cur++;
525 }
526 } else {
527 ctxt->errNo = XML_ERR_INVALID_CHARREF;
528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
529 ctxt->sax->error(ctxt->userData,
530 "xmlParseCharRef: invalid value\n");
531 ctxt->wellFormed = 0;
532 ctxt->disableSAX = 1;
533 }
534
535 /*
536 * [ WFC: Legal Character ]
537 * Characters referred to using character references must match the
538 * production for Char.
539 */
540 if (IS_CHAR(val)) {
541 return(val);
542 } else {
543 ctxt->errNo = XML_ERR_INVALID_CHAR;
544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000545 ctxt->sax->error(ctxt->userData,
546 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000547 val);
548 ctxt->wellFormed = 0;
549 ctxt->disableSAX = 1;
550 }
551 return(0);
552}
553
554/**
555 * xmlParseStringCharRef:
556 * @ctxt: an XML parser context
557 * @str: a pointer to an index in the string
558 *
559 * parse Reference declarations, variant parsing from a string rather
560 * than an an input flow.
561 *
562 * [66] CharRef ::= '&#' [0-9]+ ';' |
563 * '&#x' [0-9a-fA-F]+ ';'
564 *
565 * [ WFC: Legal Character ]
566 * Characters referred to using character references must match the
567 * production for Char.
568 *
569 * Returns the value parsed (as an int), 0 in case of error, str will be
570 * updated to the current value of the index
571 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000572static int
Owen Taylor3473f882001-02-23 17:55:21 +0000573xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
574 const xmlChar *ptr;
575 xmlChar cur;
576 int val = 0;
577
578 if ((str == NULL) || (*str == NULL)) return(0);
579 ptr = *str;
580 cur = *ptr;
581 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
582 ptr += 3;
583 cur = *ptr;
584 while (cur != ';') { /* Non input consuming loop */
585 if ((cur >= '0') && (cur <= '9'))
586 val = val * 16 + (cur - '0');
587 else if ((cur >= 'a') && (cur <= 'f'))
588 val = val * 16 + (cur - 'a') + 10;
589 else if ((cur >= 'A') && (cur <= 'F'))
590 val = val * 16 + (cur - 'A') + 10;
591 else {
592 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
594 ctxt->sax->error(ctxt->userData,
595 "xmlParseStringCharRef: invalid hexadecimal value\n");
596 ctxt->wellFormed = 0;
597 ctxt->disableSAX = 1;
598 val = 0;
599 break;
600 }
601 ptr++;
602 cur = *ptr;
603 }
604 if (cur == ';')
605 ptr++;
606 } else if ((cur == '&') && (ptr[1] == '#')){
607 ptr += 2;
608 cur = *ptr;
609 while (cur != ';') { /* Non input consuming loops */
610 if ((cur >= '0') && (cur <= '9'))
611 val = val * 10 + (cur - '0');
612 else {
613 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
616 "xmlParseStringCharRef: invalid decimal value\n");
617 ctxt->wellFormed = 0;
618 ctxt->disableSAX = 1;
619 val = 0;
620 break;
621 }
622 ptr++;
623 cur = *ptr;
624 }
625 if (cur == ';')
626 ptr++;
627 } else {
628 ctxt->errNo = XML_ERR_INVALID_CHARREF;
629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
630 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000631 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000632 ctxt->wellFormed = 0;
633 ctxt->disableSAX = 1;
634 return(0);
635 }
636 *str = ptr;
637
638 /*
639 * [ WFC: Legal Character ]
640 * Characters referred to using character references must match the
641 * production for Char.
642 */
643 if (IS_CHAR(val)) {
644 return(val);
645 } else {
646 ctxt->errNo = XML_ERR_INVALID_CHAR;
647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
648 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000649 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000650 ctxt->wellFormed = 0;
651 ctxt->disableSAX = 1;
652 }
653 return(0);
654}
655
656/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000657 * xmlNewBlanksWrapperInputStream:
658 * @ctxt: an XML parser context
659 * @entity: an Entity pointer
660 *
661 * Create a new input stream for wrapping
662 * blanks around a PEReference
663 *
664 * Returns the new input stream or NULL
665 */
666
667static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
668
669xmlParserInputPtr
670xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
671 xmlParserInputPtr input;
672 xmlChar *buffer;
673 size_t length;
674 if (entity == NULL) {
675 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
677 ctxt->sax->error(ctxt->userData,
678 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
679 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
680 return(NULL);
681 }
682 if (xmlParserDebugEntities)
683 xmlGenericError(xmlGenericErrorContext,
684 "new blanks wrapper for entity: %s\n", entity->name);
685 input = xmlNewInputStream(ctxt);
686 if (input == NULL) {
687 return(NULL);
688 }
689 length = xmlStrlen(entity->name) + 5;
690 buffer = xmlMalloc(length);
691 if (buffer == NULL) {
692 return(NULL);
693 }
694 buffer [0] = ' ';
695 buffer [1] = '%';
696 buffer [length-3] = ';';
697 buffer [length-2] = ' ';
698 buffer [length-1] = 0;
699 memcpy(buffer + 2, entity->name, length - 5);
700 input->free = deallocblankswrapper;
701 input->base = buffer;
702 input->cur = buffer;
703 input->length = length;
704 input->end = &buffer[length];
705 return(input);
706}
707
708/**
Owen Taylor3473f882001-02-23 17:55:21 +0000709 * xmlParserHandlePEReference:
710 * @ctxt: the parser context
711 *
712 * [69] PEReference ::= '%' Name ';'
713 *
714 * [ WFC: No Recursion ]
715 * A parsed entity must not contain a recursive
716 * reference to itself, either directly or indirectly.
717 *
718 * [ WFC: Entity Declared ]
719 * In a document without any DTD, a document with only an internal DTD
720 * subset which contains no parameter entity references, or a document
721 * with "standalone='yes'", ... ... The declaration of a parameter
722 * entity must precede any reference to it...
723 *
724 * [ VC: Entity Declared ]
725 * In a document with an external subset or external parameter entities
726 * with "standalone='no'", ... ... The declaration of a parameter entity
727 * must precede any reference to it...
728 *
729 * [ WFC: In DTD ]
730 * Parameter-entity references may only appear in the DTD.
731 * NOTE: misleading but this is handled.
732 *
733 * A PEReference may have been detected in the current input stream
734 * the handling is done accordingly to
735 * http://www.w3.org/TR/REC-xml#entproc
736 * i.e.
737 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000738 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000739 */
740void
741xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
742 xmlChar *name;
743 xmlEntityPtr entity = NULL;
744 xmlParserInputPtr input;
745
Owen Taylor3473f882001-02-23 17:55:21 +0000746 if (RAW != '%') return;
747 switch(ctxt->instate) {
748 case XML_PARSER_CDATA_SECTION:
749 return;
750 case XML_PARSER_COMMENT:
751 return;
752 case XML_PARSER_START_TAG:
753 return;
754 case XML_PARSER_END_TAG:
755 return;
756 case XML_PARSER_EOF:
757 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
759 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 return;
763 case XML_PARSER_PROLOG:
764 case XML_PARSER_START:
765 case XML_PARSER_MISC:
766 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
768 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 return;
772 case XML_PARSER_ENTITY_DECL:
773 case XML_PARSER_CONTENT:
774 case XML_PARSER_ATTRIBUTE_VALUE:
775 case XML_PARSER_PI:
776 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000777 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000778 /* we just ignore it there */
779 return;
780 case XML_PARSER_EPILOG:
781 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
783 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
784 ctxt->wellFormed = 0;
785 ctxt->disableSAX = 1;
786 return;
787 case XML_PARSER_ENTITY_VALUE:
788 /*
789 * NOTE: in the case of entity values, we don't do the
790 * substitution here since we need the literal
791 * entity value to be able to save the internal
792 * subset of the document.
793 * This will be handled by xmlStringDecodeEntities
794 */
795 return;
796 case XML_PARSER_DTD:
797 /*
798 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
799 * In the internal DTD subset, parameter-entity references
800 * can occur only where markup declarations can occur, not
801 * within markup declarations.
802 * In that case this is handled in xmlParseMarkupDecl
803 */
804 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
805 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000806 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
807 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000808 break;
809 case XML_PARSER_IGNORE:
810 return;
811 }
812
813 NEXT;
814 name = xmlParseName(ctxt);
815 if (xmlParserDebugEntities)
816 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000817 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000818 if (name == NULL) {
819 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000821 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000822 ctxt->wellFormed = 0;
823 ctxt->disableSAX = 1;
824 } else {
825 if (RAW == ';') {
826 NEXT;
827 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
828 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
829 if (entity == NULL) {
830
831 /*
832 * [ WFC: Entity Declared ]
833 * In a document without any DTD, a document with only an
834 * internal DTD subset which contains no parameter entity
835 * references, or a document with "standalone='yes'", ...
836 * ... The declaration of a parameter entity must precede
837 * any reference to it...
838 */
839 if ((ctxt->standalone == 1) ||
840 ((ctxt->hasExternalSubset == 0) &&
841 (ctxt->hasPErefs == 0))) {
842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
843 ctxt->sax->error(ctxt->userData,
844 "PEReference: %%%s; not found\n", name);
845 ctxt->wellFormed = 0;
846 ctxt->disableSAX = 1;
847 } else {
848 /*
849 * [ VC: Entity Declared ]
850 * In a document with an external subset or external
851 * parameter entities with "standalone='no'", ...
852 * ... The declaration of a parameter entity must precede
853 * any reference to it...
854 */
855 if ((!ctxt->disableSAX) &&
856 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
857 ctxt->vctxt.error(ctxt->vctxt.userData,
858 "PEReference: %%%s; not found\n", name);
859 } else if ((!ctxt->disableSAX) &&
860 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
861 ctxt->sax->warning(ctxt->userData,
862 "PEReference: %%%s; not found\n", name);
863 ctxt->valid = 0;
864 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000865 } else if (ctxt->input->free != deallocblankswrapper) {
866 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
867 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000868 } else {
869 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
870 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000871 xmlChar start[4];
872 xmlCharEncoding enc;
873
Owen Taylor3473f882001-02-23 17:55:21 +0000874 /*
875 * handle the extra spaces added before and after
876 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000877 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000878 */
879 input = xmlNewEntityInputStream(ctxt, entity);
880 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000881
882 /*
883 * Get the 4 first bytes and decode the charset
884 * if enc != XML_CHAR_ENCODING_NONE
885 * plug some encoding conversion routines.
886 */
887 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000888 if (entity->length >= 4) {
889 start[0] = RAW;
890 start[1] = NXT(1);
891 start[2] = NXT(2);
892 start[3] = NXT(3);
893 enc = xmlDetectCharEncoding(start, 4);
894 if (enc != XML_CHAR_ENCODING_NONE) {
895 xmlSwitchEncoding(ctxt, enc);
896 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000897 }
898
Owen Taylor3473f882001-02-23 17:55:21 +0000899 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
900 (RAW == '<') && (NXT(1) == '?') &&
901 (NXT(2) == 'x') && (NXT(3) == 'm') &&
902 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
903 xmlParseTextDecl(ctxt);
904 }
Owen Taylor3473f882001-02-23 17:55:21 +0000905 } else {
906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
907 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000908 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000909 name);
910 ctxt->wellFormed = 0;
911 ctxt->disableSAX = 1;
912 }
913 }
914 } else {
915 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
917 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000918 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000919 ctxt->wellFormed = 0;
920 ctxt->disableSAX = 1;
921 }
922 xmlFree(name);
923 }
924}
925
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer to that many xmlChar.
 * The result of xmlRealloc() is stored in a temporary first: if the
 * reallocation fails the previous allocation is still valid, so it is
 * released here instead of being leaked, and the enclosing function
 * returns NULL.
 *
 * The macro can only be used in functions returning a pointer, with a
 * heap allocated @buffer and a matching integer named <buffer>_size.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
938
939/**
940 * xmlStringDecodeEntities:
941 * @ctxt: the parser context
942 * @str: the input string
943 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
944 * @end: an end marker xmlChar, 0 if none
945 * @end2: an end marker xmlChar, 0 if none
946 * @end3: an end marker xmlChar, 0 if none
947 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000948 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000949 *
950 * [67] Reference ::= EntityRef | CharRef
951 *
952 * [69] PEReference ::= '%' Name ';'
953 *
954 * Returns A newly allocated string with the substitution done. The caller
955 * must deallocate it !
956 */
957xmlChar *
958xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
959 xmlChar end, xmlChar end2, xmlChar end3) {
960 xmlChar *buffer = NULL;
961 int buffer_size = 0;
962
963 xmlChar *current = NULL;
964 xmlEntityPtr ent;
965 int c,l;
966 int nbchars = 0;
967
968 if (str == NULL)
969 return(NULL);
970
971 if (ctxt->depth > 40) {
972 ctxt->errNo = XML_ERR_ENTITY_LOOP;
973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
974 ctxt->sax->error(ctxt->userData,
975 "Detected entity reference loop\n");
976 ctxt->wellFormed = 0;
977 ctxt->disableSAX = 1;
978 return(NULL);
979 }
980
981 /*
982 * allocate a translation buffer.
983 */
984 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
985 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
986 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000987 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000988 return(NULL);
989 }
990
991 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000992 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000993 * we are operating on already parsed values.
994 */
995 c = CUR_SCHAR(str, l);
996 while ((c != 0) && (c != end) && /* non input consuming loop */
997 (c != end2) && (c != end3)) {
998
999 if (c == 0) break;
1000 if ((c == '&') && (str[1] == '#')) {
1001 int val = xmlParseStringCharRef(ctxt, &str);
1002 if (val != 0) {
1003 COPY_BUF(0,buffer,nbchars,val);
1004 }
1005 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1006 if (xmlParserDebugEntities)
1007 xmlGenericError(xmlGenericErrorContext,
1008 "String decoding Entity Reference: %.30s\n",
1009 str);
1010 ent = xmlParseStringEntityRef(ctxt, &str);
1011 if ((ent != NULL) &&
1012 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1013 if (ent->content != NULL) {
1014 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1015 } else {
1016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1017 ctxt->sax->error(ctxt->userData,
1018 "internal error entity has no content\n");
1019 }
1020 } else if ((ent != NULL) && (ent->content != NULL)) {
1021 xmlChar *rep;
1022
1023 ctxt->depth++;
1024 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1025 0, 0, 0);
1026 ctxt->depth--;
1027 if (rep != NULL) {
1028 current = rep;
1029 while (*current != 0) { /* non input consuming loop */
1030 buffer[nbchars++] = *current++;
1031 if (nbchars >
1032 buffer_size - XML_PARSER_BUFFER_SIZE) {
1033 growBuffer(buffer);
1034 }
1035 }
1036 xmlFree(rep);
1037 }
1038 } else if (ent != NULL) {
1039 int i = xmlStrlen(ent->name);
1040 const xmlChar *cur = ent->name;
1041
1042 buffer[nbchars++] = '&';
1043 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1044 growBuffer(buffer);
1045 }
1046 for (;i > 0;i--)
1047 buffer[nbchars++] = *cur++;
1048 buffer[nbchars++] = ';';
1049 }
1050 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1051 if (xmlParserDebugEntities)
1052 xmlGenericError(xmlGenericErrorContext,
1053 "String decoding PE Reference: %.30s\n", str);
1054 ent = xmlParseStringPEReference(ctxt, &str);
1055 if (ent != NULL) {
1056 xmlChar *rep;
1057
1058 ctxt->depth++;
1059 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1060 0, 0, 0);
1061 ctxt->depth--;
1062 if (rep != NULL) {
1063 current = rep;
1064 while (*current != 0) { /* non input consuming loop */
1065 buffer[nbchars++] = *current++;
1066 if (nbchars >
1067 buffer_size - XML_PARSER_BUFFER_SIZE) {
1068 growBuffer(buffer);
1069 }
1070 }
1071 xmlFree(rep);
1072 }
1073 }
1074 } else {
1075 COPY_BUF(l,buffer,nbchars,c);
1076 str += l;
1077 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1078 growBuffer(buffer);
1079 }
1080 }
1081 c = CUR_SCHAR(str, l);
1082 }
1083 buffer[nbchars++] = 0;
1084 return(buffer);
1085}
1086
1087
1088/************************************************************************
1089 * *
1090 * Commodity functions to handle xmlChars *
1091 * *
1092 ************************************************************************/
1093
1094/**
1095 * xmlStrndup:
1096 * @cur: the input xmlChar *
1097 * @len: the len of @cur
1098 *
1099 * a strndup for array of xmlChar's
1100 *
1101 * Returns a new xmlChar * or NULL
1102 */
1103xmlChar *
1104xmlStrndup(const xmlChar *cur, int len) {
1105 xmlChar *ret;
1106
1107 if ((cur == NULL) || (len < 0)) return(NULL);
1108 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1109 if (ret == NULL) {
1110 xmlGenericError(xmlGenericErrorContext,
1111 "malloc of %ld byte failed\n",
1112 (len + 1) * (long)sizeof(xmlChar));
1113 return(NULL);
1114 }
1115 memcpy(ret, cur, len * sizeof(xmlChar));
1116 ret[len] = 0;
1117 return(ret);
1118}
1119
1120/**
1121 * xmlStrdup:
1122 * @cur: the input xmlChar *
1123 *
1124 * a strdup for array of xmlChar's. Since they are supposed to be
1125 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1126 * a termination mark of '0'.
1127 *
1128 * Returns a new xmlChar * or NULL
1129 */
1130xmlChar *
1131xmlStrdup(const xmlChar *cur) {
1132 const xmlChar *p = cur;
1133
1134 if (cur == NULL) return(NULL);
1135 while (*p != 0) p++; /* non input consuming */
1136 return(xmlStrndup(cur, p - cur));
1137}
1138
1139/**
1140 * xmlCharStrndup:
1141 * @cur: the input char *
1142 * @len: the len of @cur
1143 *
1144 * a strndup for char's to xmlChar's
1145 *
1146 * Returns a new xmlChar * or NULL
1147 */
1148
1149xmlChar *
1150xmlCharStrndup(const char *cur, int len) {
1151 int i;
1152 xmlChar *ret;
1153
1154 if ((cur == NULL) || (len < 0)) return(NULL);
1155 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1156 if (ret == NULL) {
1157 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1158 (len + 1) * (long)sizeof(xmlChar));
1159 return(NULL);
1160 }
1161 for (i = 0;i < len;i++)
1162 ret[i] = (xmlChar) cur[i];
1163 ret[len] = 0;
1164 return(ret);
1165}
1166
1167/**
1168 * xmlCharStrdup:
1169 * @cur: the input char *
1170 * @len: the len of @cur
1171 *
1172 * a strdup for char's to xmlChar's
1173 *
1174 * Returns a new xmlChar * or NULL
1175 */
1176
1177xmlChar *
1178xmlCharStrdup(const char *cur) {
1179 const char *p = cur;
1180
1181 if (cur == NULL) return(NULL);
1182 while (*p != '\0') p++; /* non input consuming */
1183 return(xmlCharStrndup(cur, p - cur));
1184}
1185
1186/**
1187 * xmlStrcmp:
1188 * @str1: the first xmlChar *
1189 * @str2: the second xmlChar *
1190 *
1191 * a strcmp for xmlChar's
1192 *
1193 * Returns the integer result of the comparison
1194 */
1195
1196int
1197xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1198 register int tmp;
1199
1200 if (str1 == str2) return(0);
1201 if (str1 == NULL) return(-1);
1202 if (str2 == NULL) return(1);
1203 do {
1204 tmp = *str1++ - *str2;
1205 if (tmp != 0) return(tmp);
1206 } while (*str2++ != 0);
1207 return 0;
1208}
1209
1210/**
1211 * xmlStrEqual:
1212 * @str1: the first xmlChar *
1213 * @str2: the second xmlChar *
1214 *
1215 * Check if both string are equal of have same content
1216 * Should be a bit more readable and faster than xmlStrEqual()
1217 *
1218 * Returns 1 if they are equal, 0 if they are different
1219 */
1220
1221int
1222xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1223 if (str1 == str2) return(1);
1224 if (str1 == NULL) return(0);
1225 if (str2 == NULL) return(0);
1226 do {
1227 if (*str1++ != *str2) return(0);
1228 } while (*str2++);
1229 return(1);
1230}
1231
1232/**
1233 * xmlStrncmp:
1234 * @str1: the first xmlChar *
1235 * @str2: the second xmlChar *
1236 * @len: the max comparison length
1237 *
1238 * a strncmp for xmlChar's
1239 *
1240 * Returns the integer result of the comparison
1241 */
1242
1243int
1244xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1245 register int tmp;
1246
1247 if (len <= 0) return(0);
1248 if (str1 == str2) return(0);
1249 if (str1 == NULL) return(-1);
1250 if (str2 == NULL) return(1);
1251 do {
1252 tmp = *str1++ - *str2;
1253 if (tmp != 0 || --len == 0) return(tmp);
1254 } while (*str2++ != 0);
1255 return 0;
1256}
1257
Daniel Veillardb44025c2001-10-11 22:55:55 +00001258static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001259 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1260 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1261 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1262 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1263 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1264 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1265 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1266 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1267 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1268 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1269 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1270 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1271 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1272 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1273 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1274 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1275 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1276 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1277 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1278 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1279 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1280 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1281 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1282 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1283 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1284 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1285 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1286 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1287 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1288 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1289 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1290 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1291};
1292
1293/**
1294 * xmlStrcasecmp:
1295 * @str1: the first xmlChar *
1296 * @str2: the second xmlChar *
1297 *
1298 * a strcasecmp for xmlChar's
1299 *
1300 * Returns the integer result of the comparison
1301 */
1302
1303int
1304xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1305 register int tmp;
1306
1307 if (str1 == str2) return(0);
1308 if (str1 == NULL) return(-1);
1309 if (str2 == NULL) return(1);
1310 do {
1311 tmp = casemap[*str1++] - casemap[*str2];
1312 if (tmp != 0) return(tmp);
1313 } while (*str2++ != 0);
1314 return 0;
1315}
1316
1317/**
1318 * xmlStrncasecmp:
1319 * @str1: the first xmlChar *
1320 * @str2: the second xmlChar *
1321 * @len: the max comparison length
1322 *
1323 * a strncasecmp for xmlChar's
1324 *
1325 * Returns the integer result of the comparison
1326 */
1327
1328int
1329xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1330 register int tmp;
1331
1332 if (len <= 0) return(0);
1333 if (str1 == str2) return(0);
1334 if (str1 == NULL) return(-1);
1335 if (str2 == NULL) return(1);
1336 do {
1337 tmp = casemap[*str1++] - casemap[*str2];
1338 if (tmp != 0 || --len == 0) return(tmp);
1339 } while (*str2++ != 0);
1340 return 0;
1341}
1342
1343/**
1344 * xmlStrchr:
1345 * @str: the xmlChar * array
1346 * @val: the xmlChar to search
1347 *
1348 * a strchr for xmlChar's
1349 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001350 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001351 */
1352
1353const xmlChar *
1354xmlStrchr(const xmlChar *str, xmlChar val) {
1355 if (str == NULL) return(NULL);
1356 while (*str != 0) { /* non input consuming */
1357 if (*str == val) return((xmlChar *) str);
1358 str++;
1359 }
1360 return(NULL);
1361}
1362
1363/**
1364 * xmlStrstr:
1365 * @str: the xmlChar * array (haystack)
1366 * @val: the xmlChar to search (needle)
1367 *
1368 * a strstr for xmlChar's
1369 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001370 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001371 */
1372
1373const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001374xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001375 int n;
1376
1377 if (str == NULL) return(NULL);
1378 if (val == NULL) return(NULL);
1379 n = xmlStrlen(val);
1380
1381 if (n == 0) return(str);
1382 while (*str != 0) { /* non input consuming */
1383 if (*str == *val) {
1384 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1385 }
1386 str++;
1387 }
1388 return(NULL);
1389}
1390
1391/**
1392 * xmlStrcasestr:
1393 * @str: the xmlChar * array (haystack)
1394 * @val: the xmlChar to search (needle)
1395 *
1396 * a case-ignoring strstr for xmlChar's
1397 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001398 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001399 */
1400
1401const xmlChar *
1402xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1403 int n;
1404
1405 if (str == NULL) return(NULL);
1406 if (val == NULL) return(NULL);
1407 n = xmlStrlen(val);
1408
1409 if (n == 0) return(str);
1410 while (*str != 0) { /* non input consuming */
1411 if (casemap[*str] == casemap[*val])
1412 if (!xmlStrncasecmp(str, val, n)) return(str);
1413 str++;
1414 }
1415 return(NULL);
1416}
1417
1418/**
1419 * xmlStrsub:
1420 * @str: the xmlChar * array (haystack)
1421 * @start: the index of the first char (zero based)
1422 * @len: the length of the substring
1423 *
1424 * Extract a substring of a given string
1425 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001426 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001427 */
1428
1429xmlChar *
1430xmlStrsub(const xmlChar *str, int start, int len) {
1431 int i;
1432
1433 if (str == NULL) return(NULL);
1434 if (start < 0) return(NULL);
1435 if (len < 0) return(NULL);
1436
1437 for (i = 0;i < start;i++) {
1438 if (*str == 0) return(NULL);
1439 str++;
1440 }
1441 if (*str == 0) return(NULL);
1442 return(xmlStrndup(str, len));
1443}
1444
1445/**
1446 * xmlStrlen:
1447 * @str: the xmlChar * array
1448 *
1449 * length of a xmlChar's string
1450 *
1451 * Returns the number of xmlChar contained in the ARRAY.
1452 */
1453
1454int
1455xmlStrlen(const xmlChar *str) {
1456 int len = 0;
1457
1458 if (str == NULL) return(0);
1459 while (*str != 0) { /* non input consuming */
1460 str++;
1461 len++;
1462 }
1463 return(len);
1464}
1465
1466/**
1467 * xmlStrncat:
1468 * @cur: the original xmlChar * array
1469 * @add: the xmlChar * array added
1470 * @len: the length of @add
1471 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001472 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001473 * first bytes of @add.
1474 *
1475 * Returns a new xmlChar *, the original @cur is reallocated if needed
1476 * and should not be freed
1477 */
1478
1479xmlChar *
1480xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1481 int size;
1482 xmlChar *ret;
1483
1484 if ((add == NULL) || (len == 0))
1485 return(cur);
1486 if (cur == NULL)
1487 return(xmlStrndup(add, len));
1488
1489 size = xmlStrlen(cur);
1490 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1491 if (ret == NULL) {
1492 xmlGenericError(xmlGenericErrorContext,
1493 "xmlStrncat: realloc of %ld byte failed\n",
1494 (size + len + 1) * (long)sizeof(xmlChar));
1495 return(cur);
1496 }
1497 memcpy(&ret[size], add, len * sizeof(xmlChar));
1498 ret[size + len] = 0;
1499 return(ret);
1500}
1501
1502/**
1503 * xmlStrcat:
1504 * @cur: the original xmlChar * array
1505 * @add: the xmlChar * array added
1506 *
1507 * a strcat for array of xmlChar's. Since they are supposed to be
1508 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1509 * a termination mark of '0'.
1510 *
1511 * Returns a new xmlChar * containing the concatenated string.
1512 */
1513xmlChar *
1514xmlStrcat(xmlChar *cur, const xmlChar *add) {
1515 const xmlChar *p = add;
1516
1517 if (add == NULL) return(cur);
1518 if (cur == NULL)
1519 return(xmlStrdup(add));
1520
1521 while (*p != 0) p++; /* non input consuming */
1522 return(xmlStrncat(cur, add, p - add));
1523}
1524
1525/************************************************************************
1526 * *
1527 * Commodity functions, cleanup needed ? *
1528 * *
1529 ************************************************************************/
1530
1531/**
1532 * areBlanks:
1533 * @ctxt: an XML parser context
1534 * @str: a xmlChar *
1535 * @len: the size of @str
1536 *
1537 * Is this a sequence of blank chars that one can ignore ?
1538 *
1539 * Returns 1 if ignorable 0 otherwise.
1540 */
1541
1542static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1543 int i, ret;
1544 xmlNodePtr lastChild;
1545
Daniel Veillard05c13a22001-09-09 08:38:09 +00001546 /*
1547 * Don't spend time trying to differentiate them, the same callback is
1548 * used !
1549 */
1550 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001551 return(0);
1552
Owen Taylor3473f882001-02-23 17:55:21 +00001553 /*
1554 * Check for xml:space value.
1555 */
1556 if (*(ctxt->space) == 1)
1557 return(0);
1558
1559 /*
1560 * Check that the string is made of blanks
1561 */
1562 for (i = 0;i < len;i++)
1563 if (!(IS_BLANK(str[i]))) return(0);
1564
1565 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001566 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001567 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001568 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001569 if (ctxt->myDoc != NULL) {
1570 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1571 if (ret == 0) return(1);
1572 if (ret == 1) return(0);
1573 }
1574
1575 /*
1576 * Otherwise, heuristic :-\
1577 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001578 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001579 if ((ctxt->node->children == NULL) &&
1580 (RAW == '<') && (NXT(1) == '/')) return(0);
1581
1582 lastChild = xmlGetLastChild(ctxt->node);
1583 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001584 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1585 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001586 } else if (xmlNodeIsText(lastChild))
1587 return(0);
1588 else if ((ctxt->node->children != NULL) &&
1589 (xmlNodeIsText(ctxt->node->children)))
1590 return(0);
1591 return(1);
1592}
1593
Owen Taylor3473f882001-02-23 17:55:21 +00001594/************************************************************************
1595 * *
1596 * Extra stuff for namespace support *
1597 * Relates to http://www.w3.org/TR/WD-xml-names *
1598 * *
1599 ************************************************************************/
1600
1601/**
1602 * xmlSplitQName:
1603 * @ctxt: an XML parser context
1604 * @name: an XML parser context
1605 * @prefix: a xmlChar **
1606 *
1607 * parse an UTF8 encoded XML qualified name string
1608 *
1609 * [NS 5] QName ::= (Prefix ':')? LocalPart
1610 *
1611 * [NS 6] Prefix ::= NCName
1612 *
1613 * [NS 7] LocalPart ::= NCName
1614 *
1615 * Returns the local part, and prefix is updated
1616 * to get the Prefix if any.
1617 */
1618
1619xmlChar *
1620xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1621 xmlChar buf[XML_MAX_NAMELEN + 5];
1622 xmlChar *buffer = NULL;
1623 int len = 0;
1624 int max = XML_MAX_NAMELEN;
1625 xmlChar *ret = NULL;
1626 const xmlChar *cur = name;
1627 int c;
1628
1629 *prefix = NULL;
1630
1631#ifndef XML_XML_NAMESPACE
1632 /* xml: prefix is not really a namespace */
1633 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1634 (cur[2] == 'l') && (cur[3] == ':'))
1635 return(xmlStrdup(name));
1636#endif
1637
1638 /* nasty but valid */
1639 if (cur[0] == ':')
1640 return(xmlStrdup(name));
1641
1642 c = *cur++;
1643 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1644 buf[len++] = c;
1645 c = *cur++;
1646 }
1647 if (len >= max) {
1648 /*
1649 * Okay someone managed to make a huge name, so he's ready to pay
1650 * for the processing speed.
1651 */
1652 max = len * 2;
1653
1654 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1655 if (buffer == NULL) {
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "xmlSplitQName: out of memory\n");
1659 return(NULL);
1660 }
1661 memcpy(buffer, buf, len);
1662 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1663 if (len + 10 > max) {
1664 max *= 2;
1665 buffer = (xmlChar *) xmlRealloc(buffer,
1666 max * sizeof(xmlChar));
1667 if (buffer == NULL) {
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "xmlSplitQName: out of memory\n");
1671 return(NULL);
1672 }
1673 }
1674 buffer[len++] = c;
1675 c = *cur++;
1676 }
1677 buffer[len] = 0;
1678 }
1679
1680 if (buffer == NULL)
1681 ret = xmlStrndup(buf, len);
1682 else {
1683 ret = buffer;
1684 buffer = NULL;
1685 max = XML_MAX_NAMELEN;
1686 }
1687
1688
1689 if (c == ':') {
1690 c = *cur++;
1691 if (c == 0) return(ret);
1692 *prefix = ret;
1693 len = 0;
1694
1695 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1696 buf[len++] = c;
1697 c = *cur++;
1698 }
1699 if (len >= max) {
1700 /*
1701 * Okay someone managed to make a huge name, so he's ready to pay
1702 * for the processing speed.
1703 */
1704 max = len * 2;
1705
1706 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1707 if (buffer == NULL) {
1708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709 ctxt->sax->error(ctxt->userData,
1710 "xmlSplitQName: out of memory\n");
1711 return(NULL);
1712 }
1713 memcpy(buffer, buf, len);
1714 while (c != 0) { /* tested bigname2.xml */
1715 if (len + 10 > max) {
1716 max *= 2;
1717 buffer = (xmlChar *) xmlRealloc(buffer,
1718 max * sizeof(xmlChar));
1719 if (buffer == NULL) {
1720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1721 ctxt->sax->error(ctxt->userData,
1722 "xmlSplitQName: out of memory\n");
1723 return(NULL);
1724 }
1725 }
1726 buffer[len++] = c;
1727 c = *cur++;
1728 }
1729 buffer[len] = 0;
1730 }
1731
1732 if (buffer == NULL)
1733 ret = xmlStrndup(buf, len);
1734 else {
1735 ret = buffer;
1736 }
1737 }
1738
1739 return(ret);
1740}
1741
1742/************************************************************************
1743 * *
1744 * The parser itself *
1745 * Relates to http://www.w3.org/TR/REC-xml *
1746 * *
1747 ************************************************************************/
1748
Daniel Veillard76d66f42001-05-16 21:05:17 +00001749static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001750/**
1751 * xmlParseName:
1752 * @ctxt: an XML parser context
1753 *
1754 * parse an XML name.
1755 *
1756 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1757 * CombiningChar | Extender
1758 *
1759 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1760 *
1761 * [6] Names ::= Name (S Name)*
1762 *
1763 * Returns the Name parsed or NULL
1764 */
1765
1766xmlChar *
1767xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001768 const xmlChar *in;
1769 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001770 int count = 0;
1771
1772 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001773
1774 /*
1775 * Accelerator for simple ASCII names
1776 */
1777 in = ctxt->input->cur;
1778 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1779 ((*in >= 0x41) && (*in <= 0x5A)) ||
1780 (*in == '_') || (*in == ':')) {
1781 in++;
1782 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1783 ((*in >= 0x41) && (*in <= 0x5A)) ||
1784 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001785 (*in == '_') || (*in == '-') ||
1786 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001787 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001788 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001789 count = in - ctxt->input->cur;
1790 ret = xmlStrndup(ctxt->input->cur, count);
1791 ctxt->input->cur = in;
1792 return(ret);
1793 }
1794 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001795 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001796}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001797
Daniel Veillard46de64e2002-05-29 08:21:33 +00001798/**
1799 * xmlParseNameAndCompare:
1800 * @ctxt: an XML parser context
1801 *
1802 * parse an XML name and compares for match
1803 * (specialized for endtag parsing)
1804 *
1805 *
1806 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1807 * and the name for mismatch
1808 */
1809
1810xmlChar *
1811xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1812 const xmlChar *cmp = other;
1813 const xmlChar *in;
1814 xmlChar *ret;
1815 int count = 0;
1816
1817 GROW;
1818
1819 in = ctxt->input->cur;
1820 while (*in != 0 && *in == *cmp) {
1821 ++in;
1822 ++cmp;
1823 }
1824 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1825 /* success */
1826 ctxt->input->cur = in;
1827 return (xmlChar*) 1;
1828 }
1829 /* failure (or end of input buffer), check with full function */
1830 ret = xmlParseName (ctxt);
1831 if (ret != 0 && xmlStrEqual (ret, other)) {
1832 xmlFree (ret);
1833 return (xmlChar*) 1;
1834 }
1835 return ret;
1836}
1837
Daniel Veillard76d66f42001-05-16 21:05:17 +00001838static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001839xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1840 xmlChar buf[XML_MAX_NAMELEN + 5];
1841 int len = 0, l;
1842 int c;
1843 int count = 0;
1844
1845 /*
1846 * Handler for more complex cases
1847 */
1848 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001849 c = CUR_CHAR(l);
1850 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1851 (!IS_LETTER(c) && (c != '_') &&
1852 (c != ':'))) {
1853 return(NULL);
1854 }
1855
1856 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1857 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1858 (c == '.') || (c == '-') ||
1859 (c == '_') || (c == ':') ||
1860 (IS_COMBINING(c)) ||
1861 (IS_EXTENDER(c)))) {
1862 if (count++ > 100) {
1863 count = 0;
1864 GROW;
1865 }
1866 COPY_BUF(l,buf,len,c);
1867 NEXTL(l);
1868 c = CUR_CHAR(l);
1869 if (len >= XML_MAX_NAMELEN) {
1870 /*
1871 * Okay someone managed to make a huge name, so he's ready to pay
1872 * for the processing speed.
1873 */
1874 xmlChar *buffer;
1875 int max = len * 2;
1876
1877 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1878 if (buffer == NULL) {
1879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1880 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001881 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001882 return(NULL);
1883 }
1884 memcpy(buffer, buf, len);
1885 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1886 (c == '.') || (c == '-') ||
1887 (c == '_') || (c == ':') ||
1888 (IS_COMBINING(c)) ||
1889 (IS_EXTENDER(c))) {
1890 if (count++ > 100) {
1891 count = 0;
1892 GROW;
1893 }
1894 if (len + 10 > max) {
1895 max *= 2;
1896 buffer = (xmlChar *) xmlRealloc(buffer,
1897 max * sizeof(xmlChar));
1898 if (buffer == NULL) {
1899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1900 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001901 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001902 return(NULL);
1903 }
1904 }
1905 COPY_BUF(l,buffer,len,c);
1906 NEXTL(l);
1907 c = CUR_CHAR(l);
1908 }
1909 buffer[len] = 0;
1910 return(buffer);
1911 }
1912 }
1913 return(xmlStrndup(buf, len));
1914}
1915
1916/**
1917 * xmlParseStringName:
1918 * @ctxt: an XML parser context
1919 * @str: a pointer to the string pointer (IN/OUT)
1920 *
1921 * parse an XML name.
1922 *
1923 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1924 * CombiningChar | Extender
1925 *
1926 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1927 *
1928 * [6] Names ::= Name (S Name)*
1929 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001930 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001931 * is updated to the current location in the string.
1932 */
1933
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001934static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001935xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1936 xmlChar buf[XML_MAX_NAMELEN + 5];
1937 const xmlChar *cur = *str;
1938 int len = 0, l;
1939 int c;
1940
1941 c = CUR_SCHAR(cur, l);
1942 if (!IS_LETTER(c) && (c != '_') &&
1943 (c != ':')) {
1944 return(NULL);
1945 }
1946
1947 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1948 (c == '.') || (c == '-') ||
1949 (c == '_') || (c == ':') ||
1950 (IS_COMBINING(c)) ||
1951 (IS_EXTENDER(c))) {
1952 COPY_BUF(l,buf,len,c);
1953 cur += l;
1954 c = CUR_SCHAR(cur, l);
1955 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1956 /*
1957 * Okay someone managed to make a huge name, so he's ready to pay
1958 * for the processing speed.
1959 */
1960 xmlChar *buffer;
1961 int max = len * 2;
1962
1963 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1964 if (buffer == NULL) {
1965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1966 ctxt->sax->error(ctxt->userData,
1967 "xmlParseStringName: out of memory\n");
1968 return(NULL);
1969 }
1970 memcpy(buffer, buf, len);
1971 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1972 (c == '.') || (c == '-') ||
1973 (c == '_') || (c == ':') ||
1974 (IS_COMBINING(c)) ||
1975 (IS_EXTENDER(c))) {
1976 if (len + 10 > max) {
1977 max *= 2;
1978 buffer = (xmlChar *) xmlRealloc(buffer,
1979 max * sizeof(xmlChar));
1980 if (buffer == NULL) {
1981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1982 ctxt->sax->error(ctxt->userData,
1983 "xmlParseStringName: out of memory\n");
1984 return(NULL);
1985 }
1986 }
1987 COPY_BUF(l,buffer,len,c);
1988 cur += l;
1989 c = CUR_SCHAR(cur, l);
1990 }
1991 buffer[len] = 0;
1992 *str = cur;
1993 return(buffer);
1994 }
1995 }
1996 *str = cur;
1997 return(xmlStrndup(buf, len));
1998}
1999
2000/**
2001 * xmlParseNmtoken:
2002 * @ctxt: an XML parser context
2003 *
2004 * parse an XML Nmtoken.
2005 *
2006 * [7] Nmtoken ::= (NameChar)+
2007 *
2008 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2009 *
2010 * Returns the Nmtoken parsed or NULL
2011 */
2012
2013xmlChar *
2014xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2015 xmlChar buf[XML_MAX_NAMELEN + 5];
2016 int len = 0, l;
2017 int c;
2018 int count = 0;
2019
2020 GROW;
2021 c = CUR_CHAR(l);
2022
2023 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2024 (c == '.') || (c == '-') ||
2025 (c == '_') || (c == ':') ||
2026 (IS_COMBINING(c)) ||
2027 (IS_EXTENDER(c))) {
2028 if (count++ > 100) {
2029 count = 0;
2030 GROW;
2031 }
2032 COPY_BUF(l,buf,len,c);
2033 NEXTL(l);
2034 c = CUR_CHAR(l);
2035 if (len >= XML_MAX_NAMELEN) {
2036 /*
2037 * Okay someone managed to make a huge token, so he's ready to pay
2038 * for the processing speed.
2039 */
2040 xmlChar *buffer;
2041 int max = len * 2;
2042
2043 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2044 if (buffer == NULL) {
2045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2046 ctxt->sax->error(ctxt->userData,
2047 "xmlParseNmtoken: out of memory\n");
2048 return(NULL);
2049 }
2050 memcpy(buffer, buf, len);
2051 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2052 (c == '.') || (c == '-') ||
2053 (c == '_') || (c == ':') ||
2054 (IS_COMBINING(c)) ||
2055 (IS_EXTENDER(c))) {
2056 if (count++ > 100) {
2057 count = 0;
2058 GROW;
2059 }
2060 if (len + 10 > max) {
2061 max *= 2;
2062 buffer = (xmlChar *) xmlRealloc(buffer,
2063 max * sizeof(xmlChar));
2064 if (buffer == NULL) {
2065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2066 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002067 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002068 return(NULL);
2069 }
2070 }
2071 COPY_BUF(l,buffer,len,c);
2072 NEXTL(l);
2073 c = CUR_CHAR(l);
2074 }
2075 buffer[len] = 0;
2076 return(buffer);
2077 }
2078 }
2079 if (len == 0)
2080 return(NULL);
2081 return(xmlStrndup(buf, len));
2082}
2083
2084/**
2085 * xmlParseEntityValue:
2086 * @ctxt: an XML parser context
2087 * @orig: if non-NULL store a copy of the original entity value
2088 *
2089 * parse a value for ENTITY declarations
2090 *
2091 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2092 * "'" ([^%&'] | PEReference | Reference)* "'"
2093 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002094 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002095 */
2096
2097xmlChar *
2098xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2099 xmlChar *buf = NULL;
2100 int len = 0;
2101 int size = XML_PARSER_BUFFER_SIZE;
2102 int c, l;
2103 xmlChar stop;
2104 xmlChar *ret = NULL;
2105 const xmlChar *cur = NULL;
2106 xmlParserInputPtr input;
2107
2108 if (RAW == '"') stop = '"';
2109 else if (RAW == '\'') stop = '\'';
2110 else {
2111 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2113 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2114 ctxt->wellFormed = 0;
2115 ctxt->disableSAX = 1;
2116 return(NULL);
2117 }
2118 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2119 if (buf == NULL) {
2120 xmlGenericError(xmlGenericErrorContext,
2121 "malloc of %d byte failed\n", size);
2122 return(NULL);
2123 }
2124
2125 /*
2126 * The content of the entity definition is copied in a buffer.
2127 */
2128
2129 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2130 input = ctxt->input;
2131 GROW;
2132 NEXT;
2133 c = CUR_CHAR(l);
2134 /*
2135 * NOTE: 4.4.5 Included in Literal
2136 * When a parameter entity reference appears in a literal entity
2137 * value, ... a single or double quote character in the replacement
2138 * text is always treated as a normal data character and will not
2139 * terminate the literal.
2140 * In practice it means we stop the loop only when back at parsing
2141 * the initial entity and the quote is found
2142 */
2143 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2144 (ctxt->input != input))) {
2145 if (len + 5 >= size) {
2146 size *= 2;
2147 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2148 if (buf == NULL) {
2149 xmlGenericError(xmlGenericErrorContext,
2150 "realloc of %d byte failed\n", size);
2151 return(NULL);
2152 }
2153 }
2154 COPY_BUF(l,buf,len,c);
2155 NEXTL(l);
2156 /*
2157 * Pop-up of finished entities.
2158 */
2159 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2160 xmlPopInput(ctxt);
2161
2162 GROW;
2163 c = CUR_CHAR(l);
2164 if (c == 0) {
2165 GROW;
2166 c = CUR_CHAR(l);
2167 }
2168 }
2169 buf[len] = 0;
2170
2171 /*
2172 * Raise problem w.r.t. '&' and '%' being used in non-entities
2173 * reference constructs. Note Charref will be handled in
2174 * xmlStringDecodeEntities()
2175 */
2176 cur = buf;
2177 while (*cur != 0) { /* non input consuming */
2178 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2179 xmlChar *name;
2180 xmlChar tmp = *cur;
2181
2182 cur++;
2183 name = xmlParseStringName(ctxt, &cur);
2184 if ((name == NULL) || (*cur != ';')) {
2185 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2187 ctxt->sax->error(ctxt->userData,
2188 "EntityValue: '%c' forbidden except for entities references\n",
2189 tmp);
2190 ctxt->wellFormed = 0;
2191 ctxt->disableSAX = 1;
2192 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002193 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2194 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002195 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2197 ctxt->sax->error(ctxt->userData,
2198 "EntityValue: PEReferences forbidden in internal subset\n",
2199 tmp);
2200 ctxt->wellFormed = 0;
2201 ctxt->disableSAX = 1;
2202 }
2203 if (name != NULL)
2204 xmlFree(name);
2205 }
2206 cur++;
2207 }
2208
2209 /*
2210 * Then PEReference entities are substituted.
2211 */
2212 if (c != stop) {
2213 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2215 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2216 ctxt->wellFormed = 0;
2217 ctxt->disableSAX = 1;
2218 xmlFree(buf);
2219 } else {
2220 NEXT;
2221 /*
2222 * NOTE: 4.4.7 Bypassed
2223 * When a general entity reference appears in the EntityValue in
2224 * an entity declaration, it is bypassed and left as is.
2225 * so XML_SUBSTITUTE_REF is not set here.
2226 */
2227 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2228 0, 0, 0);
2229 if (orig != NULL)
2230 *orig = buf;
2231 else
2232 xmlFree(buf);
2233 }
2234
2235 return(ret);
2236}
2237
2238/**
2239 * xmlParseAttValue:
2240 * @ctxt: an XML parser context
2241 *
2242 * parse a value for an attribute
2243 * Note: the parser won't do substitution of entities here, this
2244 * will be handled later in xmlStringGetNodeList
2245 *
2246 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2247 * "'" ([^<&'] | Reference)* "'"
2248 *
2249 * 3.3.3 Attribute-Value Normalization:
2250 * Before the value of an attribute is passed to the application or
2251 * checked for validity, the XML processor must normalize it as follows:
2252 * - a character reference is processed by appending the referenced
2253 * character to the attribute value
2254 * - an entity reference is processed by recursively processing the
2255 * replacement text of the entity
2256 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2257 * appending #x20 to the normalized value, except that only a single
2258 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2259 * parsed entity or the literal entity value of an internal parsed entity
2260 * - other characters are processed by appending them to the normalized value
2261 * If the declared value is not CDATA, then the XML processor must further
2262 * process the normalized attribute value by discarding any leading and
2263 * trailing space (#x20) characters, and by replacing sequences of space
2264 * (#x20) characters by a single space (#x20) character.
2265 * All attributes for which no declaration has been read should be treated
2266 * by a non-validating parser as if declared CDATA.
2267 *
2268 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2269 */
2270
2271xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002272xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2273
2274xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002275xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2276 xmlChar limit = 0;
2277 xmlChar *buf = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002278 xmlChar *in = NULL;
2279 xmlChar *ret = NULL;
2280 SHRINK;
2281 GROW;
2282 in = CUR_PTR;
2283 if (*in != '"' && *in != '\'') {
2284 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2286 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2287 ctxt->wellFormed = 0;
2288 ctxt->disableSAX = 1;
2289 return(NULL);
2290 }
2291 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2292 limit = *in;
2293 ++in;
2294
2295 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2296 *in != '&' && *in != '<'
2297 ) {
2298 ++in;
2299 }
2300 if (*in != limit) {
2301 return xmlParseAttValueComplex(ctxt);
2302 }
2303 ++in;
2304 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2305 CUR_PTR = in;
2306 return ret;
2307}
2308
2309xmlChar *
2310xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2311 xmlChar limit = 0;
2312 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002313 int len = 0;
2314 int buf_size = 0;
2315 int c, l;
2316 xmlChar *current = NULL;
2317 xmlEntityPtr ent;
2318
2319
2320 SHRINK;
2321 if (NXT(0) == '"') {
2322 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2323 limit = '"';
2324 NEXT;
2325 } else if (NXT(0) == '\'') {
2326 limit = '\'';
2327 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2328 NEXT;
2329 } else {
2330 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2332 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2333 ctxt->wellFormed = 0;
2334 ctxt->disableSAX = 1;
2335 return(NULL);
2336 }
2337
2338 /*
2339 * allocate a translation buffer.
2340 */
2341 buf_size = XML_PARSER_BUFFER_SIZE;
2342 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2343 if (buf == NULL) {
2344 perror("xmlParseAttValue: malloc failed");
2345 return(NULL);
2346 }
2347
2348 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002349 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002350 */
2351 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002352 while ((NXT(0) != limit) && /* checked */
2353 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002354 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002355 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002356 if (NXT(1) == '#') {
2357 int val = xmlParseCharRef(ctxt);
2358 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002359 if (ctxt->replaceEntities) {
2360 if (len > buf_size - 10) {
2361 growBuffer(buf);
2362 }
2363 buf[len++] = '&';
2364 } else {
2365 /*
2366 * The reparsing will be done in xmlStringGetNodeList()
2367 * called by the attribute() function in SAX.c
2368 */
2369 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002370
Daniel Veillard319a7422001-09-11 09:27:09 +00002371 if (len > buf_size - 10) {
2372 growBuffer(buf);
2373 }
2374 current = &buffer[0];
2375 while (*current != 0) { /* non input consuming */
2376 buf[len++] = *current++;
2377 }
Owen Taylor3473f882001-02-23 17:55:21 +00002378 }
2379 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002380 if (len > buf_size - 10) {
2381 growBuffer(buf);
2382 }
Owen Taylor3473f882001-02-23 17:55:21 +00002383 len += xmlCopyChar(0, &buf[len], val);
2384 }
2385 } else {
2386 ent = xmlParseEntityRef(ctxt);
2387 if ((ent != NULL) &&
2388 (ctxt->replaceEntities != 0)) {
2389 xmlChar *rep;
2390
2391 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2392 rep = xmlStringDecodeEntities(ctxt, ent->content,
2393 XML_SUBSTITUTE_REF, 0, 0, 0);
2394 if (rep != NULL) {
2395 current = rep;
2396 while (*current != 0) { /* non input consuming */
2397 buf[len++] = *current++;
2398 if (len > buf_size - 10) {
2399 growBuffer(buf);
2400 }
2401 }
2402 xmlFree(rep);
2403 }
2404 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002405 if (len > buf_size - 10) {
2406 growBuffer(buf);
2407 }
Owen Taylor3473f882001-02-23 17:55:21 +00002408 if (ent->content != NULL)
2409 buf[len++] = ent->content[0];
2410 }
2411 } else if (ent != NULL) {
2412 int i = xmlStrlen(ent->name);
2413 const xmlChar *cur = ent->name;
2414
2415 /*
2416 * This may look absurd but is needed to detect
2417 * entities problems
2418 */
2419 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2420 (ent->content != NULL)) {
2421 xmlChar *rep;
2422 rep = xmlStringDecodeEntities(ctxt, ent->content,
2423 XML_SUBSTITUTE_REF, 0, 0, 0);
2424 if (rep != NULL)
2425 xmlFree(rep);
2426 }
2427
2428 /*
2429 * Just output the reference
2430 */
2431 buf[len++] = '&';
2432 if (len > buf_size - i - 10) {
2433 growBuffer(buf);
2434 }
2435 for (;i > 0;i--)
2436 buf[len++] = *cur++;
2437 buf[len++] = ';';
2438 }
2439 }
2440 } else {
2441 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2442 COPY_BUF(l,buf,len,0x20);
2443 if (len > buf_size - 10) {
2444 growBuffer(buf);
2445 }
2446 } else {
2447 COPY_BUF(l,buf,len,c);
2448 if (len > buf_size - 10) {
2449 growBuffer(buf);
2450 }
2451 }
2452 NEXTL(l);
2453 }
2454 GROW;
2455 c = CUR_CHAR(l);
2456 }
2457 buf[len++] = 0;
2458 if (RAW == '<') {
2459 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2461 ctxt->sax->error(ctxt->userData,
2462 "Unescaped '<' not allowed in attributes values\n");
2463 ctxt->wellFormed = 0;
2464 ctxt->disableSAX = 1;
2465 } else if (RAW != limit) {
2466 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2468 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2469 ctxt->wellFormed = 0;
2470 ctxt->disableSAX = 1;
2471 } else
2472 NEXT;
2473 return(buf);
2474}
2475
2476/**
2477 * xmlParseSystemLiteral:
2478 * @ctxt: an XML parser context
2479 *
2480 * parse an XML Literal
2481 *
2482 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2483 *
2484 * Returns the SystemLiteral parsed or NULL
2485 */
2486
2487xmlChar *
2488xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2489 xmlChar *buf = NULL;
2490 int len = 0;
2491 int size = XML_PARSER_BUFFER_SIZE;
2492 int cur, l;
2493 xmlChar stop;
2494 int state = ctxt->instate;
2495 int count = 0;
2496
2497 SHRINK;
2498 if (RAW == '"') {
2499 NEXT;
2500 stop = '"';
2501 } else if (RAW == '\'') {
2502 NEXT;
2503 stop = '\'';
2504 } else {
2505 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2507 ctxt->sax->error(ctxt->userData,
2508 "SystemLiteral \" or ' expected\n");
2509 ctxt->wellFormed = 0;
2510 ctxt->disableSAX = 1;
2511 return(NULL);
2512 }
2513
2514 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2515 if (buf == NULL) {
2516 xmlGenericError(xmlGenericErrorContext,
2517 "malloc of %d byte failed\n", size);
2518 return(NULL);
2519 }
2520 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2521 cur = CUR_CHAR(l);
2522 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2523 if (len + 5 >= size) {
2524 size *= 2;
2525 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2526 if (buf == NULL) {
2527 xmlGenericError(xmlGenericErrorContext,
2528 "realloc of %d byte failed\n", size);
2529 ctxt->instate = (xmlParserInputState) state;
2530 return(NULL);
2531 }
2532 }
2533 count++;
2534 if (count > 50) {
2535 GROW;
2536 count = 0;
2537 }
2538 COPY_BUF(l,buf,len,cur);
2539 NEXTL(l);
2540 cur = CUR_CHAR(l);
2541 if (cur == 0) {
2542 GROW;
2543 SHRINK;
2544 cur = CUR_CHAR(l);
2545 }
2546 }
2547 buf[len] = 0;
2548 ctxt->instate = (xmlParserInputState) state;
2549 if (!IS_CHAR(cur)) {
2550 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2552 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2553 ctxt->wellFormed = 0;
2554 ctxt->disableSAX = 1;
2555 } else {
2556 NEXT;
2557 }
2558 return(buf);
2559}
2560
2561/**
2562 * xmlParsePubidLiteral:
2563 * @ctxt: an XML parser context
2564 *
2565 * parse an XML public literal
2566 *
2567 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2568 *
2569 * Returns the PubidLiteral parsed or NULL.
2570 */
2571
2572xmlChar *
2573xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2574 xmlChar *buf = NULL;
2575 int len = 0;
2576 int size = XML_PARSER_BUFFER_SIZE;
2577 xmlChar cur;
2578 xmlChar stop;
2579 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002580 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002581
2582 SHRINK;
2583 if (RAW == '"') {
2584 NEXT;
2585 stop = '"';
2586 } else if (RAW == '\'') {
2587 NEXT;
2588 stop = '\'';
2589 } else {
2590 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2592 ctxt->sax->error(ctxt->userData,
2593 "SystemLiteral \" or ' expected\n");
2594 ctxt->wellFormed = 0;
2595 ctxt->disableSAX = 1;
2596 return(NULL);
2597 }
2598 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2599 if (buf == NULL) {
2600 xmlGenericError(xmlGenericErrorContext,
2601 "malloc of %d byte failed\n", size);
2602 return(NULL);
2603 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002604 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002605 cur = CUR;
2606 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2607 if (len + 1 >= size) {
2608 size *= 2;
2609 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2610 if (buf == NULL) {
2611 xmlGenericError(xmlGenericErrorContext,
2612 "realloc of %d byte failed\n", size);
2613 return(NULL);
2614 }
2615 }
2616 buf[len++] = cur;
2617 count++;
2618 if (count > 50) {
2619 GROW;
2620 count = 0;
2621 }
2622 NEXT;
2623 cur = CUR;
2624 if (cur == 0) {
2625 GROW;
2626 SHRINK;
2627 cur = CUR;
2628 }
2629 }
2630 buf[len] = 0;
2631 if (cur != stop) {
2632 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2634 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2635 ctxt->wellFormed = 0;
2636 ctxt->disableSAX = 1;
2637 } else {
2638 NEXT;
2639 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002640 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002641 return(buf);
2642}
2643
Daniel Veillard48b2f892001-02-25 16:11:03 +00002644void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002645/**
2646 * xmlParseCharData:
2647 * @ctxt: an XML parser context
2648 * @cdata: int indicating whether we are within a CDATA section
2649 *
2650 * parse a CharData section.
2651 * if we are within a CDATA section ']]>' marks an end of section.
2652 *
2653 * The right angle bracket (>) may be represented using the string "&gt;",
2654 * and must, for compatibility, be escaped using "&gt;" or a character
2655 * reference when it appears in the string "]]>" in content, when that
2656 * string is not marking the end of a CDATA section.
2657 *
2658 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2659 */
2660
2661void
2662xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002663 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002664 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002665 int line = ctxt->input->line;
2666 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002667
2668 SHRINK;
2669 GROW;
2670 /*
2671 * Accelerated common case where input don't need to be
2672 * modified before passing it to the handler.
2673 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002674 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002675 in = ctxt->input->cur;
2676 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002677get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002678 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2679 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002680 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002681 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002682 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002683 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002684 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002685 ctxt->input->line++;
2686 in++;
2687 }
2688 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002689 }
2690 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002691 if ((in[1] == ']') && (in[2] == '>')) {
2692 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2694 ctxt->sax->error(ctxt->userData,
2695 "Sequence ']]>' not allowed in content\n");
2696 ctxt->input->cur = in;
2697 ctxt->wellFormed = 0;
2698 ctxt->disableSAX = 1;
2699 return;
2700 }
2701 in++;
2702 goto get_more;
2703 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002704 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002705 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002706 if (IS_BLANK(*ctxt->input->cur)) {
2707 const xmlChar *tmp = ctxt->input->cur;
2708 ctxt->input->cur = in;
2709 if (areBlanks(ctxt, tmp, nbchar)) {
2710 if (ctxt->sax->ignorableWhitespace != NULL)
2711 ctxt->sax->ignorableWhitespace(ctxt->userData,
2712 tmp, nbchar);
2713 } else {
2714 if (ctxt->sax->characters != NULL)
2715 ctxt->sax->characters(ctxt->userData,
2716 tmp, nbchar);
2717 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002718 line = ctxt->input->line;
2719 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002720 } else {
2721 if (ctxt->sax->characters != NULL)
2722 ctxt->sax->characters(ctxt->userData,
2723 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002724 line = ctxt->input->line;
2725 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002726 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002727 }
2728 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002729 if (*in == 0xD) {
2730 in++;
2731 if (*in == 0xA) {
2732 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002733 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002734 ctxt->input->line++;
2735 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002736 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002737 in--;
2738 }
2739 if (*in == '<') {
2740 return;
2741 }
2742 if (*in == '&') {
2743 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002744 }
2745 SHRINK;
2746 GROW;
2747 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002748 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002749 nbchar = 0;
2750 }
Daniel Veillard50582112001-03-26 22:52:16 +00002751 ctxt->input->line = line;
2752 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002753 xmlParseCharDataComplex(ctxt, cdata);
2754}
2755
2756void
2757xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002758 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2759 int nbchar = 0;
2760 int cur, l;
2761 int count = 0;
2762
2763 SHRINK;
2764 GROW;
2765 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002766 while ((cur != '<') && /* checked */
2767 (cur != '&') &&
2768 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002769 if ((cur == ']') && (NXT(1) == ']') &&
2770 (NXT(2) == '>')) {
2771 if (cdata) break;
2772 else {
2773 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2775 ctxt->sax->error(ctxt->userData,
2776 "Sequence ']]>' not allowed in content\n");
2777 /* Should this be relaxed ??? I see a "must here */
2778 ctxt->wellFormed = 0;
2779 ctxt->disableSAX = 1;
2780 }
2781 }
2782 COPY_BUF(l,buf,nbchar,cur);
2783 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2784 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002785 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002786 */
2787 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2788 if (areBlanks(ctxt, buf, nbchar)) {
2789 if (ctxt->sax->ignorableWhitespace != NULL)
2790 ctxt->sax->ignorableWhitespace(ctxt->userData,
2791 buf, nbchar);
2792 } else {
2793 if (ctxt->sax->characters != NULL)
2794 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2795 }
2796 }
2797 nbchar = 0;
2798 }
2799 count++;
2800 if (count > 50) {
2801 GROW;
2802 count = 0;
2803 }
2804 NEXTL(l);
2805 cur = CUR_CHAR(l);
2806 }
2807 if (nbchar != 0) {
2808 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002809 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002810 */
2811 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2812 if (areBlanks(ctxt, buf, nbchar)) {
2813 if (ctxt->sax->ignorableWhitespace != NULL)
2814 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2815 } else {
2816 if (ctxt->sax->characters != NULL)
2817 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2818 }
2819 }
2820 }
2821}
2822
2823/**
2824 * xmlParseExternalID:
2825 * @ctxt: an XML parser context
2826 * @publicID: a xmlChar** receiving PubidLiteral
2827 * @strict: indicate whether we should restrict parsing to only
2828 * production [75], see NOTE below
2829 *
2830 * Parse an External ID or a Public ID
2831 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002832 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002833 * 'PUBLIC' S PubidLiteral S SystemLiteral
2834 *
2835 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2836 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2837 *
2838 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2839 *
2840 * Returns the function returns SystemLiteral and in the second
2841 * case publicID receives PubidLiteral, is strict is off
2842 * it is possible to return NULL and have publicID set.
2843 */
2844
2845xmlChar *
2846xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2847 xmlChar *URI = NULL;
2848
2849 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002850
2851 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002852 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2853 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2854 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2855 SKIP(6);
2856 if (!IS_BLANK(CUR)) {
2857 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2859 ctxt->sax->error(ctxt->userData,
2860 "Space required after 'SYSTEM'\n");
2861 ctxt->wellFormed = 0;
2862 ctxt->disableSAX = 1;
2863 }
2864 SKIP_BLANKS;
2865 URI = xmlParseSystemLiteral(ctxt);
2866 if (URI == NULL) {
2867 ctxt->errNo = XML_ERR_URI_REQUIRED;
2868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2869 ctxt->sax->error(ctxt->userData,
2870 "xmlParseExternalID: SYSTEM, no URI\n");
2871 ctxt->wellFormed = 0;
2872 ctxt->disableSAX = 1;
2873 }
2874 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2875 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2876 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2877 SKIP(6);
2878 if (!IS_BLANK(CUR)) {
2879 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Space required after 'PUBLIC'\n");
2883 ctxt->wellFormed = 0;
2884 ctxt->disableSAX = 1;
2885 }
2886 SKIP_BLANKS;
2887 *publicID = xmlParsePubidLiteral(ctxt);
2888 if (*publicID == NULL) {
2889 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2891 ctxt->sax->error(ctxt->userData,
2892 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2893 ctxt->wellFormed = 0;
2894 ctxt->disableSAX = 1;
2895 }
2896 if (strict) {
2897 /*
2898 * We don't handle [83] so "S SystemLiteral" is required.
2899 */
2900 if (!IS_BLANK(CUR)) {
2901 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2903 ctxt->sax->error(ctxt->userData,
2904 "Space required after the Public Identifier\n");
2905 ctxt->wellFormed = 0;
2906 ctxt->disableSAX = 1;
2907 }
2908 } else {
2909 /*
2910 * We handle [83] so we return immediately, if
2911 * "S SystemLiteral" is not detected. From a purely parsing
2912 * point of view that's a nice mess.
2913 */
2914 const xmlChar *ptr;
2915 GROW;
2916
2917 ptr = CUR_PTR;
2918 if (!IS_BLANK(*ptr)) return(NULL);
2919
2920 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2921 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2922 }
2923 SKIP_BLANKS;
2924 URI = xmlParseSystemLiteral(ctxt);
2925 if (URI == NULL) {
2926 ctxt->errNo = XML_ERR_URI_REQUIRED;
2927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2928 ctxt->sax->error(ctxt->userData,
2929 "xmlParseExternalID: PUBLIC, no URI\n");
2930 ctxt->wellFormed = 0;
2931 ctxt->disableSAX = 1;
2932 }
2933 }
2934 return(URI);
2935}
2936
2937/**
2938 * xmlParseComment:
2939 * @ctxt: an XML parser context
2940 *
2941 * Skip an XML (SGML) comment <!-- .... -->
2942 * The spec says that "For compatibility, the string "--" (double-hyphen)
2943 * must not occur within comments. "
2944 *
2945 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2946 */
2947void
2948xmlParseComment(xmlParserCtxtPtr ctxt) {
2949 xmlChar *buf = NULL;
2950 int len;
2951 int size = XML_PARSER_BUFFER_SIZE;
2952 int q, ql;
2953 int r, rl;
2954 int cur, l;
2955 xmlParserInputState state;
2956 xmlParserInputPtr input = ctxt->input;
2957 int count = 0;
2958
2959 /*
2960 * Check that there is a comment right here.
2961 */
2962 if ((RAW != '<') || (NXT(1) != '!') ||
2963 (NXT(2) != '-') || (NXT(3) != '-')) return;
2964
2965 state = ctxt->instate;
2966 ctxt->instate = XML_PARSER_COMMENT;
2967 SHRINK;
2968 SKIP(4);
2969 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2970 if (buf == NULL) {
2971 xmlGenericError(xmlGenericErrorContext,
2972 "malloc of %d byte failed\n", size);
2973 ctxt->instate = state;
2974 return;
2975 }
2976 q = CUR_CHAR(ql);
2977 NEXTL(ql);
2978 r = CUR_CHAR(rl);
2979 NEXTL(rl);
2980 cur = CUR_CHAR(l);
2981 len = 0;
2982 while (IS_CHAR(cur) && /* checked */
2983 ((cur != '>') ||
2984 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002985 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002986 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2988 ctxt->sax->error(ctxt->userData,
2989 "Comment must not contain '--' (double-hyphen)`\n");
2990 ctxt->wellFormed = 0;
2991 ctxt->disableSAX = 1;
2992 }
2993 if (len + 5 >= size) {
2994 size *= 2;
2995 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2996 if (buf == NULL) {
2997 xmlGenericError(xmlGenericErrorContext,
2998 "realloc of %d byte failed\n", size);
2999 ctxt->instate = state;
3000 return;
3001 }
3002 }
3003 COPY_BUF(ql,buf,len,q);
3004 q = r;
3005 ql = rl;
3006 r = cur;
3007 rl = l;
3008
3009 count++;
3010 if (count > 50) {
3011 GROW;
3012 count = 0;
3013 }
3014 NEXTL(l);
3015 cur = CUR_CHAR(l);
3016 if (cur == 0) {
3017 SHRINK;
3018 GROW;
3019 cur = CUR_CHAR(l);
3020 }
3021 }
3022 buf[len] = 0;
3023 if (!IS_CHAR(cur)) {
3024 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3026 ctxt->sax->error(ctxt->userData,
3027 "Comment not terminated \n<!--%.50s\n", buf);
3028 ctxt->wellFormed = 0;
3029 ctxt->disableSAX = 1;
3030 xmlFree(buf);
3031 } else {
3032 if (input != ctxt->input) {
3033 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3035 ctxt->sax->error(ctxt->userData,
3036"Comment doesn't start and stop in the same entity\n");
3037 ctxt->wellFormed = 0;
3038 ctxt->disableSAX = 1;
3039 }
3040 NEXT;
3041 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3042 (!ctxt->disableSAX))
3043 ctxt->sax->comment(ctxt->userData, buf);
3044 xmlFree(buf);
3045 }
3046 ctxt->instate = state;
3047}
3048
3049/**
3050 * xmlParsePITarget:
3051 * @ctxt: an XML parser context
3052 *
3053 * parse the name of a PI
3054 *
3055 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3056 *
3057 * Returns the PITarget name or NULL
3058 */
3059
3060xmlChar *
3061xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3062 xmlChar *name;
3063
3064 name = xmlParseName(ctxt);
3065 if ((name != NULL) &&
3066 ((name[0] == 'x') || (name[0] == 'X')) &&
3067 ((name[1] == 'm') || (name[1] == 'M')) &&
3068 ((name[2] == 'l') || (name[2] == 'L'))) {
3069 int i;
3070 if ((name[0] == 'x') && (name[1] == 'm') &&
3071 (name[2] == 'l') && (name[3] == 0)) {
3072 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3074 ctxt->sax->error(ctxt->userData,
3075 "XML declaration allowed only at the start of the document\n");
3076 ctxt->wellFormed = 0;
3077 ctxt->disableSAX = 1;
3078 return(name);
3079 } else if (name[3] == 0) {
3080 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3082 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3083 ctxt->wellFormed = 0;
3084 ctxt->disableSAX = 1;
3085 return(name);
3086 }
3087 for (i = 0;;i++) {
3088 if (xmlW3CPIs[i] == NULL) break;
3089 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3090 return(name);
3091 }
3092 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3093 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3094 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003095 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003096 }
3097 }
3098 return(name);
3099}
3100
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the pseudo-attribute 'catalog' followed by '='
 * and a quoted URL, with optional blanks around each token and nothing
 * but blanks after the closing quote. On success the URL is added to the
 * catalogs of the parsing context. A syntax error raises a warning,
 * except when the '=' is missing, in which case the PI is silently
 * ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;
    int valid = 0;

    /* single pass block, left with 'break' on the first syntax error */
    do {
        while (IS_BLANK(*p)) p++;
        if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
            break;
        p += 7;
        while (IS_BLANK(*p)) p++;
        if (*p != '=')
            return;
        p++;
        while (IS_BLANK(*p)) p++;
        quote = *p;
        if ((quote != '\'') && (quote != '"'))
            break;
        p++;
        start = p;
        while ((*p != 0) && (*p != quote)) p++;
        if (*p == 0)
            break;
        URL = xmlStrndup(start, p - start);
        p++;
        while (IS_BLANK(*p)) p++;
        if (*p != 0)
            break;
        valid = 1;
    } while (0);

    if (valid) {
        if (URL != NULL) {
            ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
            xmlFree(URL);
        }
        return;
    }

    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3163
Owen Taylor3473f882001-02-23 17:55:21 +00003164/**
3165 * xmlParsePI:
3166 * @ctxt: an XML parser context
3167 *
3168 * parse an XML Processing Instruction.
3169 *
3170 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3171 *
3172 * The processing is transfered to SAX once parsed.
3173 */
3174
3175void
3176xmlParsePI(xmlParserCtxtPtr ctxt) {
3177 xmlChar *buf = NULL;
3178 int len = 0;
3179 int size = XML_PARSER_BUFFER_SIZE;
3180 int cur, l;
3181 xmlChar *target;
3182 xmlParserInputState state;
3183 int count = 0;
3184
3185 if ((RAW == '<') && (NXT(1) == '?')) {
3186 xmlParserInputPtr input = ctxt->input;
3187 state = ctxt->instate;
3188 ctxt->instate = XML_PARSER_PI;
3189 /*
3190 * this is a Processing Instruction.
3191 */
3192 SKIP(2);
3193 SHRINK;
3194
3195 /*
3196 * Parse the target name and check for special support like
3197 * namespace.
3198 */
3199 target = xmlParsePITarget(ctxt);
3200 if (target != NULL) {
3201 if ((RAW == '?') && (NXT(1) == '>')) {
3202 if (input != ctxt->input) {
3203 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3205 ctxt->sax->error(ctxt->userData,
3206 "PI declaration doesn't start and stop in the same entity\n");
3207 ctxt->wellFormed = 0;
3208 ctxt->disableSAX = 1;
3209 }
3210 SKIP(2);
3211
3212 /*
3213 * SAX: PI detected.
3214 */
3215 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3216 (ctxt->sax->processingInstruction != NULL))
3217 ctxt->sax->processingInstruction(ctxt->userData,
3218 target, NULL);
3219 ctxt->instate = state;
3220 xmlFree(target);
3221 return;
3222 }
3223 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3224 if (buf == NULL) {
3225 xmlGenericError(xmlGenericErrorContext,
3226 "malloc of %d byte failed\n", size);
3227 ctxt->instate = state;
3228 return;
3229 }
3230 cur = CUR;
3231 if (!IS_BLANK(cur)) {
3232 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3234 ctxt->sax->error(ctxt->userData,
3235 "xmlParsePI: PI %s space expected\n", target);
3236 ctxt->wellFormed = 0;
3237 ctxt->disableSAX = 1;
3238 }
3239 SKIP_BLANKS;
3240 cur = CUR_CHAR(l);
3241 while (IS_CHAR(cur) && /* checked */
3242 ((cur != '?') || (NXT(1) != '>'))) {
3243 if (len + 5 >= size) {
3244 size *= 2;
3245 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3246 if (buf == NULL) {
3247 xmlGenericError(xmlGenericErrorContext,
3248 "realloc of %d byte failed\n", size);
3249 ctxt->instate = state;
3250 return;
3251 }
3252 }
3253 count++;
3254 if (count > 50) {
3255 GROW;
3256 count = 0;
3257 }
3258 COPY_BUF(l,buf,len,cur);
3259 NEXTL(l);
3260 cur = CUR_CHAR(l);
3261 if (cur == 0) {
3262 SHRINK;
3263 GROW;
3264 cur = CUR_CHAR(l);
3265 }
3266 }
3267 buf[len] = 0;
3268 if (cur != '?') {
3269 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3271 ctxt->sax->error(ctxt->userData,
3272 "xmlParsePI: PI %s never end ...\n", target);
3273 ctxt->wellFormed = 0;
3274 ctxt->disableSAX = 1;
3275 } else {
3276 if (input != ctxt->input) {
3277 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3279 ctxt->sax->error(ctxt->userData,
3280 "PI declaration doesn't start and stop in the same entity\n");
3281 ctxt->wellFormed = 0;
3282 ctxt->disableSAX = 1;
3283 }
3284 SKIP(2);
3285
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003286#ifdef LIBXML_CATALOG_ENABLED
3287 if (((state == XML_PARSER_MISC) ||
3288 (state == XML_PARSER_START)) &&
3289 (xmlStrEqual(target, XML_CATALOG_PI))) {
3290 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3291 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3292 (allow == XML_CATA_ALLOW_ALL))
3293 xmlParseCatalogPI(ctxt, buf);
3294 }
3295#endif
3296
3297
Owen Taylor3473f882001-02-23 17:55:21 +00003298 /*
3299 * SAX: PI detected.
3300 */
3301 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3302 (ctxt->sax->processingInstruction != NULL))
3303 ctxt->sax->processingInstruction(ctxt->userData,
3304 target, buf);
3305 }
3306 xmlFree(buf);
3307 xmlFree(target);
3308 } else {
3309 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312 "xmlParsePI : no target name\n");
3313 ctxt->wellFormed = 0;
3314 ctxt->disableSAX = 1;
3315 }
3316 ctxt->instate = state;
3317 }
3318}
3319
3320/**
3321 * xmlParseNotationDecl:
3322 * @ctxt: an XML parser context
3323 *
3324 * parse a notation declaration
3325 *
3326 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3327 *
3328 * Hence there is actually 3 choices:
3329 * 'PUBLIC' S PubidLiteral
3330 * 'PUBLIC' S PubidLiteral S SystemLiteral
3331 * and 'SYSTEM' S SystemLiteral
3332 *
3333 * See the NOTE on xmlParseExternalID().
3334 */
3335
3336void
3337xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3338 xmlChar *name;
3339 xmlChar *Pubid;
3340 xmlChar *Systemid;
3341
3342 if ((RAW == '<') && (NXT(1) == '!') &&
3343 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3344 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3345 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3346 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3347 xmlParserInputPtr input = ctxt->input;
3348 SHRINK;
3349 SKIP(10);
3350 if (!IS_BLANK(CUR)) {
3351 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3353 ctxt->sax->error(ctxt->userData,
3354 "Space required after '<!NOTATION'\n");
3355 ctxt->wellFormed = 0;
3356 ctxt->disableSAX = 1;
3357 return;
3358 }
3359 SKIP_BLANKS;
3360
Daniel Veillard76d66f42001-05-16 21:05:17 +00003361 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003362 if (name == NULL) {
3363 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3365 ctxt->sax->error(ctxt->userData,
3366 "NOTATION: Name expected here\n");
3367 ctxt->wellFormed = 0;
3368 ctxt->disableSAX = 1;
3369 return;
3370 }
3371 if (!IS_BLANK(CUR)) {
3372 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3374 ctxt->sax->error(ctxt->userData,
3375 "Space required after the NOTATION name'\n");
3376 ctxt->wellFormed = 0;
3377 ctxt->disableSAX = 1;
3378 return;
3379 }
3380 SKIP_BLANKS;
3381
3382 /*
3383 * Parse the IDs.
3384 */
3385 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3386 SKIP_BLANKS;
3387
3388 if (RAW == '>') {
3389 if (input != ctxt->input) {
3390 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3392 ctxt->sax->error(ctxt->userData,
3393"Notation declaration doesn't start and stop in the same entity\n");
3394 ctxt->wellFormed = 0;
3395 ctxt->disableSAX = 1;
3396 }
3397 NEXT;
3398 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3399 (ctxt->sax->notationDecl != NULL))
3400 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3401 } else {
3402 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3404 ctxt->sax->error(ctxt->userData,
3405 "'>' required to close NOTATION declaration\n");
3406 ctxt->wellFormed = 0;
3407 ctxt->disableSAX = 1;
3408 }
3409 xmlFree(name);
3410 if (Systemid != NULL) xmlFree(Systemid);
3411 if (Pubid != NULL) xmlFree(Pubid);
3412 }
3413}
3414
3415/**
3416 * xmlParseEntityDecl:
3417 * @ctxt: an XML parser context
3418 *
3419 * parse <!ENTITY declarations
3420 *
3421 * [70] EntityDecl ::= GEDecl | PEDecl
3422 *
3423 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3424 *
3425 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3426 *
3427 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3428 *
3429 * [74] PEDef ::= EntityValue | ExternalID
3430 *
3431 * [76] NDataDecl ::= S 'NDATA' S Name
3432 *
3433 * [ VC: Notation Declared ]
3434 * The Name must match the declared name of a notation.
3435 */
3436
3437void
3438xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3439 xmlChar *name = NULL;
3440 xmlChar *value = NULL;
3441 xmlChar *URI = NULL, *literal = NULL;
3442 xmlChar *ndata = NULL;
3443 int isParameter = 0;
3444 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003445 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003446
3447 GROW;
3448 if ((RAW == '<') && (NXT(1) == '!') &&
3449 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3450 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3451 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3452 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003453 SHRINK;
3454 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003455 skipped = SKIP_BLANKS;
3456 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003457 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Space required after '<!ENTITY'\n");
3461 ctxt->wellFormed = 0;
3462 ctxt->disableSAX = 1;
3463 }
Owen Taylor3473f882001-02-23 17:55:21 +00003464
3465 if (RAW == '%') {
3466 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003467 skipped = SKIP_BLANKS;
3468 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003469 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3471 ctxt->sax->error(ctxt->userData,
3472 "Space required after '%'\n");
3473 ctxt->wellFormed = 0;
3474 ctxt->disableSAX = 1;
3475 }
Owen Taylor3473f882001-02-23 17:55:21 +00003476 isParameter = 1;
3477 }
3478
Daniel Veillard76d66f42001-05-16 21:05:17 +00003479 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003480 if (name == NULL) {
3481 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3483 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3484 ctxt->wellFormed = 0;
3485 ctxt->disableSAX = 1;
3486 return;
3487 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003488 skipped = SKIP_BLANKS;
3489 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003490 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3492 ctxt->sax->error(ctxt->userData,
3493 "Space required after the entity name\n");
3494 ctxt->wellFormed = 0;
3495 ctxt->disableSAX = 1;
3496 }
Owen Taylor3473f882001-02-23 17:55:21 +00003497
Daniel Veillardf5582f12002-06-11 10:08:16 +00003498 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003499 /*
3500 * handle the various case of definitions...
3501 */
3502 if (isParameter) {
3503 if ((RAW == '"') || (RAW == '\'')) {
3504 value = xmlParseEntityValue(ctxt, &orig);
3505 if (value) {
3506 if ((ctxt->sax != NULL) &&
3507 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3508 ctxt->sax->entityDecl(ctxt->userData, name,
3509 XML_INTERNAL_PARAMETER_ENTITY,
3510 NULL, NULL, value);
3511 }
3512 } else {
3513 URI = xmlParseExternalID(ctxt, &literal, 1);
3514 if ((URI == NULL) && (literal == NULL)) {
3515 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3517 ctxt->sax->error(ctxt->userData,
3518 "Entity value required\n");
3519 ctxt->wellFormed = 0;
3520 ctxt->disableSAX = 1;
3521 }
3522 if (URI) {
3523 xmlURIPtr uri;
3524
3525 uri = xmlParseURI((const char *) URI);
3526 if (uri == NULL) {
3527 ctxt->errNo = XML_ERR_INVALID_URI;
3528 if ((ctxt->sax != NULL) &&
3529 (!ctxt->disableSAX) &&
3530 (ctxt->sax->error != NULL))
3531 ctxt->sax->error(ctxt->userData,
3532 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003533 /*
3534 * This really ought to be a well formedness error
3535 * but the XML Core WG decided otherwise c.f. issue
3536 * E26 of the XML erratas.
3537 */
Owen Taylor3473f882001-02-23 17:55:21 +00003538 } else {
3539 if (uri->fragment != NULL) {
3540 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3541 if ((ctxt->sax != NULL) &&
3542 (!ctxt->disableSAX) &&
3543 (ctxt->sax->error != NULL))
3544 ctxt->sax->error(ctxt->userData,
3545 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003546 /*
3547 * Okay this is foolish to block those but not
3548 * invalid URIs.
3549 */
Owen Taylor3473f882001-02-23 17:55:21 +00003550 ctxt->wellFormed = 0;
3551 } else {
3552 if ((ctxt->sax != NULL) &&
3553 (!ctxt->disableSAX) &&
3554 (ctxt->sax->entityDecl != NULL))
3555 ctxt->sax->entityDecl(ctxt->userData, name,
3556 XML_EXTERNAL_PARAMETER_ENTITY,
3557 literal, URI, NULL);
3558 }
3559 xmlFreeURI(uri);
3560 }
3561 }
3562 }
3563 } else {
3564 if ((RAW == '"') || (RAW == '\'')) {
3565 value = xmlParseEntityValue(ctxt, &orig);
3566 if ((ctxt->sax != NULL) &&
3567 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3568 ctxt->sax->entityDecl(ctxt->userData, name,
3569 XML_INTERNAL_GENERAL_ENTITY,
3570 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003571 /*
3572 * For expat compatibility in SAX mode.
3573 */
3574 if ((ctxt->myDoc == NULL) ||
3575 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3576 if (ctxt->myDoc == NULL) {
3577 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3578 }
3579 if (ctxt->myDoc->intSubset == NULL)
3580 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3581 BAD_CAST "fake", NULL, NULL);
3582
3583 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3584 NULL, NULL, value);
3585 }
Owen Taylor3473f882001-02-23 17:55:21 +00003586 } else {
3587 URI = xmlParseExternalID(ctxt, &literal, 1);
3588 if ((URI == NULL) && (literal == NULL)) {
3589 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3591 ctxt->sax->error(ctxt->userData,
3592 "Entity value required\n");
3593 ctxt->wellFormed = 0;
3594 ctxt->disableSAX = 1;
3595 }
3596 if (URI) {
3597 xmlURIPtr uri;
3598
3599 uri = xmlParseURI((const char *)URI);
3600 if (uri == NULL) {
3601 ctxt->errNo = XML_ERR_INVALID_URI;
3602 if ((ctxt->sax != NULL) &&
3603 (!ctxt->disableSAX) &&
3604 (ctxt->sax->error != NULL))
3605 ctxt->sax->error(ctxt->userData,
3606 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003607 /*
3608 * This really ought to be a well formedness error
3609 * but the XML Core WG decided otherwise c.f. issue
3610 * E26 of the XML erratas.
3611 */
Owen Taylor3473f882001-02-23 17:55:21 +00003612 } else {
3613 if (uri->fragment != NULL) {
3614 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3615 if ((ctxt->sax != NULL) &&
3616 (!ctxt->disableSAX) &&
3617 (ctxt->sax->error != NULL))
3618 ctxt->sax->error(ctxt->userData,
3619 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003620 /*
3621 * Okay this is foolish to block those but not
3622 * invalid URIs.
3623 */
Owen Taylor3473f882001-02-23 17:55:21 +00003624 ctxt->wellFormed = 0;
3625 }
3626 xmlFreeURI(uri);
3627 }
3628 }
3629 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3630 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3632 ctxt->sax->error(ctxt->userData,
3633 "Space required before 'NDATA'\n");
3634 ctxt->wellFormed = 0;
3635 ctxt->disableSAX = 1;
3636 }
3637 SKIP_BLANKS;
3638 if ((RAW == 'N') && (NXT(1) == 'D') &&
3639 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3640 (NXT(4) == 'A')) {
3641 SKIP(5);
3642 if (!IS_BLANK(CUR)) {
3643 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3645 ctxt->sax->error(ctxt->userData,
3646 "Space required after 'NDATA'\n");
3647 ctxt->wellFormed = 0;
3648 ctxt->disableSAX = 1;
3649 }
3650 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003651 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003652 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3653 (ctxt->sax->unparsedEntityDecl != NULL))
3654 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3655 literal, URI, ndata);
3656 } else {
3657 if ((ctxt->sax != NULL) &&
3658 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3659 ctxt->sax->entityDecl(ctxt->userData, name,
3660 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3661 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003662 /*
3663 * For expat compatibility in SAX mode.
3664 * assuming the entity repalcement was asked for
3665 */
3666 if ((ctxt->replaceEntities != 0) &&
3667 ((ctxt->myDoc == NULL) ||
3668 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3669 if (ctxt->myDoc == NULL) {
3670 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3671 }
3672
3673 if (ctxt->myDoc->intSubset == NULL)
3674 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3675 BAD_CAST "fake", NULL, NULL);
3676 entityDecl(ctxt, name,
3677 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3678 literal, URI, NULL);
3679 }
Owen Taylor3473f882001-02-23 17:55:21 +00003680 }
3681 }
3682 }
3683 SKIP_BLANKS;
3684 if (RAW != '>') {
3685 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3687 ctxt->sax->error(ctxt->userData,
3688 "xmlParseEntityDecl: entity %s not terminated\n", name);
3689 ctxt->wellFormed = 0;
3690 ctxt->disableSAX = 1;
3691 } else {
3692 if (input != ctxt->input) {
3693 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3695 ctxt->sax->error(ctxt->userData,
3696"Entity declaration doesn't start and stop in the same entity\n");
3697 ctxt->wellFormed = 0;
3698 ctxt->disableSAX = 1;
3699 }
3700 NEXT;
3701 }
3702 if (orig != NULL) {
3703 /*
3704 * Ugly mechanism to save the raw entity value.
3705 */
3706 xmlEntityPtr cur = NULL;
3707
3708 if (isParameter) {
3709 if ((ctxt->sax != NULL) &&
3710 (ctxt->sax->getParameterEntity != NULL))
3711 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3712 } else {
3713 if ((ctxt->sax != NULL) &&
3714 (ctxt->sax->getEntity != NULL))
3715 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003716 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3717 cur = getEntity(ctxt, name);
3718 }
Owen Taylor3473f882001-02-23 17:55:21 +00003719 }
3720 if (cur != NULL) {
3721 if (cur->orig != NULL)
3722 xmlFree(orig);
3723 else
3724 cur->orig = orig;
3725 } else
3726 xmlFree(orig);
3727 }
3728 if (name != NULL) xmlFree(name);
3729 if (value != NULL) xmlFree(value);
3730 if (URI != NULL) xmlFree(URI);
3731 if (literal != NULL) xmlFree(literal);
3732 if (ndata != NULL) xmlFree(ndata);
3733 }
3734}
3735
3736/**
3737 * xmlParseDefaultDecl:
3738 * @ctxt: an XML parser context
3739 * @value: Receive a possible fixed default value for the attribute
3740 *
3741 * Parse an attribute default declaration
3742 *
3743 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3744 *
3745 * [ VC: Required Attribute ]
3746 * if the default declaration is the keyword #REQUIRED, then the
3747 * attribute must be specified for all elements of the type in the
3748 * attribute-list declaration.
3749 *
3750 * [ VC: Attribute Default Legal ]
3751 * The declared default value must meet the lexical constraints of
3752 * the declared attribute type c.f. xmlValidateAttributeDecl()
3753 *
3754 * [ VC: Fixed Attribute Default ]
3755 * if an attribute has a default value declared with the #FIXED
3756 * keyword, instances of that attribute must match the default value.
3757 *
3758 * [ WFC: No < in Attribute Values ]
3759 * handled in xmlParseAttValue()
3760 *
3761 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3762 * or XML_ATTRIBUTE_FIXED.
3763 */
3764
3765int
3766xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3767 int val;
3768 xmlChar *ret;
3769
3770 *value = NULL;
3771 if ((RAW == '#') && (NXT(1) == 'R') &&
3772 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3773 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3774 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3775 (NXT(8) == 'D')) {
3776 SKIP(9);
3777 return(XML_ATTRIBUTE_REQUIRED);
3778 }
3779 if ((RAW == '#') && (NXT(1) == 'I') &&
3780 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3781 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3782 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3783 SKIP(8);
3784 return(XML_ATTRIBUTE_IMPLIED);
3785 }
3786 val = XML_ATTRIBUTE_NONE;
3787 if ((RAW == '#') && (NXT(1) == 'F') &&
3788 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3789 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3790 SKIP(6);
3791 val = XML_ATTRIBUTE_FIXED;
3792 if (!IS_BLANK(CUR)) {
3793 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3795 ctxt->sax->error(ctxt->userData,
3796 "Space required after '#FIXED'\n");
3797 ctxt->wellFormed = 0;
3798 ctxt->disableSAX = 1;
3799 }
3800 SKIP_BLANKS;
3801 }
3802 ret = xmlParseAttValue(ctxt);
3803 ctxt->instate = XML_PARSER_DTD;
3804 if (ret == NULL) {
3805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3806 ctxt->sax->error(ctxt->userData,
3807 "Attribute default value declaration error\n");
3808 ctxt->wellFormed = 0;
3809 ctxt->disableSAX = 1;
3810 } else
3811 *value = ret;
3812 return(val);
3813}
3814
3815/**
3816 * xmlParseNotationType:
3817 * @ctxt: an XML parser context
3818 *
3819 * parse an Notation attribute type.
3820 *
3821 * Note: the leading 'NOTATION' S part has already being parsed...
3822 *
3823 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3824 *
3825 * [ VC: Notation Attributes ]
3826 * Values of this type must match one of the notation names included
3827 * in the declaration; all notation names in the declaration must be declared.
3828 *
3829 * Returns: the notation attribute tree built while parsing
3830 */
3831
3832xmlEnumerationPtr
3833xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3834 xmlChar *name;
3835 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3836
3837 if (RAW != '(') {
3838 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3839 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3840 ctxt->sax->error(ctxt->userData,
3841 "'(' required to start 'NOTATION'\n");
3842 ctxt->wellFormed = 0;
3843 ctxt->disableSAX = 1;
3844 return(NULL);
3845 }
3846 SHRINK;
3847 do {
3848 NEXT;
3849 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003850 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003851 if (name == NULL) {
3852 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3854 ctxt->sax->error(ctxt->userData,
3855 "Name expected in NOTATION declaration\n");
3856 ctxt->wellFormed = 0;
3857 ctxt->disableSAX = 1;
3858 return(ret);
3859 }
3860 cur = xmlCreateEnumeration(name);
3861 xmlFree(name);
3862 if (cur == NULL) return(ret);
3863 if (last == NULL) ret = last = cur;
3864 else {
3865 last->next = cur;
3866 last = cur;
3867 }
3868 SKIP_BLANKS;
3869 } while (RAW == '|');
3870 if (RAW != ')') {
3871 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3873 ctxt->sax->error(ctxt->userData,
3874 "')' required to finish NOTATION declaration\n");
3875 ctxt->wellFormed = 0;
3876 ctxt->disableSAX = 1;
3877 if ((last != NULL) && (last != ret))
3878 xmlFreeEnumeration(last);
3879 return(ret);
3880 }
3881 NEXT;
3882 return(ret);
3883}
3884
3885/**
3886 * xmlParseEnumerationType:
3887 * @ctxt: an XML parser context
3888 *
3889 * parse an Enumeration attribute type.
3890 *
3891 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3892 *
3893 * [ VC: Enumeration ]
3894 * Values of this type must match one of the Nmtoken tokens in
3895 * the declaration
3896 *
3897 * Returns: the enumeration attribute tree built while parsing
3898 */
3899
3900xmlEnumerationPtr
3901xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3902 xmlChar *name;
3903 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3904
3905 if (RAW != '(') {
3906 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3908 ctxt->sax->error(ctxt->userData,
3909 "'(' required to start ATTLIST enumeration\n");
3910 ctxt->wellFormed = 0;
3911 ctxt->disableSAX = 1;
3912 return(NULL);
3913 }
3914 SHRINK;
3915 do {
3916 NEXT;
3917 SKIP_BLANKS;
3918 name = xmlParseNmtoken(ctxt);
3919 if (name == NULL) {
3920 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3922 ctxt->sax->error(ctxt->userData,
3923 "NmToken expected in ATTLIST enumeration\n");
3924 ctxt->wellFormed = 0;
3925 ctxt->disableSAX = 1;
3926 return(ret);
3927 }
3928 cur = xmlCreateEnumeration(name);
3929 xmlFree(name);
3930 if (cur == NULL) return(ret);
3931 if (last == NULL) ret = last = cur;
3932 else {
3933 last->next = cur;
3934 last = cur;
3935 }
3936 SKIP_BLANKS;
3937 } while (RAW == '|');
3938 if (RAW != ')') {
3939 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3941 ctxt->sax->error(ctxt->userData,
3942 "')' required to finish ATTLIST enumeration\n");
3943 ctxt->wellFormed = 0;
3944 ctxt->disableSAX = 1;
3945 return(ret);
3946 }
3947 NEXT;
3948 return(ret);
3949}
3950
3951/**
3952 * xmlParseEnumeratedType:
3953 * @ctxt: an XML parser context
3954 * @tree: the enumeration tree built while parsing
3955 *
3956 * parse an Enumerated attribute type.
3957 *
3958 * [57] EnumeratedType ::= NotationType | Enumeration
3959 *
3960 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3961 *
3962 *
3963 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3964 */
3965
3966int
3967xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3968 if ((RAW == 'N') && (NXT(1) == 'O') &&
3969 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3970 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3971 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3972 SKIP(8);
3973 if (!IS_BLANK(CUR)) {
3974 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3976 ctxt->sax->error(ctxt->userData,
3977 "Space required after 'NOTATION'\n");
3978 ctxt->wellFormed = 0;
3979 ctxt->disableSAX = 1;
3980 return(0);
3981 }
3982 SKIP_BLANKS;
3983 *tree = xmlParseNotationType(ctxt);
3984 if (*tree == NULL) return(0);
3985 return(XML_ATTRIBUTE_NOTATION);
3986 }
3987 *tree = xmlParseEnumerationType(ctxt);
3988 if (*tree == NULL) return(0);
3989 return(XML_ATTRIBUTE_ENUMERATION);
3990}
3991
3992/**
3993 * xmlParseAttributeType:
3994 * @ctxt: an XML parser context
3995 * @tree: the enumeration tree built while parsing
3996 *
3997 * parse the Attribute list def for an element
3998 *
3999 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4000 *
4001 * [55] StringType ::= 'CDATA'
4002 *
4003 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4004 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4005 *
4006 * Validity constraints for attribute values syntax are checked in
4007 * xmlValidateAttributeValue()
4008 *
4009 * [ VC: ID ]
4010 * Values of type ID must match the Name production. A name must not
4011 * appear more than once in an XML document as a value of this type;
4012 * i.e., ID values must uniquely identify the elements which bear them.
4013 *
4014 * [ VC: One ID per Element Type ]
4015 * No element type may have more than one ID attribute specified.
4016 *
4017 * [ VC: ID Attribute Default ]
4018 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4019 *
4020 * [ VC: IDREF ]
4021 * Values of type IDREF must match the Name production, and values
4022 * of type IDREFS must match Names; each IDREF Name must match the value
4023 * of an ID attribute on some element in the XML document; i.e. IDREF
4024 * values must match the value of some ID attribute.
4025 *
4026 * [ VC: Entity Name ]
4027 * Values of type ENTITY must match the Name production, values
4028 * of type ENTITIES must match Names; each Entity Name must match the
4029 * name of an unparsed entity declared in the DTD.
4030 *
4031 * [ VC: Name Token ]
4032 * Values of type NMTOKEN must match the Nmtoken production; values
4033 * of type NMTOKENS must match Nmtokens.
4034 *
4035 * Returns the attribute type
4036 */
4037int
4038xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4039 SHRINK;
4040 if ((RAW == 'C') && (NXT(1) == 'D') &&
4041 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4042 (NXT(4) == 'A')) {
4043 SKIP(5);
4044 return(XML_ATTRIBUTE_CDATA);
4045 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4046 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4047 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4048 SKIP(6);
4049 return(XML_ATTRIBUTE_IDREFS);
4050 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4051 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4052 (NXT(4) == 'F')) {
4053 SKIP(5);
4054 return(XML_ATTRIBUTE_IDREF);
4055 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4056 SKIP(2);
4057 return(XML_ATTRIBUTE_ID);
4058 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4059 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4060 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4061 SKIP(6);
4062 return(XML_ATTRIBUTE_ENTITY);
4063 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4064 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4065 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4066 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4067 SKIP(8);
4068 return(XML_ATTRIBUTE_ENTITIES);
4069 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4070 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4071 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4072 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4073 SKIP(8);
4074 return(XML_ATTRIBUTE_NMTOKENS);
4075 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4076 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4077 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4078 (NXT(6) == 'N')) {
4079 SKIP(7);
4080 return(XML_ATTRIBUTE_NMTOKEN);
4081 }
4082 return(xmlParseEnumeratedType(ctxt, tree));
4083}
4084
4085/**
4086 * xmlParseAttributeListDecl:
4087 * @ctxt: an XML parser context
4088 *
4089 * : parse the Attribute list def for an element
4090 *
4091 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4092 *
4093 * [53] AttDef ::= S Name S AttType S DefaultDecl
4094 *
4095 */
4096void
4097xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4098 xmlChar *elemName;
4099 xmlChar *attrName;
4100 xmlEnumerationPtr tree;
4101
4102 if ((RAW == '<') && (NXT(1) == '!') &&
4103 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4104 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4105 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4106 (NXT(8) == 'T')) {
4107 xmlParserInputPtr input = ctxt->input;
4108
4109 SKIP(9);
4110 if (!IS_BLANK(CUR)) {
4111 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4113 ctxt->sax->error(ctxt->userData,
4114 "Space required after '<!ATTLIST'\n");
4115 ctxt->wellFormed = 0;
4116 ctxt->disableSAX = 1;
4117 }
4118 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004119 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004120 if (elemName == NULL) {
4121 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4123 ctxt->sax->error(ctxt->userData,
4124 "ATTLIST: no name for Element\n");
4125 ctxt->wellFormed = 0;
4126 ctxt->disableSAX = 1;
4127 return;
4128 }
4129 SKIP_BLANKS;
4130 GROW;
4131 while (RAW != '>') {
4132 const xmlChar *check = CUR_PTR;
4133 int type;
4134 int def;
4135 xmlChar *defaultValue = NULL;
4136
4137 GROW;
4138 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004139 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004140 if (attrName == NULL) {
4141 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4143 ctxt->sax->error(ctxt->userData,
4144 "ATTLIST: no name for Attribute\n");
4145 ctxt->wellFormed = 0;
4146 ctxt->disableSAX = 1;
4147 break;
4148 }
4149 GROW;
4150 if (!IS_BLANK(CUR)) {
4151 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4153 ctxt->sax->error(ctxt->userData,
4154 "Space required after the attribute name\n");
4155 ctxt->wellFormed = 0;
4156 ctxt->disableSAX = 1;
4157 if (attrName != NULL)
4158 xmlFree(attrName);
4159 if (defaultValue != NULL)
4160 xmlFree(defaultValue);
4161 break;
4162 }
4163 SKIP_BLANKS;
4164
4165 type = xmlParseAttributeType(ctxt, &tree);
4166 if (type <= 0) {
4167 if (attrName != NULL)
4168 xmlFree(attrName);
4169 if (defaultValue != NULL)
4170 xmlFree(defaultValue);
4171 break;
4172 }
4173
4174 GROW;
4175 if (!IS_BLANK(CUR)) {
4176 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4178 ctxt->sax->error(ctxt->userData,
4179 "Space required after the attribute type\n");
4180 ctxt->wellFormed = 0;
4181 ctxt->disableSAX = 1;
4182 if (attrName != NULL)
4183 xmlFree(attrName);
4184 if (defaultValue != NULL)
4185 xmlFree(defaultValue);
4186 if (tree != NULL)
4187 xmlFreeEnumeration(tree);
4188 break;
4189 }
4190 SKIP_BLANKS;
4191
4192 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4193 if (def <= 0) {
4194 if (attrName != NULL)
4195 xmlFree(attrName);
4196 if (defaultValue != NULL)
4197 xmlFree(defaultValue);
4198 if (tree != NULL)
4199 xmlFreeEnumeration(tree);
4200 break;
4201 }
4202
4203 GROW;
4204 if (RAW != '>') {
4205 if (!IS_BLANK(CUR)) {
4206 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4208 ctxt->sax->error(ctxt->userData,
4209 "Space required after the attribute default value\n");
4210 ctxt->wellFormed = 0;
4211 ctxt->disableSAX = 1;
4212 if (attrName != NULL)
4213 xmlFree(attrName);
4214 if (defaultValue != NULL)
4215 xmlFree(defaultValue);
4216 if (tree != NULL)
4217 xmlFreeEnumeration(tree);
4218 break;
4219 }
4220 SKIP_BLANKS;
4221 }
4222 if (check == CUR_PTR) {
4223 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4225 ctxt->sax->error(ctxt->userData,
4226 "xmlParseAttributeListDecl: detected internal error\n");
4227 if (attrName != NULL)
4228 xmlFree(attrName);
4229 if (defaultValue != NULL)
4230 xmlFree(defaultValue);
4231 if (tree != NULL)
4232 xmlFreeEnumeration(tree);
4233 break;
4234 }
4235 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4236 (ctxt->sax->attributeDecl != NULL))
4237 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4238 type, def, defaultValue, tree);
4239 if (attrName != NULL)
4240 xmlFree(attrName);
4241 if (defaultValue != NULL)
4242 xmlFree(defaultValue);
4243 GROW;
4244 }
4245 if (RAW == '>') {
4246 if (input != ctxt->input) {
4247 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250"Attribute list declaration doesn't start and stop in the same entity\n");
4251 ctxt->wellFormed = 0;
4252 ctxt->disableSAX = 1;
4253 }
4254 NEXT;
4255 }
4256
4257 xmlFree(elemName);
4258 }
4259}
4260
4261/**
4262 * xmlParseElementMixedContentDecl:
4263 * @ctxt: an XML parser context
4264 *
4265 * parse the declaration for a Mixed Element content
4266 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4267 *
4268 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4269 * '(' S? '#PCDATA' S? ')'
4270 *
4271 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4272 *
4273 * [ VC: No Duplicate Types ]
4274 * The same name must not appear more than once in a single
4275 * mixed-content declaration.
4276 *
4277 * returns: the list of the xmlElementContentPtr describing the element choices
4278 */
4279xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004280xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004281 xmlElementContentPtr ret = NULL, cur = NULL, n;
4282 xmlChar *elem = NULL;
4283
4284 GROW;
4285 if ((RAW == '#') && (NXT(1) == 'P') &&
4286 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4287 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4288 (NXT(6) == 'A')) {
4289 SKIP(7);
4290 SKIP_BLANKS;
4291 SHRINK;
4292 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004293 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4294 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4295 if (ctxt->vctxt.error != NULL)
4296 ctxt->vctxt.error(ctxt->vctxt.userData,
4297"Element content declaration doesn't start and stop in the same entity\n");
4298 ctxt->valid = 0;
4299 }
Owen Taylor3473f882001-02-23 17:55:21 +00004300 NEXT;
4301 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4302 if (RAW == '*') {
4303 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4304 NEXT;
4305 }
4306 return(ret);
4307 }
4308 if ((RAW == '(') || (RAW == '|')) {
4309 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4310 if (ret == NULL) return(NULL);
4311 }
4312 while (RAW == '|') {
4313 NEXT;
4314 if (elem == NULL) {
4315 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4316 if (ret == NULL) return(NULL);
4317 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004318 if (cur != NULL)
4319 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004320 cur = ret;
4321 } else {
4322 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4323 if (n == NULL) return(NULL);
4324 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004325 if (n->c1 != NULL)
4326 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004327 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004328 if (n != NULL)
4329 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004330 cur = n;
4331 xmlFree(elem);
4332 }
4333 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004334 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004335 if (elem == NULL) {
4336 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4338 ctxt->sax->error(ctxt->userData,
4339 "xmlParseElementMixedContentDecl : Name expected\n");
4340 ctxt->wellFormed = 0;
4341 ctxt->disableSAX = 1;
4342 xmlFreeElementContent(cur);
4343 return(NULL);
4344 }
4345 SKIP_BLANKS;
4346 GROW;
4347 }
4348 if ((RAW == ')') && (NXT(1) == '*')) {
4349 if (elem != NULL) {
4350 cur->c2 = xmlNewElementContent(elem,
4351 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004352 if (cur->c2 != NULL)
4353 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004354 xmlFree(elem);
4355 }
4356 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004357 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4358 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4359 if (ctxt->vctxt.error != NULL)
4360 ctxt->vctxt.error(ctxt->vctxt.userData,
4361"Element content declaration doesn't start and stop in the same entity\n");
4362 ctxt->valid = 0;
4363 }
Owen Taylor3473f882001-02-23 17:55:21 +00004364 SKIP(2);
4365 } else {
4366 if (elem != NULL) xmlFree(elem);
4367 xmlFreeElementContent(ret);
4368 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4370 ctxt->sax->error(ctxt->userData,
4371 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4372 ctxt->wellFormed = 0;
4373 ctxt->disableSAX = 1;
4374 return(NULL);
4375 }
4376
4377 } else {
4378 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4380 ctxt->sax->error(ctxt->userData,
4381 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4382 ctxt->wellFormed = 0;
4383 ctxt->disableSAX = 1;
4384 }
4385 return(ret);
4386}
4387
4388/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004389 * xmlParseElementChildrenContentD:
4390 * @ctxt: an XML parser context
4391 *
4392 * VMS version of xmlParseElementChildrenContentDecl()
4393 *
4394 * Returns the tree of xmlElementContentPtr describing the element
4395 * hierarchy.
4396 */
4397/**
Owen Taylor3473f882001-02-23 17:55:21 +00004398 * xmlParseElementChildrenContentDecl:
4399 * @ctxt: an XML parser context
4400 *
4401 * parse the declaration for a Mixed Element content
4402 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4403 *
4404 *
4405 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4406 *
4407 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4408 *
4409 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4410 *
4411 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4412 *
4413 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4414 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004415 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004416 * opening or closing parentheses in a choice, seq, or Mixed
4417 * construct is contained in the replacement text for a parameter
4418 * entity, both must be contained in the same replacement text. For
4419 * interoperability, if a parameter-entity reference appears in a
4420 * choice, seq, or Mixed construct, its replacement text should not
4421 * be empty, and neither the first nor last non-blank character of
4422 * the replacement text should be a connector (| or ,).
4423 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004424 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004425 * hierarchy.
4426 */
4427xmlElementContentPtr
4428#ifdef VMS
4429xmlParseElementChildrenContentD
4430#else
4431xmlParseElementChildrenContentDecl
4432#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004433(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004434 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4435 xmlChar *elem;
4436 xmlChar type = 0;
4437
4438 SKIP_BLANKS;
4439 GROW;
4440 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004441 xmlParserInputPtr input = ctxt->input;
4442
Owen Taylor3473f882001-02-23 17:55:21 +00004443 /* Recurse on first child */
4444 NEXT;
4445 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004446 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 SKIP_BLANKS;
4448 GROW;
4449 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004450 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004451 if (elem == NULL) {
4452 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4454 ctxt->sax->error(ctxt->userData,
4455 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4456 ctxt->wellFormed = 0;
4457 ctxt->disableSAX = 1;
4458 return(NULL);
4459 }
4460 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4461 GROW;
4462 if (RAW == '?') {
4463 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4464 NEXT;
4465 } else if (RAW == '*') {
4466 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4467 NEXT;
4468 } else if (RAW == '+') {
4469 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4470 NEXT;
4471 } else {
4472 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4473 }
4474 xmlFree(elem);
4475 GROW;
4476 }
4477 SKIP_BLANKS;
4478 SHRINK;
4479 while (RAW != ')') {
4480 /*
4481 * Each loop we parse one separator and one element.
4482 */
4483 if (RAW == ',') {
4484 if (type == 0) type = CUR;
4485
4486 /*
4487 * Detect "Name | Name , Name" error
4488 */
4489 else if (type != CUR) {
4490 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4492 ctxt->sax->error(ctxt->userData,
4493 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4494 type);
4495 ctxt->wellFormed = 0;
4496 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004497 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004498 xmlFreeElementContent(last);
4499 if (ret != NULL)
4500 xmlFreeElementContent(ret);
4501 return(NULL);
4502 }
4503 NEXT;
4504
4505 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4506 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004507 if ((last != NULL) && (last != ret))
4508 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004509 xmlFreeElementContent(ret);
4510 return(NULL);
4511 }
4512 if (last == NULL) {
4513 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004514 if (ret != NULL)
4515 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004516 ret = cur = op;
4517 } else {
4518 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004519 if (op != NULL)
4520 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004521 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004522 if (last != NULL)
4523 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004524 cur =op;
4525 last = NULL;
4526 }
4527 } else if (RAW == '|') {
4528 if (type == 0) type = CUR;
4529
4530 /*
4531 * Detect "Name , Name | Name" error
4532 */
4533 else if (type != CUR) {
4534 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4536 ctxt->sax->error(ctxt->userData,
4537 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4538 type);
4539 ctxt->wellFormed = 0;
4540 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004541 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004542 xmlFreeElementContent(last);
4543 if (ret != NULL)
4544 xmlFreeElementContent(ret);
4545 return(NULL);
4546 }
4547 NEXT;
4548
4549 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4550 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004551 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004552 xmlFreeElementContent(last);
4553 if (ret != NULL)
4554 xmlFreeElementContent(ret);
4555 return(NULL);
4556 }
4557 if (last == NULL) {
4558 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004559 if (ret != NULL)
4560 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004561 ret = cur = op;
4562 } else {
4563 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004564 if (op != NULL)
4565 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004566 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004567 if (last != NULL)
4568 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004569 cur =op;
4570 last = NULL;
4571 }
4572 } else {
4573 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4575 ctxt->sax->error(ctxt->userData,
4576 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4577 ctxt->wellFormed = 0;
4578 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004579 if (ret != NULL)
4580 xmlFreeElementContent(ret);
4581 return(NULL);
4582 }
4583 GROW;
4584 SKIP_BLANKS;
4585 GROW;
4586 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004587 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004588 /* Recurse on second child */
4589 NEXT;
4590 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004591 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004592 SKIP_BLANKS;
4593 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004594 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004595 if (elem == NULL) {
4596 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4598 ctxt->sax->error(ctxt->userData,
4599 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4600 ctxt->wellFormed = 0;
4601 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004602 if (ret != NULL)
4603 xmlFreeElementContent(ret);
4604 return(NULL);
4605 }
4606 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4607 xmlFree(elem);
4608 if (RAW == '?') {
4609 last->ocur = XML_ELEMENT_CONTENT_OPT;
4610 NEXT;
4611 } else if (RAW == '*') {
4612 last->ocur = XML_ELEMENT_CONTENT_MULT;
4613 NEXT;
4614 } else if (RAW == '+') {
4615 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4616 NEXT;
4617 } else {
4618 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4619 }
4620 }
4621 SKIP_BLANKS;
4622 GROW;
4623 }
4624 if ((cur != NULL) && (last != NULL)) {
4625 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004626 if (last != NULL)
4627 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004628 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004629 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4630 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4631 if (ctxt->vctxt.error != NULL)
4632 ctxt->vctxt.error(ctxt->vctxt.userData,
4633"Element content declaration doesn't start and stop in the same entity\n");
4634 ctxt->valid = 0;
4635 }
Owen Taylor3473f882001-02-23 17:55:21 +00004636 NEXT;
4637 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004638 if (ret != NULL)
4639 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004640 NEXT;
4641 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004642 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004643 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004644 cur = ret;
4645 /*
4646 * Some normalization:
4647 * (a | b* | c?)* == (a | b | c)*
4648 */
4649 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4650 if ((cur->c1 != NULL) &&
4651 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4652 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4653 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4654 if ((cur->c2 != NULL) &&
4655 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4656 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4657 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4658 cur = cur->c2;
4659 }
4660 }
Owen Taylor3473f882001-02-23 17:55:21 +00004661 NEXT;
4662 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004663 if (ret != NULL) {
4664 int found = 0;
4665
Daniel Veillarde470df72001-04-18 21:41:07 +00004666 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004667 /*
4668 * Some normalization:
4669 * (a | b*)+ == (a | b)*
4670 * (a | b?)+ == (a | b)*
4671 */
4672 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4673 if ((cur->c1 != NULL) &&
4674 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4675 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4676 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4677 found = 1;
4678 }
4679 if ((cur->c2 != NULL) &&
4680 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4681 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4682 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4683 found = 1;
4684 }
4685 cur = cur->c2;
4686 }
4687 if (found)
4688 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4689 }
Owen Taylor3473f882001-02-23 17:55:21 +00004690 NEXT;
4691 }
4692 return(ret);
4693}
4694
4695/**
4696 * xmlParseElementContentDecl:
4697 * @ctxt: an XML parser context
4698 * @name: the name of the element being defined.
4699 * @result: the Element Content pointer will be stored here if any
4700 *
4701 * parse the declaration for an Element content either Mixed or Children,
4702 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4703 *
4704 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4705 *
4706 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4707 */
4708
4709int
4710xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4711 xmlElementContentPtr *result) {
4712
4713 xmlElementContentPtr tree = NULL;
4714 xmlParserInputPtr input = ctxt->input;
4715 int res;
4716
4717 *result = NULL;
4718
4719 if (RAW != '(') {
4720 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4722 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004723 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004724 ctxt->wellFormed = 0;
4725 ctxt->disableSAX = 1;
4726 return(-1);
4727 }
4728 NEXT;
4729 GROW;
4730 SKIP_BLANKS;
4731 if ((RAW == '#') && (NXT(1) == 'P') &&
4732 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4733 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4734 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004735 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004736 res = XML_ELEMENT_TYPE_MIXED;
4737 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004738 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004739 res = XML_ELEMENT_TYPE_ELEMENT;
4740 }
Owen Taylor3473f882001-02-23 17:55:21 +00004741 SKIP_BLANKS;
4742 *result = tree;
4743 return(res);
4744}
4745
4746/**
4747 * xmlParseElementDecl:
4748 * @ctxt: an XML parser context
4749 *
4750 * parse an Element declaration.
4751 *
4752 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4753 *
4754 * [ VC: Unique Element Type Declaration ]
4755 * No element type may be declared more than once
4756 *
4757 * Returns the type of the element, or -1 in case of error
4758 */
4759int
4760xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4761 xmlChar *name;
4762 int ret = -1;
4763 xmlElementContentPtr content = NULL;
4764
4765 GROW;
4766 if ((RAW == '<') && (NXT(1) == '!') &&
4767 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4768 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4769 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4770 (NXT(8) == 'T')) {
4771 xmlParserInputPtr input = ctxt->input;
4772
4773 SKIP(9);
4774 if (!IS_BLANK(CUR)) {
4775 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4777 ctxt->sax->error(ctxt->userData,
4778 "Space required after 'ELEMENT'\n");
4779 ctxt->wellFormed = 0;
4780 ctxt->disableSAX = 1;
4781 }
4782 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004783 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004784 if (name == NULL) {
4785 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4787 ctxt->sax->error(ctxt->userData,
4788 "xmlParseElementDecl: no name for Element\n");
4789 ctxt->wellFormed = 0;
4790 ctxt->disableSAX = 1;
4791 return(-1);
4792 }
4793 while ((RAW == 0) && (ctxt->inputNr > 1))
4794 xmlPopInput(ctxt);
4795 if (!IS_BLANK(CUR)) {
4796 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4798 ctxt->sax->error(ctxt->userData,
4799 "Space required after the element name\n");
4800 ctxt->wellFormed = 0;
4801 ctxt->disableSAX = 1;
4802 }
4803 SKIP_BLANKS;
4804 if ((RAW == 'E') && (NXT(1) == 'M') &&
4805 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4806 (NXT(4) == 'Y')) {
4807 SKIP(5);
4808 /*
4809 * Element must always be empty.
4810 */
4811 ret = XML_ELEMENT_TYPE_EMPTY;
4812 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4813 (NXT(2) == 'Y')) {
4814 SKIP(3);
4815 /*
4816 * Element is a generic container.
4817 */
4818 ret = XML_ELEMENT_TYPE_ANY;
4819 } else if (RAW == '(') {
4820 ret = xmlParseElementContentDecl(ctxt, name, &content);
4821 } else {
4822 /*
4823 * [ WFC: PEs in Internal Subset ] error handling.
4824 */
4825 if ((RAW == '%') && (ctxt->external == 0) &&
4826 (ctxt->inputNr == 1)) {
4827 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4829 ctxt->sax->error(ctxt->userData,
4830 "PEReference: forbidden within markup decl in internal subset\n");
4831 } else {
4832 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4834 ctxt->sax->error(ctxt->userData,
4835 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4836 }
4837 ctxt->wellFormed = 0;
4838 ctxt->disableSAX = 1;
4839 if (name != NULL) xmlFree(name);
4840 return(-1);
4841 }
4842
4843 SKIP_BLANKS;
4844 /*
4845 * Pop-up of finished entities.
4846 */
4847 while ((RAW == 0) && (ctxt->inputNr > 1))
4848 xmlPopInput(ctxt);
4849 SKIP_BLANKS;
4850
4851 if (RAW != '>') {
4852 ctxt->errNo = XML_ERR_GT_REQUIRED;
4853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4854 ctxt->sax->error(ctxt->userData,
4855 "xmlParseElementDecl: expected '>' at the end\n");
4856 ctxt->wellFormed = 0;
4857 ctxt->disableSAX = 1;
4858 } else {
4859 if (input != ctxt->input) {
4860 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4862 ctxt->sax->error(ctxt->userData,
4863"Element declaration doesn't start and stop in the same entity\n");
4864 ctxt->wellFormed = 0;
4865 ctxt->disableSAX = 1;
4866 }
4867
4868 NEXT;
4869 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4870 (ctxt->sax->elementDecl != NULL))
4871 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4872 content);
4873 }
4874 if (content != NULL) {
4875 xmlFreeElementContent(content);
4876 }
4877 if (name != NULL) {
4878 xmlFree(name);
4879 }
4880 }
4881 return(ret);
4882}
4883
4884/**
Owen Taylor3473f882001-02-23 17:55:21 +00004885 * xmlParseConditionalSections
4886 * @ctxt: an XML parser context
4887 *
4888 * [61] conditionalSect ::= includeSect | ignoreSect
4889 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4890 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4891 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4892 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4893 */
4894
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004895static void
Owen Taylor3473f882001-02-23 17:55:21 +00004896xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4897 SKIP(3);
4898 SKIP_BLANKS;
4899 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4900 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4901 (NXT(6) == 'E')) {
4902 SKIP(7);
4903 SKIP_BLANKS;
4904 if (RAW != '[') {
4905 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4907 ctxt->sax->error(ctxt->userData,
4908 "XML conditional section '[' expected\n");
4909 ctxt->wellFormed = 0;
4910 ctxt->disableSAX = 1;
4911 } else {
4912 NEXT;
4913 }
4914 if (xmlParserDebugEntities) {
4915 if ((ctxt->input != NULL) && (ctxt->input->filename))
4916 xmlGenericError(xmlGenericErrorContext,
4917 "%s(%d): ", ctxt->input->filename,
4918 ctxt->input->line);
4919 xmlGenericError(xmlGenericErrorContext,
4920 "Entering INCLUDE Conditional Section\n");
4921 }
4922
4923 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4924 (NXT(2) != '>'))) {
4925 const xmlChar *check = CUR_PTR;
4926 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004927
4928 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4929 xmlParseConditionalSections(ctxt);
4930 } else if (IS_BLANK(CUR)) {
4931 NEXT;
4932 } else if (RAW == '%') {
4933 xmlParsePEReference(ctxt);
4934 } else
4935 xmlParseMarkupDecl(ctxt);
4936
4937 /*
4938 * Pop-up of finished entities.
4939 */
4940 while ((RAW == 0) && (ctxt->inputNr > 1))
4941 xmlPopInput(ctxt);
4942
Daniel Veillardfdc91562002-07-01 21:52:03 +00004943 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004944 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4946 ctxt->sax->error(ctxt->userData,
4947 "Content error in the external subset\n");
4948 ctxt->wellFormed = 0;
4949 ctxt->disableSAX = 1;
4950 break;
4951 }
4952 }
4953 if (xmlParserDebugEntities) {
4954 if ((ctxt->input != NULL) && (ctxt->input->filename))
4955 xmlGenericError(xmlGenericErrorContext,
4956 "%s(%d): ", ctxt->input->filename,
4957 ctxt->input->line);
4958 xmlGenericError(xmlGenericErrorContext,
4959 "Leaving INCLUDE Conditional Section\n");
4960 }
4961
4962 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4963 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4964 int state;
4965 int instate;
4966 int depth = 0;
4967
4968 SKIP(6);
4969 SKIP_BLANKS;
4970 if (RAW != '[') {
4971 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4973 ctxt->sax->error(ctxt->userData,
4974 "XML conditional section '[' expected\n");
4975 ctxt->wellFormed = 0;
4976 ctxt->disableSAX = 1;
4977 } else {
4978 NEXT;
4979 }
4980 if (xmlParserDebugEntities) {
4981 if ((ctxt->input != NULL) && (ctxt->input->filename))
4982 xmlGenericError(xmlGenericErrorContext,
4983 "%s(%d): ", ctxt->input->filename,
4984 ctxt->input->line);
4985 xmlGenericError(xmlGenericErrorContext,
4986 "Entering IGNORE Conditional Section\n");
4987 }
4988
4989 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004990 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004991 * But disable SAX event generating DTD building in the meantime
4992 */
4993 state = ctxt->disableSAX;
4994 instate = ctxt->instate;
4995 ctxt->disableSAX = 1;
4996 ctxt->instate = XML_PARSER_IGNORE;
4997
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004998 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004999 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5000 depth++;
5001 SKIP(3);
5002 continue;
5003 }
5004 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5005 if (--depth >= 0) SKIP(3);
5006 continue;
5007 }
5008 NEXT;
5009 continue;
5010 }
5011
5012 ctxt->disableSAX = state;
5013 ctxt->instate = instate;
5014
5015 if (xmlParserDebugEntities) {
5016 if ((ctxt->input != NULL) && (ctxt->input->filename))
5017 xmlGenericError(xmlGenericErrorContext,
5018 "%s(%d): ", ctxt->input->filename,
5019 ctxt->input->line);
5020 xmlGenericError(xmlGenericErrorContext,
5021 "Leaving IGNORE Conditional Section\n");
5022 }
5023
5024 } else {
5025 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5027 ctxt->sax->error(ctxt->userData,
5028 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5029 ctxt->wellFormed = 0;
5030 ctxt->disableSAX = 1;
5031 }
5032
5033 if (RAW == 0)
5034 SHRINK;
5035
5036 if (RAW == 0) {
5037 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5039 ctxt->sax->error(ctxt->userData,
5040 "XML conditional section not closed\n");
5041 ctxt->wellFormed = 0;
5042 ctxt->disableSAX = 1;
5043 } else {
5044 SKIP(3);
5045 }
5046}
5047
5048/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005049 * xmlParseMarkupDecl:
5050 * @ctxt: an XML parser context
5051 *
5052 * parse Markup declarations
5053 *
5054 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5055 * NotationDecl | PI | Comment
5056 *
5057 * [ VC: Proper Declaration/PE Nesting ]
5058 * Parameter-entity replacement text must be properly nested with
5059 * markup declarations. That is to say, if either the first character
5060 * or the last character of a markup declaration (markupdecl above) is
5061 * contained in the replacement text for a parameter-entity reference,
5062 * both must be contained in the same replacement text.
5063 *
5064 * [ WFC: PEs in Internal Subset ]
5065 * In the internal DTD subset, parameter-entity references can occur
5066 * only where markup declarations can occur, not within markup declarations.
5067 * (This does not apply to references that occur in external parameter
5068 * entities or to the external subset.)
5069 */
5070void
5071xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5072 GROW;
5073 xmlParseElementDecl(ctxt);
5074 xmlParseAttributeListDecl(ctxt);
5075 xmlParseEntityDecl(ctxt);
5076 xmlParseNotationDecl(ctxt);
5077 xmlParsePI(ctxt);
5078 xmlParseComment(ctxt);
5079 /*
5080 * This is only for internal subset. On external entities,
5081 * the replacement is done before parsing stage
5082 */
5083 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5084 xmlParsePEReference(ctxt);
5085
5086 /*
5087 * Conditional sections are allowed from entities included
5088 * by PE References in the internal subset.
5089 */
5090 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5091 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5092 xmlParseConditionalSections(ctxt);
5093 }
5094 }
5095
5096 ctxt->instate = XML_PARSER_DTD;
5097}
5098
5099/**
5100 * xmlParseTextDecl:
5101 * @ctxt: an XML parser context
5102 *
5103 * parse an XML declaration header for external entities
5104 *
5105 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5106 *
5107 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5108 */
5109
5110void
5111xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5112 xmlChar *version;
5113
5114 /*
5115 * We know that '<?xml' is here.
5116 */
5117 if ((RAW == '<') && (NXT(1) == '?') &&
5118 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5119 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5120 SKIP(5);
5121 } else {
5122 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5124 ctxt->sax->error(ctxt->userData,
5125 "Text declaration '<?xml' required\n");
5126 ctxt->wellFormed = 0;
5127 ctxt->disableSAX = 1;
5128
5129 return;
5130 }
5131
5132 if (!IS_BLANK(CUR)) {
5133 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5135 ctxt->sax->error(ctxt->userData,
5136 "Space needed after '<?xml'\n");
5137 ctxt->wellFormed = 0;
5138 ctxt->disableSAX = 1;
5139 }
5140 SKIP_BLANKS;
5141
5142 /*
5143 * We may have the VersionInfo here.
5144 */
5145 version = xmlParseVersionInfo(ctxt);
5146 if (version == NULL)
5147 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005148 else {
5149 if (!IS_BLANK(CUR)) {
5150 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5152 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5153 ctxt->wellFormed = 0;
5154 ctxt->disableSAX = 1;
5155 }
5156 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005157 ctxt->input->version = version;
5158
5159 /*
5160 * We must have the encoding declaration
5161 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005162 xmlParseEncodingDecl(ctxt);
5163 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5164 /*
5165 * The XML REC instructs us to stop parsing right here
5166 */
5167 return;
5168 }
5169
5170 SKIP_BLANKS;
5171 if ((RAW == '?') && (NXT(1) == '>')) {
5172 SKIP(2);
5173 } else if (RAW == '>') {
5174 /* Deprecated old WD ... */
5175 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5177 ctxt->sax->error(ctxt->userData,
5178 "XML declaration must end-up with '?>'\n");
5179 ctxt->wellFormed = 0;
5180 ctxt->disableSAX = 1;
5181 NEXT;
5182 } else {
5183 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5185 ctxt->sax->error(ctxt->userData,
5186 "parsing XML declaration: '?>' expected\n");
5187 ctxt->wellFormed = 0;
5188 ctxt->disableSAX = 1;
5189 MOVETO_ENDTAG(CUR_PTR);
5190 NEXT;
5191 }
5192}
5193
5194/**
Owen Taylor3473f882001-02-23 17:55:21 +00005195 * xmlParseExternalSubset:
5196 * @ctxt: an XML parser context
5197 * @ExternalID: the external identifier
5198 * @SystemID: the system identifier (or URL)
5199 *
5200 * parse Markup declarations from an external subset
5201 *
5202 * [30] extSubset ::= textDecl? extSubsetDecl
5203 *
5204 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5205 */
5206void
5207xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5208 const xmlChar *SystemID) {
5209 GROW;
5210 if ((RAW == '<') && (NXT(1) == '?') &&
5211 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5212 (NXT(4) == 'l')) {
5213 xmlParseTextDecl(ctxt);
5214 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5215 /*
5216 * The XML REC instructs us to stop parsing right here
5217 */
5218 ctxt->instate = XML_PARSER_EOF;
5219 return;
5220 }
5221 }
5222 if (ctxt->myDoc == NULL) {
5223 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5224 }
5225 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5226 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5227
5228 ctxt->instate = XML_PARSER_DTD;
5229 ctxt->external = 1;
5230 while (((RAW == '<') && (NXT(1) == '?')) ||
5231 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005232 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005233 const xmlChar *check = CUR_PTR;
5234 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005235
5236 GROW;
5237 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5238 xmlParseConditionalSections(ctxt);
5239 } else if (IS_BLANK(CUR)) {
5240 NEXT;
5241 } else if (RAW == '%') {
5242 xmlParsePEReference(ctxt);
5243 } else
5244 xmlParseMarkupDecl(ctxt);
5245
5246 /*
5247 * Pop-up of finished entities.
5248 */
5249 while ((RAW == 0) && (ctxt->inputNr > 1))
5250 xmlPopInput(ctxt);
5251
Daniel Veillardfdc91562002-07-01 21:52:03 +00005252 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005253 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5255 ctxt->sax->error(ctxt->userData,
5256 "Content error in the external subset\n");
5257 ctxt->wellFormed = 0;
5258 ctxt->disableSAX = 1;
5259 break;
5260 }
5261 }
5262
5263 if (RAW != 0) {
5264 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5266 ctxt->sax->error(ctxt->userData,
5267 "Extra content at the end of the document\n");
5268 ctxt->wellFormed = 0;
5269 ctxt->disableSAX = 1;
5270 }
5271
5272}
5273
5274/**
5275 * xmlParseReference:
5276 * @ctxt: an XML parser context
5277 *
5278 * parse and handle entity references in content, depending on the SAX
5279 * interface, this may end-up in a call to character() if this is a
5280 * CharRef, a predefined entity, if there is no reference() callback.
5281 * or if the parser was asked to switch to that mode.
5282 *
5283 * [67] Reference ::= EntityRef | CharRef
5284 */
5285void
5286xmlParseReference(xmlParserCtxtPtr ctxt) {
5287 xmlEntityPtr ent;
5288 xmlChar *val;
5289 if (RAW != '&') return;
5290
5291 if (NXT(1) == '#') {
5292 int i = 0;
5293 xmlChar out[10];
5294 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005295 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005296
5297 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5298 /*
5299 * So we are using non-UTF-8 buffers
5300 * Check that the char fit on 8bits, if not
5301 * generate a CharRef.
5302 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005303 if (value <= 0xFF) {
5304 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005305 out[1] = 0;
5306 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5307 (!ctxt->disableSAX))
5308 ctxt->sax->characters(ctxt->userData, out, 1);
5309 } else {
5310 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005311 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005312 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005313 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005314 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5315 (!ctxt->disableSAX))
5316 ctxt->sax->reference(ctxt->userData, out);
5317 }
5318 } else {
5319 /*
5320 * Just encode the value in UTF-8
5321 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005322 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 out[i] = 0;
5324 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5325 (!ctxt->disableSAX))
5326 ctxt->sax->characters(ctxt->userData, out, i);
5327 }
5328 } else {
5329 ent = xmlParseEntityRef(ctxt);
5330 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005331 if (!ctxt->wellFormed)
5332 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005333 if ((ent->name != NULL) &&
5334 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5335 xmlNodePtr list = NULL;
5336 int ret;
5337
5338
5339 /*
5340 * The first reference to the entity trigger a parsing phase
5341 * where the ent->children is filled with the result from
5342 * the parsing.
5343 */
5344 if (ent->children == NULL) {
5345 xmlChar *value;
5346 value = ent->content;
5347
5348 /*
5349 * Check that this entity is well formed
5350 */
5351 if ((value != NULL) &&
5352 (value[1] == 0) && (value[0] == '<') &&
5353 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5354 /*
5355 * DONE: get definite answer on this !!!
5356 * Lots of entity decls are used to declare a single
5357 * char
5358 * <!ENTITY lt "<">
5359 * Which seems to be valid since
5360 * 2.4: The ampersand character (&) and the left angle
5361 * bracket (<) may appear in their literal form only
5362 * when used ... They are also legal within the literal
5363 * entity value of an internal entity declaration;i
5364 * see "4.3.2 Well-Formed Parsed Entities".
5365 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5366 * Looking at the OASIS test suite and James Clark
5367 * tests, this is broken. However the XML REC uses
5368 * it. Is the XML REC not well-formed ????
5369 * This is a hack to avoid this problem
5370 *
5371 * ANSWER: since lt gt amp .. are already defined,
5372 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005373 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005374 * is lousy but acceptable.
5375 */
5376 list = xmlNewDocText(ctxt->myDoc, value);
5377 if (list != NULL) {
5378 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5379 (ent->children == NULL)) {
5380 ent->children = list;
5381 ent->last = list;
5382 list->parent = (xmlNodePtr) ent;
5383 } else {
5384 xmlFreeNodeList(list);
5385 }
5386 } else if (list != NULL) {
5387 xmlFreeNodeList(list);
5388 }
5389 } else {
5390 /*
5391 * 4.3.2: An internal general parsed entity is well-formed
5392 * if its replacement text matches the production labeled
5393 * content.
5394 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005395
5396 void *user_data;
5397 /*
5398 * This is a bit hackish but this seems the best
5399 * way to make sure both SAX and DOM entity support
5400 * behaves okay.
5401 */
5402 if (ctxt->userData == ctxt)
5403 user_data = NULL;
5404 else
5405 user_data = ctxt->userData;
5406
Owen Taylor3473f882001-02-23 17:55:21 +00005407 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5408 ctxt->depth++;
5409 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005410 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005411 value, &list);
5412 ctxt->depth--;
5413 } else if (ent->etype ==
5414 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5415 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005416 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005417 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005418 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005419 ctxt->depth--;
5420 } else {
5421 ret = -1;
5422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5423 ctxt->sax->error(ctxt->userData,
5424 "Internal: invalid entity type\n");
5425 }
5426 if (ret == XML_ERR_ENTITY_LOOP) {
5427 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5429 ctxt->sax->error(ctxt->userData,
5430 "Detected entity reference loop\n");
5431 ctxt->wellFormed = 0;
5432 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005433 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005434 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005435 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5436 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005437 (ent->children == NULL)) {
5438 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005439 if (ctxt->replaceEntities) {
5440 /*
5441 * Prune it directly in the generated document
5442 * except for single text nodes.
5443 */
5444 if ((list->type == XML_TEXT_NODE) &&
5445 (list->next == NULL)) {
5446 list->parent = (xmlNodePtr) ent;
5447 list = NULL;
5448 } else {
5449 while (list != NULL) {
5450 list->parent = (xmlNodePtr) ctxt->node;
5451 if (list->next == NULL)
5452 ent->last = list;
5453 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005454 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005455 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005456 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5457 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005458 }
5459 } else {
5460 while (list != NULL) {
5461 list->parent = (xmlNodePtr) ent;
5462 if (list->next == NULL)
5463 ent->last = list;
5464 list = list->next;
5465 }
Owen Taylor3473f882001-02-23 17:55:21 +00005466 }
5467 } else {
5468 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005469 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005470 }
5471 } else if (ret > 0) {
5472 ctxt->errNo = ret;
5473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5474 ctxt->sax->error(ctxt->userData,
5475 "Entity value required\n");
5476 ctxt->wellFormed = 0;
5477 ctxt->disableSAX = 1;
5478 } else if (list != NULL) {
5479 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005480 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005481 }
5482 }
5483 }
5484 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5485 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5486 /*
5487 * Create a node.
5488 */
5489 ctxt->sax->reference(ctxt->userData, ent->name);
5490 return;
5491 } else if (ctxt->replaceEntities) {
5492 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5493 /*
5494 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005495 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005496 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005497 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005498 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005499 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005500 cur = ent->children;
5501 while (cur != NULL) {
5502 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005503 if (firstChild == NULL){
5504 firstChild = new;
5505 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005506 xmlAddChild(ctxt->node, new);
5507 if (cur == ent->last)
5508 break;
5509 cur = cur->next;
5510 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005511 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5512 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005513 } else {
5514 /*
5515 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005516 * node with a possible previous text one which
5517 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005518 */
5519 if (ent->children->type == XML_TEXT_NODE)
5520 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5521 if ((ent->last != ent->children) &&
5522 (ent->last->type == XML_TEXT_NODE))
5523 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5524 xmlAddChildList(ctxt->node, ent->children);
5525 }
5526
Owen Taylor3473f882001-02-23 17:55:21 +00005527 /*
5528 * This is to avoid a nasty side effect, see
5529 * characters() in SAX.c
5530 */
5531 ctxt->nodemem = 0;
5532 ctxt->nodelen = 0;
5533 return;
5534 } else {
5535 /*
5536 * Probably running in SAX mode
5537 */
5538 xmlParserInputPtr input;
5539
5540 input = xmlNewEntityInputStream(ctxt, ent);
5541 xmlPushInput(ctxt, input);
5542 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5543 (RAW == '<') && (NXT(1) == '?') &&
5544 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5545 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5546 xmlParseTextDecl(ctxt);
5547 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5548 /*
5549 * The XML REC instructs us to stop parsing right here
5550 */
5551 ctxt->instate = XML_PARSER_EOF;
5552 return;
5553 }
5554 if (input->standalone == 1) {
5555 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5557 ctxt->sax->error(ctxt->userData,
5558 "external parsed entities cannot be standalone\n");
5559 ctxt->wellFormed = 0;
5560 ctxt->disableSAX = 1;
5561 }
5562 }
5563 return;
5564 }
5565 }
5566 } else {
5567 val = ent->content;
5568 if (val == NULL) return;
5569 /*
5570 * inline the entity.
5571 */
5572 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5573 (!ctxt->disableSAX))
5574 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5575 }
5576 }
5577}
5578
5579/**
5580 * xmlParseEntityRef:
5581 * @ctxt: an XML parser context
5582 *
5583 * parse ENTITY references declarations
5584 *
5585 * [68] EntityRef ::= '&' Name ';'
5586 *
5587 * [ WFC: Entity Declared ]
5588 * In a document without any DTD, a document with only an internal DTD
5589 * subset which contains no parameter entity references, or a document
5590 * with "standalone='yes'", the Name given in the entity reference
5591 * must match that in an entity declaration, except that well-formed
5592 * documents need not declare any of the following entities: amp, lt,
5593 * gt, apos, quot. The declaration of a parameter entity must precede
5594 * any reference to it. Similarly, the declaration of a general entity
5595 * must precede any reference to it which appears in a default value in an
5596 * attribute-list declaration. Note that if entities are declared in the
5597 * external subset or in external parameter entities, a non-validating
5598 * processor is not obligated to read and process their declarations;
5599 * for such documents, the rule that an entity must be declared is a
5600 * well-formedness constraint only if standalone='yes'.
5601 *
5602 * [ WFC: Parsed Entity ]
5603 * An entity reference must not contain the name of an unparsed entity
5604 *
5605 * Returns the xmlEntityPtr if found, or NULL otherwise.
5606 */
5607xmlEntityPtr
5608xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5609 xmlChar *name;
5610 xmlEntityPtr ent = NULL;
5611
5612 GROW;
5613
5614 if (RAW == '&') {
5615 NEXT;
5616 name = xmlParseName(ctxt);
5617 if (name == NULL) {
5618 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5620 ctxt->sax->error(ctxt->userData,
5621 "xmlParseEntityRef: no name\n");
5622 ctxt->wellFormed = 0;
5623 ctxt->disableSAX = 1;
5624 } else {
5625 if (RAW == ';') {
5626 NEXT;
5627 /*
5628 * Ask first SAX for entity resolution, otherwise try the
5629 * predefined set.
5630 */
5631 if (ctxt->sax != NULL) {
5632 if (ctxt->sax->getEntity != NULL)
5633 ent = ctxt->sax->getEntity(ctxt->userData, name);
5634 if (ent == NULL)
5635 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005636 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5637 ent = getEntity(ctxt, name);
5638 }
Owen Taylor3473f882001-02-23 17:55:21 +00005639 }
5640 /*
5641 * [ WFC: Entity Declared ]
5642 * In a document without any DTD, a document with only an
5643 * internal DTD subset which contains no parameter entity
5644 * references, or a document with "standalone='yes'", the
5645 * Name given in the entity reference must match that in an
5646 * entity declaration, except that well-formed documents
5647 * need not declare any of the following entities: amp, lt,
5648 * gt, apos, quot.
5649 * The declaration of a parameter entity must precede any
5650 * reference to it.
5651 * Similarly, the declaration of a general entity must
5652 * precede any reference to it which appears in a default
5653 * value in an attribute-list declaration. Note that if
5654 * entities are declared in the external subset or in
5655 * external parameter entities, a non-validating processor
5656 * is not obligated to read and process their declarations;
5657 * for such documents, the rule that an entity must be
5658 * declared is a well-formedness constraint only if
5659 * standalone='yes'.
5660 */
5661 if (ent == NULL) {
5662 if ((ctxt->standalone == 1) ||
5663 ((ctxt->hasExternalSubset == 0) &&
5664 (ctxt->hasPErefs == 0))) {
5665 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5667 ctxt->sax->error(ctxt->userData,
5668 "Entity '%s' not defined\n", name);
5669 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005670 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005671 ctxt->disableSAX = 1;
5672 } else {
5673 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005675 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005676 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005677 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005678 }
5679 }
5680
5681 /*
5682 * [ WFC: Parsed Entity ]
5683 * An entity reference must not contain the name of an
5684 * unparsed entity
5685 */
5686 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5687 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5689 ctxt->sax->error(ctxt->userData,
5690 "Entity reference to unparsed entity %s\n", name);
5691 ctxt->wellFormed = 0;
5692 ctxt->disableSAX = 1;
5693 }
5694
5695 /*
5696 * [ WFC: No External Entity References ]
5697 * Attribute values cannot contain direct or indirect
5698 * entity references to external entities.
5699 */
5700 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5701 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5702 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5704 ctxt->sax->error(ctxt->userData,
5705 "Attribute references external entity '%s'\n", name);
5706 ctxt->wellFormed = 0;
5707 ctxt->disableSAX = 1;
5708 }
5709 /*
5710 * [ WFC: No < in Attribute Values ]
5711 * The replacement text of any entity referred to directly or
5712 * indirectly in an attribute value (other than "&lt;") must
5713 * not contain a <.
5714 */
5715 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5716 (ent != NULL) &&
5717 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5718 (ent->content != NULL) &&
5719 (xmlStrchr(ent->content, '<'))) {
5720 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5722 ctxt->sax->error(ctxt->userData,
5723 "'<' in entity '%s' is not allowed in attributes values\n", name);
5724 ctxt->wellFormed = 0;
5725 ctxt->disableSAX = 1;
5726 }
5727
5728 /*
5729 * Internal check, no parameter entities here ...
5730 */
5731 else {
5732 switch (ent->etype) {
5733 case XML_INTERNAL_PARAMETER_ENTITY:
5734 case XML_EXTERNAL_PARAMETER_ENTITY:
5735 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5737 ctxt->sax->error(ctxt->userData,
5738 "Attempt to reference the parameter entity '%s'\n", name);
5739 ctxt->wellFormed = 0;
5740 ctxt->disableSAX = 1;
5741 break;
5742 default:
5743 break;
5744 }
5745 }
5746
5747 /*
5748 * [ WFC: No Recursion ]
5749 * A parsed entity must not contain a recursive reference
5750 * to itself, either directly or indirectly.
5751 * Done somewhere else
5752 */
5753
5754 } else {
5755 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5757 ctxt->sax->error(ctxt->userData,
5758 "xmlParseEntityRef: expecting ';'\n");
5759 ctxt->wellFormed = 0;
5760 ctxt->disableSAX = 1;
5761 }
5762 xmlFree(name);
5763 }
5764 }
5765 return(ent);
5766}
5767
5768/**
5769 * xmlParseStringEntityRef:
5770 * @ctxt: an XML parser context
5771 * @str: a pointer to an index in the string
5772 *
5773 * parse ENTITY references declarations, but this version parses it from
5774 * a string value.
5775 *
5776 * [68] EntityRef ::= '&' Name ';'
5777 *
5778 * [ WFC: Entity Declared ]
5779 * In a document without any DTD, a document with only an internal DTD
5780 * subset which contains no parameter entity references, or a document
5781 * with "standalone='yes'", the Name given in the entity reference
5782 * must match that in an entity declaration, except that well-formed
5783 * documents need not declare any of the following entities: amp, lt,
5784 * gt, apos, quot. The declaration of a parameter entity must precede
5785 * any reference to it. Similarly, the declaration of a general entity
5786 * must precede any reference to it which appears in a default value in an
5787 * attribute-list declaration. Note that if entities are declared in the
5788 * external subset or in external parameter entities, a non-validating
5789 * processor is not obligated to read and process their declarations;
5790 * for such documents, the rule that an entity must be declared is a
5791 * well-formedness constraint only if standalone='yes'.
5792 *
5793 * [ WFC: Parsed Entity ]
5794 * An entity reference must not contain the name of an unparsed entity
5795 *
5796 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5797 * is updated to the current location in the string.
5798 */
5799xmlEntityPtr
5800xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5801 xmlChar *name;
5802 const xmlChar *ptr;
5803 xmlChar cur;
5804 xmlEntityPtr ent = NULL;
5805
5806 if ((str == NULL) || (*str == NULL))
5807 return(NULL);
5808 ptr = *str;
5809 cur = *ptr;
5810 if (cur == '&') {
5811 ptr++;
5812 cur = *ptr;
5813 name = xmlParseStringName(ctxt, &ptr);
5814 if (name == NULL) {
5815 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5817 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005818 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005819 ctxt->wellFormed = 0;
5820 ctxt->disableSAX = 1;
5821 } else {
5822 if (*ptr == ';') {
5823 ptr++;
5824 /*
5825 * Ask first SAX for entity resolution, otherwise try the
5826 * predefined set.
5827 */
5828 if (ctxt->sax != NULL) {
5829 if (ctxt->sax->getEntity != NULL)
5830 ent = ctxt->sax->getEntity(ctxt->userData, name);
5831 if (ent == NULL)
5832 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005833 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5834 ent = getEntity(ctxt, name);
5835 }
Owen Taylor3473f882001-02-23 17:55:21 +00005836 }
5837 /*
5838 * [ WFC: Entity Declared ]
5839 * In a document without any DTD, a document with only an
5840 * internal DTD subset which contains no parameter entity
5841 * references, or a document with "standalone='yes'", the
5842 * Name given in the entity reference must match that in an
5843 * entity declaration, except that well-formed documents
5844 * need not declare any of the following entities: amp, lt,
5845 * gt, apos, quot.
5846 * The declaration of a parameter entity must precede any
5847 * reference to it.
5848 * Similarly, the declaration of a general entity must
5849 * precede any reference to it which appears in a default
5850 * value in an attribute-list declaration. Note that if
5851 * entities are declared in the external subset or in
5852 * external parameter entities, a non-validating processor
5853 * is not obligated to read and process their declarations;
5854 * for such documents, the rule that an entity must be
5855 * declared is a well-formedness constraint only if
5856 * standalone='yes'.
5857 */
5858 if (ent == NULL) {
5859 if ((ctxt->standalone == 1) ||
5860 ((ctxt->hasExternalSubset == 0) &&
5861 (ctxt->hasPErefs == 0))) {
5862 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5864 ctxt->sax->error(ctxt->userData,
5865 "Entity '%s' not defined\n", name);
5866 ctxt->wellFormed = 0;
5867 ctxt->disableSAX = 1;
5868 } else {
5869 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5870 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5871 ctxt->sax->warning(ctxt->userData,
5872 "Entity '%s' not defined\n", name);
5873 }
5874 }
5875
5876 /*
5877 * [ WFC: Parsed Entity ]
5878 * An entity reference must not contain the name of an
5879 * unparsed entity
5880 */
5881 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5882 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5884 ctxt->sax->error(ctxt->userData,
5885 "Entity reference to unparsed entity %s\n", name);
5886 ctxt->wellFormed = 0;
5887 ctxt->disableSAX = 1;
5888 }
5889
5890 /*
5891 * [ WFC: No External Entity References ]
5892 * Attribute values cannot contain direct or indirect
5893 * entity references to external entities.
5894 */
5895 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5896 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5897 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5899 ctxt->sax->error(ctxt->userData,
5900 "Attribute references external entity '%s'\n", name);
5901 ctxt->wellFormed = 0;
5902 ctxt->disableSAX = 1;
5903 }
5904 /*
5905 * [ WFC: No < in Attribute Values ]
5906 * The replacement text of any entity referred to directly or
5907 * indirectly in an attribute value (other than "&lt;") must
5908 * not contain a <.
5909 */
5910 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5911 (ent != NULL) &&
5912 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5913 (ent->content != NULL) &&
5914 (xmlStrchr(ent->content, '<'))) {
5915 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5917 ctxt->sax->error(ctxt->userData,
5918 "'<' in entity '%s' is not allowed in attributes values\n", name);
5919 ctxt->wellFormed = 0;
5920 ctxt->disableSAX = 1;
5921 }
5922
5923 /*
5924 * Internal check, no parameter entities here ...
5925 */
5926 else {
5927 switch (ent->etype) {
5928 case XML_INTERNAL_PARAMETER_ENTITY:
5929 case XML_EXTERNAL_PARAMETER_ENTITY:
5930 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5932 ctxt->sax->error(ctxt->userData,
5933 "Attempt to reference the parameter entity '%s'\n", name);
5934 ctxt->wellFormed = 0;
5935 ctxt->disableSAX = 1;
5936 break;
5937 default:
5938 break;
5939 }
5940 }
5941
5942 /*
5943 * [ WFC: No Recursion ]
5944 * A parsed entity must not contain a recursive reference
5945 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005946 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005947 */
5948
5949 } else {
5950 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5952 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005953 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005954 ctxt->wellFormed = 0;
5955 ctxt->disableSAX = 1;
5956 }
5957 xmlFree(name);
5958 }
5959 }
5960 *str = ptr;
5961 return(ent);
5962}
5963
5964/**
5965 * xmlParsePEReference:
5966 * @ctxt: an XML parser context
5967 *
5968 * parse PEReference declarations
5969 * The entity content is handled directly by pushing it's content as
5970 * a new input stream.
5971 *
5972 * [69] PEReference ::= '%' Name ';'
5973 *
5974 * [ WFC: No Recursion ]
5975 * A parsed entity must not contain a recursive
5976 * reference to itself, either directly or indirectly.
5977 *
5978 * [ WFC: Entity Declared ]
5979 * In a document without any DTD, a document with only an internal DTD
5980 * subset which contains no parameter entity references, or a document
5981 * with "standalone='yes'", ... ... The declaration of a parameter
5982 * entity must precede any reference to it...
5983 *
5984 * [ VC: Entity Declared ]
5985 * In a document with an external subset or external parameter entities
5986 * with "standalone='no'", ... ... The declaration of a parameter entity
5987 * must precede any reference to it...
5988 *
5989 * [ WFC: In DTD ]
5990 * Parameter-entity references may only appear in the DTD.
5991 * NOTE: misleading but this is handled.
5992 */
5993void
5994xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5995 xmlChar *name;
5996 xmlEntityPtr entity = NULL;
5997 xmlParserInputPtr input;
5998
5999 if (RAW == '%') {
6000 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006001 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006002 if (name == NULL) {
6003 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6005 ctxt->sax->error(ctxt->userData,
6006 "xmlParsePEReference: no name\n");
6007 ctxt->wellFormed = 0;
6008 ctxt->disableSAX = 1;
6009 } else {
6010 if (RAW == ';') {
6011 NEXT;
6012 if ((ctxt->sax != NULL) &&
6013 (ctxt->sax->getParameterEntity != NULL))
6014 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6015 name);
6016 if (entity == NULL) {
6017 /*
6018 * [ WFC: Entity Declared ]
6019 * In a document without any DTD, a document with only an
6020 * internal DTD subset which contains no parameter entity
6021 * references, or a document with "standalone='yes'", ...
6022 * ... The declaration of a parameter entity must precede
6023 * any reference to it...
6024 */
6025 if ((ctxt->standalone == 1) ||
6026 ((ctxt->hasExternalSubset == 0) &&
6027 (ctxt->hasPErefs == 0))) {
6028 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6029 if ((!ctxt->disableSAX) &&
6030 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6031 ctxt->sax->error(ctxt->userData,
6032 "PEReference: %%%s; not found\n", name);
6033 ctxt->wellFormed = 0;
6034 ctxt->disableSAX = 1;
6035 } else {
6036 /*
6037 * [ VC: Entity Declared ]
6038 * In a document with an external subset or external
6039 * parameter entities with "standalone='no'", ...
6040 * ... The declaration of a parameter entity must precede
6041 * any reference to it...
6042 */
6043 if ((!ctxt->disableSAX) &&
6044 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6045 ctxt->sax->warning(ctxt->userData,
6046 "PEReference: %%%s; not found\n", name);
6047 ctxt->valid = 0;
6048 }
6049 } else {
6050 /*
6051 * Internal checking in case the entity quest barfed
6052 */
6053 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6054 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6055 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6056 ctxt->sax->warning(ctxt->userData,
6057 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006058 } else if (ctxt->input->free != deallocblankswrapper) {
6059 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6060 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006061 } else {
6062 /*
6063 * TODO !!!
6064 * handle the extra spaces added before and after
6065 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6066 */
6067 input = xmlNewEntityInputStream(ctxt, entity);
6068 xmlPushInput(ctxt, input);
6069 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6070 (RAW == '<') && (NXT(1) == '?') &&
6071 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6072 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6073 xmlParseTextDecl(ctxt);
6074 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6075 /*
6076 * The XML REC instructs us to stop parsing
6077 * right here
6078 */
6079 ctxt->instate = XML_PARSER_EOF;
6080 xmlFree(name);
6081 return;
6082 }
6083 }
Owen Taylor3473f882001-02-23 17:55:21 +00006084 }
6085 }
6086 ctxt->hasPErefs = 1;
6087 } else {
6088 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6090 ctxt->sax->error(ctxt->userData,
6091 "xmlParsePEReference: expecting ';'\n");
6092 ctxt->wellFormed = 0;
6093 ctxt->disableSAX = 1;
6094 }
6095 xmlFree(name);
6096 }
6097 }
6098}
6099
6100/**
6101 * xmlParseStringPEReference:
6102 * @ctxt: an XML parser context
6103 * @str: a pointer to an index in the string
6104 *
6105 * parse PEReference declarations
6106 *
6107 * [69] PEReference ::= '%' Name ';'
6108 *
6109 * [ WFC: No Recursion ]
6110 * A parsed entity must not contain a recursive
6111 * reference to itself, either directly or indirectly.
6112 *
6113 * [ WFC: Entity Declared ]
6114 * In a document without any DTD, a document with only an internal DTD
6115 * subset which contains no parameter entity references, or a document
6116 * with "standalone='yes'", ... ... The declaration of a parameter
6117 * entity must precede any reference to it...
6118 *
6119 * [ VC: Entity Declared ]
6120 * In a document with an external subset or external parameter entities
6121 * with "standalone='no'", ... ... The declaration of a parameter entity
6122 * must precede any reference to it...
6123 *
6124 * [ WFC: In DTD ]
6125 * Parameter-entity references may only appear in the DTD.
6126 * NOTE: misleading but this is handled.
6127 *
6128 * Returns the string of the entity content.
6129 * str is updated to the current value of the index
6130 */
6131xmlEntityPtr
6132xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6133 const xmlChar *ptr;
6134 xmlChar cur;
6135 xmlChar *name;
6136 xmlEntityPtr entity = NULL;
6137
6138 if ((str == NULL) || (*str == NULL)) return(NULL);
6139 ptr = *str;
6140 cur = *ptr;
6141 if (cur == '%') {
6142 ptr++;
6143 cur = *ptr;
6144 name = xmlParseStringName(ctxt, &ptr);
6145 if (name == NULL) {
6146 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6147 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6148 ctxt->sax->error(ctxt->userData,
6149 "xmlParseStringPEReference: no name\n");
6150 ctxt->wellFormed = 0;
6151 ctxt->disableSAX = 1;
6152 } else {
6153 cur = *ptr;
6154 if (cur == ';') {
6155 ptr++;
6156 cur = *ptr;
6157 if ((ctxt->sax != NULL) &&
6158 (ctxt->sax->getParameterEntity != NULL))
6159 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6160 name);
6161 if (entity == NULL) {
6162 /*
6163 * [ WFC: Entity Declared ]
6164 * In a document without any DTD, a document with only an
6165 * internal DTD subset which contains no parameter entity
6166 * references, or a document with "standalone='yes'", ...
6167 * ... The declaration of a parameter entity must precede
6168 * any reference to it...
6169 */
6170 if ((ctxt->standalone == 1) ||
6171 ((ctxt->hasExternalSubset == 0) &&
6172 (ctxt->hasPErefs == 0))) {
6173 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6175 ctxt->sax->error(ctxt->userData,
6176 "PEReference: %%%s; not found\n", name);
6177 ctxt->wellFormed = 0;
6178 ctxt->disableSAX = 1;
6179 } else {
6180 /*
6181 * [ VC: Entity Declared ]
6182 * In a document with an external subset or external
6183 * parameter entities with "standalone='no'", ...
6184 * ... The declaration of a parameter entity must
6185 * precede any reference to it...
6186 */
6187 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6188 ctxt->sax->warning(ctxt->userData,
6189 "PEReference: %%%s; not found\n", name);
6190 ctxt->valid = 0;
6191 }
6192 } else {
6193 /*
6194 * Internal checking in case the entity quest barfed
6195 */
6196 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6197 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6198 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6199 ctxt->sax->warning(ctxt->userData,
6200 "Internal: %%%s; is not a parameter entity\n", name);
6201 }
6202 }
6203 ctxt->hasPErefs = 1;
6204 } else {
6205 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6207 ctxt->sax->error(ctxt->userData,
6208 "xmlParseStringPEReference: expecting ';'\n");
6209 ctxt->wellFormed = 0;
6210 ctxt->disableSAX = 1;
6211 }
6212 xmlFree(name);
6213 }
6214 }
6215 *str = ptr;
6216 return(entity);
6217}
6218
6219/**
6220 * xmlParseDocTypeDecl:
6221 * @ctxt: an XML parser context
6222 *
6223 * parse a DOCTYPE declaration
6224 *
6225 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6226 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6227 *
6228 * [ VC: Root Element Type ]
6229 * The Name in the document type declaration must match the element
6230 * type of the root element.
6231 */
6232
6233void
6234xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6235 xmlChar *name = NULL;
6236 xmlChar *ExternalID = NULL;
6237 xmlChar *URI = NULL;
6238
6239 /*
6240 * We know that '<!DOCTYPE' has been detected.
6241 */
6242 SKIP(9);
6243
6244 SKIP_BLANKS;
6245
6246 /*
6247 * Parse the DOCTYPE name.
6248 */
6249 name = xmlParseName(ctxt);
6250 if (name == NULL) {
6251 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6253 ctxt->sax->error(ctxt->userData,
6254 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6255 ctxt->wellFormed = 0;
6256 ctxt->disableSAX = 1;
6257 }
6258 ctxt->intSubName = name;
6259
6260 SKIP_BLANKS;
6261
6262 /*
6263 * Check for SystemID and ExternalID
6264 */
6265 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6266
6267 if ((URI != NULL) || (ExternalID != NULL)) {
6268 ctxt->hasExternalSubset = 1;
6269 }
6270 ctxt->extSubURI = URI;
6271 ctxt->extSubSystem = ExternalID;
6272
6273 SKIP_BLANKS;
6274
6275 /*
6276 * Create and update the internal subset.
6277 */
6278 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6279 (!ctxt->disableSAX))
6280 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6281
6282 /*
6283 * Is there any internal subset declarations ?
6284 * they are handled separately in xmlParseInternalSubset()
6285 */
6286 if (RAW == '[')
6287 return;
6288
6289 /*
6290 * We should be at the end of the DOCTYPE declaration.
6291 */
6292 if (RAW != '>') {
6293 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006295 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006296 ctxt->wellFormed = 0;
6297 ctxt->disableSAX = 1;
6298 }
6299 NEXT;
6300}
6301
6302/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006303 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006304 * @ctxt: an XML parser context
6305 *
6306 * parse the internal subset declaration
6307 *
6308 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6309 */
6310
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006311static void
Owen Taylor3473f882001-02-23 17:55:21 +00006312xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6313 /*
6314 * Is there any DTD definition ?
6315 */
6316 if (RAW == '[') {
6317 ctxt->instate = XML_PARSER_DTD;
6318 NEXT;
6319 /*
6320 * Parse the succession of Markup declarations and
6321 * PEReferences.
6322 * Subsequence (markupdecl | PEReference | S)*
6323 */
6324 while (RAW != ']') {
6325 const xmlChar *check = CUR_PTR;
6326 int cons = ctxt->input->consumed;
6327
6328 SKIP_BLANKS;
6329 xmlParseMarkupDecl(ctxt);
6330 xmlParsePEReference(ctxt);
6331
6332 /*
6333 * Pop-up of finished entities.
6334 */
6335 while ((RAW == 0) && (ctxt->inputNr > 1))
6336 xmlPopInput(ctxt);
6337
6338 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6339 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6341 ctxt->sax->error(ctxt->userData,
6342 "xmlParseInternalSubset: error detected in Markup declaration\n");
6343 ctxt->wellFormed = 0;
6344 ctxt->disableSAX = 1;
6345 break;
6346 }
6347 }
6348 if (RAW == ']') {
6349 NEXT;
6350 SKIP_BLANKS;
6351 }
6352 }
6353
6354 /*
6355 * We should be at the end of the DOCTYPE declaration.
6356 */
6357 if (RAW != '>') {
6358 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006360 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006361 ctxt->wellFormed = 0;
6362 ctxt->disableSAX = 1;
6363 }
6364 NEXT;
6365}
6366
6367/**
6368 * xmlParseAttribute:
6369 * @ctxt: an XML parser context
6370 * @value: a xmlChar ** used to store the value of the attribute
6371 *
6372 * parse an attribute
6373 *
6374 * [41] Attribute ::= Name Eq AttValue
6375 *
6376 * [ WFC: No External Entity References ]
6377 * Attribute values cannot contain direct or indirect entity references
6378 * to external entities.
6379 *
6380 * [ WFC: No < in Attribute Values ]
6381 * The replacement text of any entity referred to directly or indirectly in
6382 * an attribute value (other than "&lt;") must not contain a <.
6383 *
6384 * [ VC: Attribute Value Type ]
6385 * The attribute must have been declared; the value must be of the type
6386 * declared for it.
6387 *
6388 * [25] Eq ::= S? '=' S?
6389 *
6390 * With namespace:
6391 *
6392 * [NS 11] Attribute ::= QName Eq AttValue
6393 *
6394 * Also the case QName == xmlns:??? is handled independently as a namespace
6395 * definition.
6396 *
6397 * Returns the attribute name, and the value in *value.
6398 */
6399
6400xmlChar *
6401xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6402 xmlChar *name, *val;
6403
6404 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006405 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006406 name = xmlParseName(ctxt);
6407 if (name == NULL) {
6408 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6410 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6411 ctxt->wellFormed = 0;
6412 ctxt->disableSAX = 1;
6413 return(NULL);
6414 }
6415
6416 /*
6417 * read the value
6418 */
6419 SKIP_BLANKS;
6420 if (RAW == '=') {
6421 NEXT;
6422 SKIP_BLANKS;
6423 val = xmlParseAttValue(ctxt);
6424 ctxt->instate = XML_PARSER_CONTENT;
6425 } else {
6426 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6428 ctxt->sax->error(ctxt->userData,
6429 "Specification mandate value for attribute %s\n", name);
6430 ctxt->wellFormed = 0;
6431 ctxt->disableSAX = 1;
6432 xmlFree(name);
6433 return(NULL);
6434 }
6435
6436 /*
6437 * Check that xml:lang conforms to the specification
6438 * No more registered as an error, just generate a warning now
6439 * since this was deprecated in XML second edition
6440 */
6441 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6442 if (!xmlCheckLanguageID(val)) {
6443 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6444 ctxt->sax->warning(ctxt->userData,
6445 "Malformed value for xml:lang : %s\n", val);
6446 }
6447 }
6448
6449 /*
6450 * Check that xml:space conforms to the specification
6451 */
6452 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6453 if (xmlStrEqual(val, BAD_CAST "default"))
6454 *(ctxt->space) = 0;
6455 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6456 *(ctxt->space) = 1;
6457 else {
6458 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6460 ctxt->sax->error(ctxt->userData,
6461"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6462 val);
6463 ctxt->wellFormed = 0;
6464 ctxt->disableSAX = 1;
6465 }
6466 }
6467
6468 *value = val;
6469 return(name);
6470}
6471
6472/**
6473 * xmlParseStartTag:
6474 * @ctxt: an XML parser context
6475 *
6476 * parse a start of tag either for rule element or
6477 * EmptyElement. In both case we don't parse the tag closing chars.
6478 *
6479 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6480 *
6481 * [ WFC: Unique Att Spec ]
6482 * No attribute name may appear more than once in the same start-tag or
6483 * empty-element tag.
6484 *
6485 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6486 *
6487 * [ WFC: Unique Att Spec ]
6488 * No attribute name may appear more than once in the same start-tag or
6489 * empty-element tag.
6490 *
6491 * With namespace:
6492 *
6493 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6494 *
6495 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6496 *
6497 * Returns the element name parsed
6498 */
6499
6500xmlChar *
6501xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6502 xmlChar *name;
6503 xmlChar *attname;
6504 xmlChar *attvalue;
6505 const xmlChar **atts = NULL;
6506 int nbatts = 0;
6507 int maxatts = 0;
6508 int i;
6509
6510 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006511 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006512
6513 name = xmlParseName(ctxt);
6514 if (name == NULL) {
6515 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6517 ctxt->sax->error(ctxt->userData,
6518 "xmlParseStartTag: invalid element name\n");
6519 ctxt->wellFormed = 0;
6520 ctxt->disableSAX = 1;
6521 return(NULL);
6522 }
6523
6524 /*
6525 * Now parse the attributes, it ends up with the ending
6526 *
6527 * (S Attribute)* S?
6528 */
6529 SKIP_BLANKS;
6530 GROW;
6531
Daniel Veillard21a0f912001-02-25 19:54:14 +00006532 while ((RAW != '>') &&
6533 ((RAW != '/') || (NXT(1) != '>')) &&
6534 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006535 const xmlChar *q = CUR_PTR;
6536 int cons = ctxt->input->consumed;
6537
6538 attname = xmlParseAttribute(ctxt, &attvalue);
6539 if ((attname != NULL) && (attvalue != NULL)) {
6540 /*
6541 * [ WFC: Unique Att Spec ]
6542 * No attribute name may appear more than once in the same
6543 * start-tag or empty-element tag.
6544 */
6545 for (i = 0; i < nbatts;i += 2) {
6546 if (xmlStrEqual(atts[i], attname)) {
6547 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6549 ctxt->sax->error(ctxt->userData,
6550 "Attribute %s redefined\n",
6551 attname);
6552 ctxt->wellFormed = 0;
6553 ctxt->disableSAX = 1;
6554 xmlFree(attname);
6555 xmlFree(attvalue);
6556 goto failed;
6557 }
6558 }
6559
6560 /*
6561 * Add the pair to atts
6562 */
6563 if (atts == NULL) {
6564 maxatts = 10;
6565 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6566 if (atts == NULL) {
6567 xmlGenericError(xmlGenericErrorContext,
6568 "malloc of %ld byte failed\n",
6569 maxatts * (long)sizeof(xmlChar *));
6570 return(NULL);
6571 }
6572 } else if (nbatts + 4 > maxatts) {
6573 maxatts *= 2;
6574 atts = (const xmlChar **) xmlRealloc((void *) atts,
6575 maxatts * sizeof(xmlChar *));
6576 if (atts == NULL) {
6577 xmlGenericError(xmlGenericErrorContext,
6578 "realloc of %ld byte failed\n",
6579 maxatts * (long)sizeof(xmlChar *));
6580 return(NULL);
6581 }
6582 }
6583 atts[nbatts++] = attname;
6584 atts[nbatts++] = attvalue;
6585 atts[nbatts] = NULL;
6586 atts[nbatts + 1] = NULL;
6587 } else {
6588 if (attname != NULL)
6589 xmlFree(attname);
6590 if (attvalue != NULL)
6591 xmlFree(attvalue);
6592 }
6593
6594failed:
6595
6596 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6597 break;
6598 if (!IS_BLANK(RAW)) {
6599 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6601 ctxt->sax->error(ctxt->userData,
6602 "attributes construct error\n");
6603 ctxt->wellFormed = 0;
6604 ctxt->disableSAX = 1;
6605 }
6606 SKIP_BLANKS;
6607 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6608 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6610 ctxt->sax->error(ctxt->userData,
6611 "xmlParseStartTag: problem parsing attributes\n");
6612 ctxt->wellFormed = 0;
6613 ctxt->disableSAX = 1;
6614 break;
6615 }
6616 GROW;
6617 }
6618
6619 /*
6620 * SAX: Start of Element !
6621 */
6622 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6623 (!ctxt->disableSAX))
6624 ctxt->sax->startElement(ctxt->userData, name, atts);
6625
6626 if (atts != NULL) {
6627 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6628 xmlFree((void *) atts);
6629 }
6630 return(name);
6631}
6632
6633/**
6634 * xmlParseEndTag:
6635 * @ctxt: an XML parser context
6636 *
6637 * parse an end of tag
6638 *
6639 * [42] ETag ::= '</' Name S? '>'
6640 *
6641 * With namespace
6642 *
6643 * [NS 9] ETag ::= '</' QName S? '>'
6644 */
6645
6646void
6647xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6648 xmlChar *name;
6649 xmlChar *oldname;
6650
6651 GROW;
6652 if ((RAW != '<') || (NXT(1) != '/')) {
6653 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6655 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6656 ctxt->wellFormed = 0;
6657 ctxt->disableSAX = 1;
6658 return;
6659 }
6660 SKIP(2);
6661
Daniel Veillard46de64e2002-05-29 08:21:33 +00006662 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006663
6664 /*
6665 * We should definitely be at the ending "S? '>'" part
6666 */
6667 GROW;
6668 SKIP_BLANKS;
6669 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6670 ctxt->errNo = XML_ERR_GT_REQUIRED;
6671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6672 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6673 ctxt->wellFormed = 0;
6674 ctxt->disableSAX = 1;
6675 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006676 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006677
6678 /*
6679 * [ WFC: Element Type Match ]
6680 * The Name in an element's end-tag must match the element type in the
6681 * start-tag.
6682 *
6683 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006684 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006685 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006687 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006688 ctxt->sax->error(ctxt->userData,
6689 "Opening and ending tag mismatch: %s and %s\n",
6690 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006691 xmlFree(name);
6692 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006693 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006694 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006695 }
6696
6697 }
6698 ctxt->wellFormed = 0;
6699 ctxt->disableSAX = 1;
6700 }
6701
6702 /*
6703 * SAX: End of Tag
6704 */
6705 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6706 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006707 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006708
Owen Taylor3473f882001-02-23 17:55:21 +00006709 oldname = namePop(ctxt);
6710 spacePop(ctxt);
6711 if (oldname != NULL) {
6712#ifdef DEBUG_STACK
6713 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6714#endif
6715 xmlFree(oldname);
6716 }
6717 return;
6718}
6719
6720/**
6721 * xmlParseCDSect:
6722 * @ctxt: an XML parser context
6723 *
6724 * Parse escaped pure raw content.
6725 *
6726 * [18] CDSect ::= CDStart CData CDEnd
6727 *
6728 * [19] CDStart ::= '<![CDATA['
6729 *
6730 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6731 *
6732 * [21] CDEnd ::= ']]>'
6733 */
6734void
6735xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6736 xmlChar *buf = NULL;
6737 int len = 0;
6738 int size = XML_PARSER_BUFFER_SIZE;
6739 int r, rl;
6740 int s, sl;
6741 int cur, l;
6742 int count = 0;
6743
6744 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6745 (NXT(2) == '[') && (NXT(3) == 'C') &&
6746 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6747 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6748 (NXT(8) == '[')) {
6749 SKIP(9);
6750 } else
6751 return;
6752
6753 ctxt->instate = XML_PARSER_CDATA_SECTION;
6754 r = CUR_CHAR(rl);
6755 if (!IS_CHAR(r)) {
6756 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6758 ctxt->sax->error(ctxt->userData,
6759 "CData section not finished\n");
6760 ctxt->wellFormed = 0;
6761 ctxt->disableSAX = 1;
6762 ctxt->instate = XML_PARSER_CONTENT;
6763 return;
6764 }
6765 NEXTL(rl);
6766 s = CUR_CHAR(sl);
6767 if (!IS_CHAR(s)) {
6768 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6770 ctxt->sax->error(ctxt->userData,
6771 "CData section not finished\n");
6772 ctxt->wellFormed = 0;
6773 ctxt->disableSAX = 1;
6774 ctxt->instate = XML_PARSER_CONTENT;
6775 return;
6776 }
6777 NEXTL(sl);
6778 cur = CUR_CHAR(l);
6779 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6780 if (buf == NULL) {
6781 xmlGenericError(xmlGenericErrorContext,
6782 "malloc of %d byte failed\n", size);
6783 return;
6784 }
6785 while (IS_CHAR(cur) &&
6786 ((r != ']') || (s != ']') || (cur != '>'))) {
6787 if (len + 5 >= size) {
6788 size *= 2;
6789 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6790 if (buf == NULL) {
6791 xmlGenericError(xmlGenericErrorContext,
6792 "realloc of %d byte failed\n", size);
6793 return;
6794 }
6795 }
6796 COPY_BUF(rl,buf,len,r);
6797 r = s;
6798 rl = sl;
6799 s = cur;
6800 sl = l;
6801 count++;
6802 if (count > 50) {
6803 GROW;
6804 count = 0;
6805 }
6806 NEXTL(l);
6807 cur = CUR_CHAR(l);
6808 }
6809 buf[len] = 0;
6810 ctxt->instate = XML_PARSER_CONTENT;
6811 if (cur != '>') {
6812 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6814 ctxt->sax->error(ctxt->userData,
6815 "CData section not finished\n%.50s\n", buf);
6816 ctxt->wellFormed = 0;
6817 ctxt->disableSAX = 1;
6818 xmlFree(buf);
6819 return;
6820 }
6821 NEXTL(l);
6822
6823 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006824 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006825 */
6826 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6827 if (ctxt->sax->cdataBlock != NULL)
6828 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006829 else if (ctxt->sax->characters != NULL)
6830 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006831 }
6832 xmlFree(buf);
6833}
6834
6835/**
6836 * xmlParseContent:
6837 * @ctxt: an XML parser context
6838 *
6839 * Parse a content:
6840 *
6841 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6842 */
6843
6844void
6845xmlParseContent(xmlParserCtxtPtr ctxt) {
6846 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006847 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006848 ((RAW != '<') || (NXT(1) != '/'))) {
6849 const xmlChar *test = CUR_PTR;
6850 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006851 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006852
6853 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006854 * First case : a Processing Instruction.
6855 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006856 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006857 xmlParsePI(ctxt);
6858 }
6859
6860 /*
6861 * Second case : a CDSection
6862 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006863 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006864 (NXT(2) == '[') && (NXT(3) == 'C') &&
6865 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6866 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6867 (NXT(8) == '[')) {
6868 xmlParseCDSect(ctxt);
6869 }
6870
6871 /*
6872 * Third case : a comment
6873 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006874 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006875 (NXT(2) == '-') && (NXT(3) == '-')) {
6876 xmlParseComment(ctxt);
6877 ctxt->instate = XML_PARSER_CONTENT;
6878 }
6879
6880 /*
6881 * Fourth case : a sub-element.
6882 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006883 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006884 xmlParseElement(ctxt);
6885 }
6886
6887 /*
6888 * Fifth case : a reference. If if has not been resolved,
6889 * parsing returns it's Name, create the node
6890 */
6891
Daniel Veillard21a0f912001-02-25 19:54:14 +00006892 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006893 xmlParseReference(ctxt);
6894 }
6895
6896 /*
6897 * Last case, text. Note that References are handled directly.
6898 */
6899 else {
6900 xmlParseCharData(ctxt, 0);
6901 }
6902
6903 GROW;
6904 /*
6905 * Pop-up of finished entities.
6906 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006907 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006908 xmlPopInput(ctxt);
6909 SHRINK;
6910
Daniel Veillardfdc91562002-07-01 21:52:03 +00006911 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006912 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6914 ctxt->sax->error(ctxt->userData,
6915 "detected an error in element content\n");
6916 ctxt->wellFormed = 0;
6917 ctxt->disableSAX = 1;
6918 ctxt->instate = XML_PARSER_EOF;
6919 break;
6920 }
6921 }
6922}
6923
6924/**
6925 * xmlParseElement:
6926 * @ctxt: an XML parser context
6927 *
6928 * parse an XML element, this is highly recursive
6929 *
6930 * [39] element ::= EmptyElemTag | STag content ETag
6931 *
6932 * [ WFC: Element Type Match ]
6933 * The Name in an element's end-tag must match the element type in the
6934 * start-tag.
6935 *
6936 * [ VC: Element Valid ]
6937 * An element is valid if there is a declaration matching elementdecl
6938 * where the Name matches the element type and one of the following holds:
6939 * - The declaration matches EMPTY and the element has no content.
6940 * - The declaration matches children and the sequence of child elements
6941 * belongs to the language generated by the regular expression in the
6942 * content model, with optional white space (characters matching the
6943 * nonterminal S) between each pair of child elements.
6944 * - The declaration matches Mixed and the content consists of character
6945 * data and child elements whose types match names in the content model.
6946 * - The declaration matches ANY, and the types of any child elements have
6947 * been declared.
6948 */
6949
6950void
6951xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00006952 xmlChar *name;
6953 xmlChar *oldname;
6954 xmlParserNodeInfo node_info;
6955 xmlNodePtr ret;
6956
6957 /* Capture start position */
6958 if (ctxt->record_info) {
6959 node_info.begin_pos = ctxt->input->consumed +
6960 (CUR_PTR - ctxt->input->base);
6961 node_info.begin_line = ctxt->input->line;
6962 }
6963
6964 if (ctxt->spaceNr == 0)
6965 spacePush(ctxt, -1);
6966 else
6967 spacePush(ctxt, *ctxt->space);
6968
6969 name = xmlParseStartTag(ctxt);
6970 if (name == NULL) {
6971 spacePop(ctxt);
6972 return;
6973 }
6974 namePush(ctxt, name);
6975 ret = ctxt->node;
6976
6977 /*
6978 * [ VC: Root Element Type ]
6979 * The Name in the document type declaration must match the element
6980 * type of the root element.
6981 */
6982 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6983 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6984 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6985
6986 /*
6987 * Check for an Empty Element.
6988 */
6989 if ((RAW == '/') && (NXT(1) == '>')) {
6990 SKIP(2);
6991 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6992 (!ctxt->disableSAX))
6993 ctxt->sax->endElement(ctxt->userData, name);
6994 oldname = namePop(ctxt);
6995 spacePop(ctxt);
6996 if (oldname != NULL) {
6997#ifdef DEBUG_STACK
6998 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6999#endif
7000 xmlFree(oldname);
7001 }
7002 if ( ret != NULL && ctxt->record_info ) {
7003 node_info.end_pos = ctxt->input->consumed +
7004 (CUR_PTR - ctxt->input->base);
7005 node_info.end_line = ctxt->input->line;
7006 node_info.node = ret;
7007 xmlParserAddNodeInfo(ctxt, &node_info);
7008 }
7009 return;
7010 }
7011 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007012 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007013 } else {
7014 ctxt->errNo = XML_ERR_GT_REQUIRED;
7015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7016 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007017 "Couldn't find end of Start Tag %s\n",
7018 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007019 ctxt->wellFormed = 0;
7020 ctxt->disableSAX = 1;
7021
7022 /*
7023 * end of parsing of this node.
7024 */
7025 nodePop(ctxt);
7026 oldname = namePop(ctxt);
7027 spacePop(ctxt);
7028 if (oldname != NULL) {
7029#ifdef DEBUG_STACK
7030 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7031#endif
7032 xmlFree(oldname);
7033 }
7034
7035 /*
7036 * Capture end position and add node
7037 */
7038 if ( ret != NULL && ctxt->record_info ) {
7039 node_info.end_pos = ctxt->input->consumed +
7040 (CUR_PTR - ctxt->input->base);
7041 node_info.end_line = ctxt->input->line;
7042 node_info.node = ret;
7043 xmlParserAddNodeInfo(ctxt, &node_info);
7044 }
7045 return;
7046 }
7047
7048 /*
7049 * Parse the content of the element:
7050 */
7051 xmlParseContent(ctxt);
7052 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007053 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7055 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007056 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007057 ctxt->wellFormed = 0;
7058 ctxt->disableSAX = 1;
7059
7060 /*
7061 * end of parsing of this node.
7062 */
7063 nodePop(ctxt);
7064 oldname = namePop(ctxt);
7065 spacePop(ctxt);
7066 if (oldname != NULL) {
7067#ifdef DEBUG_STACK
7068 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7069#endif
7070 xmlFree(oldname);
7071 }
7072 return;
7073 }
7074
7075 /*
7076 * parse the end of tag: '</' should be here.
7077 */
7078 xmlParseEndTag(ctxt);
7079
7080 /*
7081 * Capture end position and add node
7082 */
7083 if ( ret != NULL && ctxt->record_info ) {
7084 node_info.end_pos = ctxt->input->consumed +
7085 (CUR_PTR - ctxt->input->base);
7086 node_info.end_line = ctxt->input->line;
7087 node_info.node = ret;
7088 xmlParserAddNodeInfo(ctxt, &node_info);
7089 }
7090}
7091
7092/**
7093 * xmlParseVersionNum:
7094 * @ctxt: an XML parser context
7095 *
7096 * parse the XML version value.
7097 *
7098 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7099 *
7100 * Returns the string giving the XML version number, or NULL
7101 */
7102xmlChar *
7103xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7104 xmlChar *buf = NULL;
7105 int len = 0;
7106 int size = 10;
7107 xmlChar cur;
7108
7109 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7110 if (buf == NULL) {
7111 xmlGenericError(xmlGenericErrorContext,
7112 "malloc of %d byte failed\n", size);
7113 return(NULL);
7114 }
7115 cur = CUR;
7116 while (((cur >= 'a') && (cur <= 'z')) ||
7117 ((cur >= 'A') && (cur <= 'Z')) ||
7118 ((cur >= '0') && (cur <= '9')) ||
7119 (cur == '_') || (cur == '.') ||
7120 (cur == ':') || (cur == '-')) {
7121 if (len + 1 >= size) {
7122 size *= 2;
7123 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7124 if (buf == NULL) {
7125 xmlGenericError(xmlGenericErrorContext,
7126 "realloc of %d byte failed\n", size);
7127 return(NULL);
7128 }
7129 }
7130 buf[len++] = cur;
7131 NEXT;
7132 cur=CUR;
7133 }
7134 buf[len] = 0;
7135 return(buf);
7136}
7137
7138/**
7139 * xmlParseVersionInfo:
7140 * @ctxt: an XML parser context
7141 *
7142 * parse the XML version.
7143 *
7144 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7145 *
7146 * [25] Eq ::= S? '=' S?
7147 *
7148 * Returns the version string, e.g. "1.0"
7149 */
7150
7151xmlChar *
7152xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7153 xmlChar *version = NULL;
7154 const xmlChar *q;
7155
7156 if ((RAW == 'v') && (NXT(1) == 'e') &&
7157 (NXT(2) == 'r') && (NXT(3) == 's') &&
7158 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7159 (NXT(6) == 'n')) {
7160 SKIP(7);
7161 SKIP_BLANKS;
7162 if (RAW != '=') {
7163 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7165 ctxt->sax->error(ctxt->userData,
7166 "xmlParseVersionInfo : expected '='\n");
7167 ctxt->wellFormed = 0;
7168 ctxt->disableSAX = 1;
7169 return(NULL);
7170 }
7171 NEXT;
7172 SKIP_BLANKS;
7173 if (RAW == '"') {
7174 NEXT;
7175 q = CUR_PTR;
7176 version = xmlParseVersionNum(ctxt);
7177 if (RAW != '"') {
7178 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7180 ctxt->sax->error(ctxt->userData,
7181 "String not closed\n%.50s\n", q);
7182 ctxt->wellFormed = 0;
7183 ctxt->disableSAX = 1;
7184 } else
7185 NEXT;
7186 } else if (RAW == '\''){
7187 NEXT;
7188 q = CUR_PTR;
7189 version = xmlParseVersionNum(ctxt);
7190 if (RAW != '\'') {
7191 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7193 ctxt->sax->error(ctxt->userData,
7194 "String not closed\n%.50s\n", q);
7195 ctxt->wellFormed = 0;
7196 ctxt->disableSAX = 1;
7197 } else
7198 NEXT;
7199 } else {
7200 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7202 ctxt->sax->error(ctxt->userData,
7203 "xmlParseVersionInfo : expected ' or \"\n");
7204 ctxt->wellFormed = 0;
7205 ctxt->disableSAX = 1;
7206 }
7207 }
7208 return(version);
7209}
7210
7211/**
7212 * xmlParseEncName:
7213 * @ctxt: an XML parser context
7214 *
7215 * parse the XML encoding name
7216 *
7217 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7218 *
7219 * Returns the encoding name value or NULL
7220 */
7221xmlChar *
7222xmlParseEncName(xmlParserCtxtPtr ctxt) {
7223 xmlChar *buf = NULL;
7224 int len = 0;
7225 int size = 10;
7226 xmlChar cur;
7227
7228 cur = CUR;
7229 if (((cur >= 'a') && (cur <= 'z')) ||
7230 ((cur >= 'A') && (cur <= 'Z'))) {
7231 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7232 if (buf == NULL) {
7233 xmlGenericError(xmlGenericErrorContext,
7234 "malloc of %d byte failed\n", size);
7235 return(NULL);
7236 }
7237
7238 buf[len++] = cur;
7239 NEXT;
7240 cur = CUR;
7241 while (((cur >= 'a') && (cur <= 'z')) ||
7242 ((cur >= 'A') && (cur <= 'Z')) ||
7243 ((cur >= '0') && (cur <= '9')) ||
7244 (cur == '.') || (cur == '_') ||
7245 (cur == '-')) {
7246 if (len + 1 >= size) {
7247 size *= 2;
7248 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7249 if (buf == NULL) {
7250 xmlGenericError(xmlGenericErrorContext,
7251 "realloc of %d byte failed\n", size);
7252 return(NULL);
7253 }
7254 }
7255 buf[len++] = cur;
7256 NEXT;
7257 cur = CUR;
7258 if (cur == 0) {
7259 SHRINK;
7260 GROW;
7261 cur = CUR;
7262 }
7263 }
7264 buf[len] = 0;
7265 } else {
7266 ctxt->errNo = XML_ERR_ENCODING_NAME;
7267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7268 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7269 ctxt->wellFormed = 0;
7270 ctxt->disableSAX = 1;
7271 }
7272 return(buf);
7273}
7274
7275/**
7276 * xmlParseEncodingDecl:
7277 * @ctxt: an XML parser context
7278 *
7279 * parse the XML encoding declaration
7280 *
7281 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7282 *
7283 * this setups the conversion filters.
7284 *
7285 * Returns the encoding value or NULL
7286 */
7287
7288xmlChar *
7289xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7290 xmlChar *encoding = NULL;
7291 const xmlChar *q;
7292
7293 SKIP_BLANKS;
7294 if ((RAW == 'e') && (NXT(1) == 'n') &&
7295 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7296 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7297 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7298 SKIP(8);
7299 SKIP_BLANKS;
7300 if (RAW != '=') {
7301 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7303 ctxt->sax->error(ctxt->userData,
7304 "xmlParseEncodingDecl : expected '='\n");
7305 ctxt->wellFormed = 0;
7306 ctxt->disableSAX = 1;
7307 return(NULL);
7308 }
7309 NEXT;
7310 SKIP_BLANKS;
7311 if (RAW == '"') {
7312 NEXT;
7313 q = CUR_PTR;
7314 encoding = xmlParseEncName(ctxt);
7315 if (RAW != '"') {
7316 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7318 ctxt->sax->error(ctxt->userData,
7319 "String not closed\n%.50s\n", q);
7320 ctxt->wellFormed = 0;
7321 ctxt->disableSAX = 1;
7322 } else
7323 NEXT;
7324 } else if (RAW == '\''){
7325 NEXT;
7326 q = CUR_PTR;
7327 encoding = xmlParseEncName(ctxt);
7328 if (RAW != '\'') {
7329 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7331 ctxt->sax->error(ctxt->userData,
7332 "String not closed\n%.50s\n", q);
7333 ctxt->wellFormed = 0;
7334 ctxt->disableSAX = 1;
7335 } else
7336 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007337 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007338 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7340 ctxt->sax->error(ctxt->userData,
7341 "xmlParseEncodingDecl : expected ' or \"\n");
7342 ctxt->wellFormed = 0;
7343 ctxt->disableSAX = 1;
7344 }
7345 if (encoding != NULL) {
7346 xmlCharEncoding enc;
7347 xmlCharEncodingHandlerPtr handler;
7348
7349 if (ctxt->input->encoding != NULL)
7350 xmlFree((xmlChar *) ctxt->input->encoding);
7351 ctxt->input->encoding = encoding;
7352
7353 enc = xmlParseCharEncoding((const char *) encoding);
7354 /*
7355 * registered set of known encodings
7356 */
7357 if (enc != XML_CHAR_ENCODING_ERROR) {
7358 xmlSwitchEncoding(ctxt, enc);
7359 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007360 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007361 xmlFree(encoding);
7362 return(NULL);
7363 }
7364 } else {
7365 /*
7366 * fallback for unknown encodings
7367 */
7368 handler = xmlFindCharEncodingHandler((const char *) encoding);
7369 if (handler != NULL) {
7370 xmlSwitchToEncoding(ctxt, handler);
7371 } else {
7372 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7374 ctxt->sax->error(ctxt->userData,
7375 "Unsupported encoding %s\n", encoding);
7376 return(NULL);
7377 }
7378 }
7379 }
7380 }
7381 return(encoding);
7382}
7383
7384/**
7385 * xmlParseSDDecl:
7386 * @ctxt: an XML parser context
7387 *
7388 * parse the XML standalone declaration
7389 *
7390 * [32] SDDecl ::= S 'standalone' Eq
7391 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7392 *
7393 * [ VC: Standalone Document Declaration ]
7394 * TODO The standalone document declaration must have the value "no"
7395 * if any external markup declarations contain declarations of:
7396 * - attributes with default values, if elements to which these
7397 * attributes apply appear in the document without specifications
7398 * of values for these attributes, or
7399 * - entities (other than amp, lt, gt, apos, quot), if references
7400 * to those entities appear in the document, or
7401 * - attributes with values subject to normalization, where the
7402 * attribute appears in the document with a value which will change
7403 * as a result of normalization, or
7404 * - element types with element content, if white space occurs directly
7405 * within any instance of those types.
7406 *
7407 * Returns 1 if standalone, 0 otherwise
7408 */
7409
7410int
7411xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7412 int standalone = -1;
7413
7414 SKIP_BLANKS;
7415 if ((RAW == 's') && (NXT(1) == 't') &&
7416 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7417 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7418 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7419 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7420 SKIP(10);
7421 SKIP_BLANKS;
7422 if (RAW != '=') {
7423 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7425 ctxt->sax->error(ctxt->userData,
7426 "XML standalone declaration : expected '='\n");
7427 ctxt->wellFormed = 0;
7428 ctxt->disableSAX = 1;
7429 return(standalone);
7430 }
7431 NEXT;
7432 SKIP_BLANKS;
7433 if (RAW == '\''){
7434 NEXT;
7435 if ((RAW == 'n') && (NXT(1) == 'o')) {
7436 standalone = 0;
7437 SKIP(2);
7438 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7439 (NXT(2) == 's')) {
7440 standalone = 1;
7441 SKIP(3);
7442 } else {
7443 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7445 ctxt->sax->error(ctxt->userData,
7446 "standalone accepts only 'yes' or 'no'\n");
7447 ctxt->wellFormed = 0;
7448 ctxt->disableSAX = 1;
7449 }
7450 if (RAW != '\'') {
7451 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7453 ctxt->sax->error(ctxt->userData, "String not closed\n");
7454 ctxt->wellFormed = 0;
7455 ctxt->disableSAX = 1;
7456 } else
7457 NEXT;
7458 } else if (RAW == '"'){
7459 NEXT;
7460 if ((RAW == 'n') && (NXT(1) == 'o')) {
7461 standalone = 0;
7462 SKIP(2);
7463 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7464 (NXT(2) == 's')) {
7465 standalone = 1;
7466 SKIP(3);
7467 } else {
7468 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7470 ctxt->sax->error(ctxt->userData,
7471 "standalone accepts only 'yes' or 'no'\n");
7472 ctxt->wellFormed = 0;
7473 ctxt->disableSAX = 1;
7474 }
7475 if (RAW != '"') {
7476 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7478 ctxt->sax->error(ctxt->userData, "String not closed\n");
7479 ctxt->wellFormed = 0;
7480 ctxt->disableSAX = 1;
7481 } else
7482 NEXT;
7483 } else {
7484 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7486 ctxt->sax->error(ctxt->userData,
7487 "Standalone value not found\n");
7488 ctxt->wellFormed = 0;
7489 ctxt->disableSAX = 1;
7490 }
7491 }
7492 return(standalone);
7493}
7494
/**
 * xmlParseXMLDecl:
 * @ctxt:  an XML parser context
 *
 * parse an XML declaration header
 *
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 *
 * The caller must already have checked that the input starts with
 * '<?xml': those five characters are skipped without being tested.
 *
 * Results are stored in the context rather than returned:
 *  - the version string goes to ctxt->version (any previous value is
 *    freed first);
 *  - the standalone status goes to ctxt->input->standalone.
 * Every well-formedness error sets ctxt->wellFormed to 0 and
 * ctxt->disableSAX to 1. If ctxt->errNo is XML_ERR_UNSUPPORTED_ENCODING
 * after the encoding declaration has been parsed, the function returns at
 * once, leaving the rest of the declaration unparsed.
 */

void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
    xmlChar *version;

    /*
     * We know that '<?xml' is here.
     */
    SKIP(5);

    /* At least one blank must separate '<?xml' from VersionInfo. */
    if (!IS_BLANK(RAW)) {
        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }
    SKIP_BLANKS;

    /*
     * We must have the VersionInfo here.
     */
    version = xmlParseVersionInfo(ctxt);
    if (version == NULL) {
        /* Note: ctxt->errNo is not updated on this path. */
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
            "Malformed declaration expecting version\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    } else {
        /* A version other than the default one only triggers a warning. */
        if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
            /*
             * TODO: Blueberry should be detected here
             */
            if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
                ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
                                   version);
        }
        /* The context takes ownership of the new version string. */
        if (ctxt->version != NULL)
            xmlFree((void *) ctxt->version);
        ctxt->version = version;
    }

    /*
     * We may have the encoding declaration
     */
    if (!IS_BLANK(RAW)) {
        /* No blank: either the declaration ends here or it is an error. */
        if ((RAW == '?') && (NXT(1) == '>')) {
            SKIP(2);
            return;
        }
        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Blank needed here\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }
    /*
     * The return value is not needed: the encoding name, when accepted,
     * is kept in ctxt->input->encoding by xmlParseEncodingDecl itself.
     */
    xmlParseEncodingDecl(ctxt);
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
        /*
         * The XML REC instructs us to stop parsing right here
         */
        return;
    }

    /*
     * We may have the standalone status.
     * A separating blank is only required if an encoding is recorded
     * on the input.
     */
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
        if ((RAW == '?') && (NXT(1) == '>')) {
            SKIP(2);
            return;
        }
        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Blank needed here\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }
    SKIP_BLANKS;
    ctxt->input->standalone = xmlParseSDDecl(ctxt);

    /* Finally the closing '?>', optionally preceded by blanks. */
    SKIP_BLANKS;
    if ((RAW == '?') && (NXT(1) == '>')) {
        SKIP(2);
    } else if (RAW == '>') {
        /* Deprecated old WD ... */
        ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "XML declaration must end-up with '?>'\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
        NEXT;
    } else {
        ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "parsing XML declaration: '?>' expected\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
        /* Error recovery: presumably skips ahead to the next '>' - confirm
         * against the MOVETO_ENDTAG definition. */
        MOVETO_ENDTAG(CUR_PTR);
        NEXT;
    }
}
7608
7609/**
7610 * xmlParseMisc:
7611 * @ctxt: an XML parser context
7612 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007613 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007614 *
7615 * [27] Misc ::= Comment | PI | S
7616 */
7617
7618void
7619xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007620 while (((RAW == '<') && (NXT(1) == '?')) ||
7621 ((RAW == '<') && (NXT(1) == '!') &&
7622 (NXT(2) == '-') && (NXT(3) == '-')) ||
7623 IS_BLANK(CUR)) {
7624 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007625 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007626 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007627 NEXT;
7628 } else
7629 xmlParseComment(ctxt);
7630 }
7631}
7632
/**
 * xmlParseDocument:
 * @ctxt:  an XML parser context
 *
 * parse an XML document (and build a tree if using the standard SAX
 * interface).
 *
 * [1] document ::= prolog element Misc*
 *
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 *
 * The steps are, in order: encoding detection from the first four bytes
 * (only when no encoding is set on the context), the optional XML
 * declaration, the startDocument SAX event, Misc*, the optional DOCTYPE
 * with its internal subset followed by Misc*, the root element, the
 * trailing Misc*, and finally the endDocument SAX event.
 *
 * Returns 0, -1 in case of error. the parser context is augmented
 *                as a result of the parsing. -1 is returned either
 *                immediately when the declared encoding is unsupported
 *                (startDocument/endDocument are then not emitted), or at
 *                the end when the document is not well formed, in which
 *                case ctxt->valid is also reset to 0.
 */

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlInitParser();

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * NOTE(review): the enum constant is compared as a pointer; this is
     * presumably meant as "no encoding set yet" - confirm that
     * XML_CHAR_ENCODING_NONE is 0.
     */
    if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
        /*
         * Get the 4 first bytes and decode the charset
         * if enc != XML_CHAR_ENCODING_NONE
         * plug some encoding conversion routines.
         */
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* An empty input is reported, but parsing still goes on below. */
    if (CUR == 0) {
        ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Document is empty\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((RAW == '<') && (NXT(1) == '?') &&
        (NXT(2) == 'x') && (NXT(3) == 'm') &&
        (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        /* Propagate the standalone status from the input to the context. */
        ctxt->standalone = ctxt->input->standalone;
        SKIP_BLANKS;
    } else {
        /* No XML declaration: record the default version on the context. */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * The Misc part of the Prolog
     */
    GROW;
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if ((RAW == '<') && (NXT(1) == '!') &&
        (NXT(2) == 'D') && (NXT(3) == 'O') &&
        (NXT(4) == 'C') && (NXT(5) == 'T') &&
        (NXT(6) == 'Y') && (NXT(7) == 'P') &&
        (NXT(8) == 'E')) {

        /* inSubset is 1 while the DOCTYPE and internal subset are parsed. */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            ctxt->instate = XML_PARSER_DTD;
            xmlParseInternalSubset(ctxt);
        }

        /*
         * Create and update the external subset.
         * inSubset is 2 for the duration of the externalSubset callback.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        ctxt->inSubset = 0;


        ctxt->instate = XML_PARSER_PROLOG;
        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                    "Start tag expected, '<' not found\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
        ctxt->instate = XML_PARSER_EOF;
    } else {
        ctxt->instate = XML_PARSER_CONTENT;
        xmlParseElement(ctxt);
        ctxt->instate = XML_PARSER_EPILOG;


        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        /* Anything left after the trailing Misc* is an error. */
        if (RAW != 0) {
            ctxt->errNo = XML_ERR_DOCUMENT_END;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                    "Extra content at the end of the document\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        }
        ctxt->instate = XML_PARSER_EOF;
    }

    /*
     * SAX: end of the document processing.
     * Unlike startDocument, this is emitted even when SAX is disabled.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    /*
     * Remove locally kept entity definitions if the tree was not built
     */
    if ((ctxt->myDoc != NULL) &&
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }

    /* A document which is not well formed cannot be valid either. */
    if (! ctxt->wellFormed) {
        ctxt->valid = 0;
        return(-1);
    }
    return(0);
}
7808
/**
 * xmlParseExtParsedEnt:
 * @ctxt:  an XML parser context
 *
 * parse a general parsed entity
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * The steps are, in order: encoding detection from the first four bytes,
 * the optional leading declaration (parsed with xmlParseXMLDecl), the
 * startDocument SAX event, the content itself, and the endDocument SAX
 * event. Validation and subset loading are switched off on the context
 * before the content is parsed.
 *
 * Returns 0, -1 in case of error. the parser context is augmented
 *                as a result of the parsing. -1 is returned either
 *                immediately when the declared encoding is unsupported,
 *                or at the end when the entity is not well formed.
 */

int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlDefaultSAXHandlerInit();

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the 4 first bytes and decode the charset
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    start[0] = RAW;
    start[1] = NXT(1);
    start[2] = NXT(2);
    start[3] = NXT(3);
    enc = xmlDetectCharEncoding(start, 4);
    if (enc != XML_CHAR_ENCODING_NONE) {
        xmlSwitchEncoding(ctxt, enc);
    }


    /* An empty input is reported, but parsing still goes on below. */
    if (CUR == 0) {
        ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Document is empty\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((RAW == '<') && (NXT(1) == '?') &&
        (NXT(2) == 'x') && (NXT(3) == 'm') &&
        (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        SKIP_BLANKS;
    } else {
        /* No declaration: record the default version on the context. */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    ctxt->depth = 0;

    xmlParseContent(ctxt);

    /*
     * xmlParseContent stops on "</" or at the end of the input; here a
     * leftover end tag means an unbalanced chunk, and anything else left
     * over is extra content.
     */
    if ((RAW == '<') && (NXT(1) == '/')) {
        ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                "chunk is not well balanced\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    } else if (RAW != 0) {
        ctxt->errNo = XML_ERR_EXTRA_CONTENT;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                "extra content at the end of well balanced chunk\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }

    /*
     * SAX: end of the document processing.
     * Unlike startDocument, this is emitted even when SAX is disabled.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}
7921
7922/************************************************************************
7923 * *
7924 * Progressive parsing interfaces *
7925 * *
7926 ************************************************************************/
7927
7928/**
7929 * xmlParseLookupSequence:
7930 * @ctxt: an XML parser context
7931 * @first: the first char to lookup
7932 * @next: the next char to lookup or zero
7933 * @third: the next char to lookup or zero
7934 *
7935 * Try to find if a sequence (first, next, third) or just (first next) or
7936 * (first) is available in the input stream.
7937 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7938 * to avoid rescanning sequences of bytes, it DOES change the state of the
7939 * parser, do not use liberally.
7940 *
7941 * Returns the index to the current parsing point if the full sequence
7942 * is available, -1 otherwise.
7943 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007944static int
Owen Taylor3473f882001-02-23 17:55:21 +00007945xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7946 xmlChar next, xmlChar third) {
7947 int base, len;
7948 xmlParserInputPtr in;
7949 const xmlChar *buf;
7950
7951 in = ctxt->input;
7952 if (in == NULL) return(-1);
7953 base = in->cur - in->base;
7954 if (base < 0) return(-1);
7955 if (ctxt->checkIndex > base)
7956 base = ctxt->checkIndex;
7957 if (in->buf == NULL) {
7958 buf = in->base;
7959 len = in->length;
7960 } else {
7961 buf = in->buf->buffer->content;
7962 len = in->buf->buffer->use;
7963 }
7964 /* take into account the sequence length */
7965 if (third) len -= 2;
7966 else if (next) len --;
7967 for (;base < len;base++) {
7968 if (buf[base] == first) {
7969 if (third != 0) {
7970 if ((buf[base + 1] != next) ||
7971 (buf[base + 2] != third)) continue;
7972 } else if (next != 0) {
7973 if (buf[base + 1] != next) continue;
7974 }
7975 ctxt->checkIndex = 0;
7976#ifdef DEBUG_PUSH
7977 if (next == 0)
7978 xmlGenericError(xmlGenericErrorContext,
7979 "PP: lookup '%c' found at %d\n",
7980 first, base);
7981 else if (third == 0)
7982 xmlGenericError(xmlGenericErrorContext,
7983 "PP: lookup '%c%c' found at %d\n",
7984 first, next, base);
7985 else
7986 xmlGenericError(xmlGenericErrorContext,
7987 "PP: lookup '%c%c%c' found at %d\n",
7988 first, next, third, base);
7989#endif
7990 return(base - (in->cur - in->base));
7991 }
7992 }
7993 ctxt->checkIndex = base;
7994#ifdef DEBUG_PUSH
7995 if (next == 0)
7996 xmlGenericError(xmlGenericErrorContext,
7997 "PP: lookup '%c' failed\n", first);
7998 else if (third == 0)
7999 xmlGenericError(xmlGenericErrorContext,
8000 "PP: lookup '%c%c' failed\n", first, next);
8001 else
8002 xmlGenericError(xmlGenericErrorContext,
8003 "PP: lookup '%c%c%c' failed\n", first, next, third);
8004#endif
8005 return(-1);
8006}
8007
8008/**
8009 * xmlParseTryOrFinish:
8010 * @ctxt: an XML parser context
8011 * @terminate: last chunk indicator
8012 *
8013 * Try to progress on parsing
8014 *
8015 * Returns zero if no parsing was possible
8016 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008017static int
Owen Taylor3473f882001-02-23 17:55:21 +00008018xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8019 int ret = 0;
8020 int avail;
8021 xmlChar cur, next;
8022
8023#ifdef DEBUG_PUSH
8024 switch (ctxt->instate) {
8025 case XML_PARSER_EOF:
8026 xmlGenericError(xmlGenericErrorContext,
8027 "PP: try EOF\n"); break;
8028 case XML_PARSER_START:
8029 xmlGenericError(xmlGenericErrorContext,
8030 "PP: try START\n"); break;
8031 case XML_PARSER_MISC:
8032 xmlGenericError(xmlGenericErrorContext,
8033 "PP: try MISC\n");break;
8034 case XML_PARSER_COMMENT:
8035 xmlGenericError(xmlGenericErrorContext,
8036 "PP: try COMMENT\n");break;
8037 case XML_PARSER_PROLOG:
8038 xmlGenericError(xmlGenericErrorContext,
8039 "PP: try PROLOG\n");break;
8040 case XML_PARSER_START_TAG:
8041 xmlGenericError(xmlGenericErrorContext,
8042 "PP: try START_TAG\n");break;
8043 case XML_PARSER_CONTENT:
8044 xmlGenericError(xmlGenericErrorContext,
8045 "PP: try CONTENT\n");break;
8046 case XML_PARSER_CDATA_SECTION:
8047 xmlGenericError(xmlGenericErrorContext,
8048 "PP: try CDATA_SECTION\n");break;
8049 case XML_PARSER_END_TAG:
8050 xmlGenericError(xmlGenericErrorContext,
8051 "PP: try END_TAG\n");break;
8052 case XML_PARSER_ENTITY_DECL:
8053 xmlGenericError(xmlGenericErrorContext,
8054 "PP: try ENTITY_DECL\n");break;
8055 case XML_PARSER_ENTITY_VALUE:
8056 xmlGenericError(xmlGenericErrorContext,
8057 "PP: try ENTITY_VALUE\n");break;
8058 case XML_PARSER_ATTRIBUTE_VALUE:
8059 xmlGenericError(xmlGenericErrorContext,
8060 "PP: try ATTRIBUTE_VALUE\n");break;
8061 case XML_PARSER_DTD:
8062 xmlGenericError(xmlGenericErrorContext,
8063 "PP: try DTD\n");break;
8064 case XML_PARSER_EPILOG:
8065 xmlGenericError(xmlGenericErrorContext,
8066 "PP: try EPILOG\n");break;
8067 case XML_PARSER_PI:
8068 xmlGenericError(xmlGenericErrorContext,
8069 "PP: try PI\n");break;
8070 case XML_PARSER_IGNORE:
8071 xmlGenericError(xmlGenericErrorContext,
8072 "PP: try IGNORE\n");break;
8073 }
8074#endif
8075
8076 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008077 SHRINK;
8078
Owen Taylor3473f882001-02-23 17:55:21 +00008079 /*
8080 * Pop-up of finished entities.
8081 */
8082 while ((RAW == 0) && (ctxt->inputNr > 1))
8083 xmlPopInput(ctxt);
8084
8085 if (ctxt->input ==NULL) break;
8086 if (ctxt->input->buf == NULL)
8087 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008088 else {
8089 /*
8090 * If we are operating on converted input, try to flush
8091 * remainng chars to avoid them stalling in the non-converted
8092 * buffer.
8093 */
8094 if ((ctxt->input->buf->raw != NULL) &&
8095 (ctxt->input->buf->raw->use > 0)) {
8096 int base = ctxt->input->base -
8097 ctxt->input->buf->buffer->content;
8098 int current = ctxt->input->cur - ctxt->input->base;
8099
8100 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8101 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8102 ctxt->input->cur = ctxt->input->base + current;
8103 ctxt->input->end =
8104 &ctxt->input->buf->buffer->content[
8105 ctxt->input->buf->buffer->use];
8106 }
8107 avail = ctxt->input->buf->buffer->use -
8108 (ctxt->input->cur - ctxt->input->base);
8109 }
Owen Taylor3473f882001-02-23 17:55:21 +00008110 if (avail < 1)
8111 goto done;
8112 switch (ctxt->instate) {
8113 case XML_PARSER_EOF:
8114 /*
8115 * Document parsing is done !
8116 */
8117 goto done;
8118 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008119 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8120 xmlChar start[4];
8121 xmlCharEncoding enc;
8122
8123 /*
8124 * Very first chars read from the document flow.
8125 */
8126 if (avail < 4)
8127 goto done;
8128
8129 /*
8130 * Get the 4 first bytes and decode the charset
8131 * if enc != XML_CHAR_ENCODING_NONE
8132 * plug some encoding conversion routines.
8133 */
8134 start[0] = RAW;
8135 start[1] = NXT(1);
8136 start[2] = NXT(2);
8137 start[3] = NXT(3);
8138 enc = xmlDetectCharEncoding(start, 4);
8139 if (enc != XML_CHAR_ENCODING_NONE) {
8140 xmlSwitchEncoding(ctxt, enc);
8141 }
8142 break;
8143 }
Owen Taylor3473f882001-02-23 17:55:21 +00008144
8145 cur = ctxt->input->cur[0];
8146 next = ctxt->input->cur[1];
8147 if (cur == 0) {
8148 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8149 ctxt->sax->setDocumentLocator(ctxt->userData,
8150 &xmlDefaultSAXLocator);
8151 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8153 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8154 ctxt->wellFormed = 0;
8155 ctxt->disableSAX = 1;
8156 ctxt->instate = XML_PARSER_EOF;
8157#ifdef DEBUG_PUSH
8158 xmlGenericError(xmlGenericErrorContext,
8159 "PP: entering EOF\n");
8160#endif
8161 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8162 ctxt->sax->endDocument(ctxt->userData);
8163 goto done;
8164 }
8165 if ((cur == '<') && (next == '?')) {
8166 /* PI or XML decl */
8167 if (avail < 5) return(ret);
8168 if ((!terminate) &&
8169 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8170 return(ret);
8171 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8172 ctxt->sax->setDocumentLocator(ctxt->userData,
8173 &xmlDefaultSAXLocator);
8174 if ((ctxt->input->cur[2] == 'x') &&
8175 (ctxt->input->cur[3] == 'm') &&
8176 (ctxt->input->cur[4] == 'l') &&
8177 (IS_BLANK(ctxt->input->cur[5]))) {
8178 ret += 5;
8179#ifdef DEBUG_PUSH
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: Parsing XML Decl\n");
8182#endif
8183 xmlParseXMLDecl(ctxt);
8184 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8185 /*
8186 * The XML REC instructs us to stop parsing right
8187 * here
8188 */
8189 ctxt->instate = XML_PARSER_EOF;
8190 return(0);
8191 }
8192 ctxt->standalone = ctxt->input->standalone;
8193 if ((ctxt->encoding == NULL) &&
8194 (ctxt->input->encoding != NULL))
8195 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8196 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8197 (!ctxt->disableSAX))
8198 ctxt->sax->startDocument(ctxt->userData);
8199 ctxt->instate = XML_PARSER_MISC;
8200#ifdef DEBUG_PUSH
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: entering MISC\n");
8203#endif
8204 } else {
8205 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8206 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8207 (!ctxt->disableSAX))
8208 ctxt->sax->startDocument(ctxt->userData);
8209 ctxt->instate = XML_PARSER_MISC;
8210#ifdef DEBUG_PUSH
8211 xmlGenericError(xmlGenericErrorContext,
8212 "PP: entering MISC\n");
8213#endif
8214 }
8215 } else {
8216 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8217 ctxt->sax->setDocumentLocator(ctxt->userData,
8218 &xmlDefaultSAXLocator);
8219 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8220 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8221 (!ctxt->disableSAX))
8222 ctxt->sax->startDocument(ctxt->userData);
8223 ctxt->instate = XML_PARSER_MISC;
8224#ifdef DEBUG_PUSH
8225 xmlGenericError(xmlGenericErrorContext,
8226 "PP: entering MISC\n");
8227#endif
8228 }
8229 break;
8230 case XML_PARSER_MISC:
8231 SKIP_BLANKS;
8232 if (ctxt->input->buf == NULL)
8233 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8234 else
8235 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8236 if (avail < 2)
8237 goto done;
8238 cur = ctxt->input->cur[0];
8239 next = ctxt->input->cur[1];
8240 if ((cur == '<') && (next == '?')) {
8241 if ((!terminate) &&
8242 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8243 goto done;
8244#ifdef DEBUG_PUSH
8245 xmlGenericError(xmlGenericErrorContext,
8246 "PP: Parsing PI\n");
8247#endif
8248 xmlParsePI(ctxt);
8249 } else if ((cur == '<') && (next == '!') &&
8250 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8251 if ((!terminate) &&
8252 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8253 goto done;
8254#ifdef DEBUG_PUSH
8255 xmlGenericError(xmlGenericErrorContext,
8256 "PP: Parsing Comment\n");
8257#endif
8258 xmlParseComment(ctxt);
8259 ctxt->instate = XML_PARSER_MISC;
8260 } else if ((cur == '<') && (next == '!') &&
8261 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8262 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8263 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8264 (ctxt->input->cur[8] == 'E')) {
8265 if ((!terminate) &&
8266 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8267 goto done;
8268#ifdef DEBUG_PUSH
8269 xmlGenericError(xmlGenericErrorContext,
8270 "PP: Parsing internal subset\n");
8271#endif
8272 ctxt->inSubset = 1;
8273 xmlParseDocTypeDecl(ctxt);
8274 if (RAW == '[') {
8275 ctxt->instate = XML_PARSER_DTD;
8276#ifdef DEBUG_PUSH
8277 xmlGenericError(xmlGenericErrorContext,
8278 "PP: entering DTD\n");
8279#endif
8280 } else {
8281 /*
8282 * Create and update the external subset.
8283 */
8284 ctxt->inSubset = 2;
8285 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8286 (ctxt->sax->externalSubset != NULL))
8287 ctxt->sax->externalSubset(ctxt->userData,
8288 ctxt->intSubName, ctxt->extSubSystem,
8289 ctxt->extSubURI);
8290 ctxt->inSubset = 0;
8291 ctxt->instate = XML_PARSER_PROLOG;
8292#ifdef DEBUG_PUSH
8293 xmlGenericError(xmlGenericErrorContext,
8294 "PP: entering PROLOG\n");
8295#endif
8296 }
8297 } else if ((cur == '<') && (next == '!') &&
8298 (avail < 9)) {
8299 goto done;
8300 } else {
8301 ctxt->instate = XML_PARSER_START_TAG;
8302#ifdef DEBUG_PUSH
8303 xmlGenericError(xmlGenericErrorContext,
8304 "PP: entering START_TAG\n");
8305#endif
8306 }
8307 break;
8308 case XML_PARSER_IGNORE:
8309 xmlGenericError(xmlGenericErrorContext,
8310 "PP: internal error, state == IGNORE");
8311 ctxt->instate = XML_PARSER_DTD;
8312#ifdef DEBUG_PUSH
8313 xmlGenericError(xmlGenericErrorContext,
8314 "PP: entering DTD\n");
8315#endif
8316 break;
8317 case XML_PARSER_PROLOG:
8318 SKIP_BLANKS;
8319 if (ctxt->input->buf == NULL)
8320 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8321 else
8322 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8323 if (avail < 2)
8324 goto done;
8325 cur = ctxt->input->cur[0];
8326 next = ctxt->input->cur[1];
8327 if ((cur == '<') && (next == '?')) {
8328 if ((!terminate) &&
8329 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8330 goto done;
8331#ifdef DEBUG_PUSH
8332 xmlGenericError(xmlGenericErrorContext,
8333 "PP: Parsing PI\n");
8334#endif
8335 xmlParsePI(ctxt);
8336 } else if ((cur == '<') && (next == '!') &&
8337 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8338 if ((!terminate) &&
8339 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8340 goto done;
8341#ifdef DEBUG_PUSH
8342 xmlGenericError(xmlGenericErrorContext,
8343 "PP: Parsing Comment\n");
8344#endif
8345 xmlParseComment(ctxt);
8346 ctxt->instate = XML_PARSER_PROLOG;
8347 } else if ((cur == '<') && (next == '!') &&
8348 (avail < 4)) {
8349 goto done;
8350 } else {
8351 ctxt->instate = XML_PARSER_START_TAG;
8352#ifdef DEBUG_PUSH
8353 xmlGenericError(xmlGenericErrorContext,
8354 "PP: entering START_TAG\n");
8355#endif
8356 }
8357 break;
8358 case XML_PARSER_EPILOG:
8359 SKIP_BLANKS;
8360 if (ctxt->input->buf == NULL)
8361 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8362 else
8363 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8364 if (avail < 2)
8365 goto done;
8366 cur = ctxt->input->cur[0];
8367 next = ctxt->input->cur[1];
8368 if ((cur == '<') && (next == '?')) {
8369 if ((!terminate) &&
8370 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8371 goto done;
8372#ifdef DEBUG_PUSH
8373 xmlGenericError(xmlGenericErrorContext,
8374 "PP: Parsing PI\n");
8375#endif
8376 xmlParsePI(ctxt);
8377 ctxt->instate = XML_PARSER_EPILOG;
8378 } else if ((cur == '<') && (next == '!') &&
8379 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8380 if ((!terminate) &&
8381 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8382 goto done;
8383#ifdef DEBUG_PUSH
8384 xmlGenericError(xmlGenericErrorContext,
8385 "PP: Parsing Comment\n");
8386#endif
8387 xmlParseComment(ctxt);
8388 ctxt->instate = XML_PARSER_EPILOG;
8389 } else if ((cur == '<') && (next == '!') &&
8390 (avail < 4)) {
8391 goto done;
8392 } else {
8393 ctxt->errNo = XML_ERR_DOCUMENT_END;
8394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8395 ctxt->sax->error(ctxt->userData,
8396 "Extra content at the end of the document\n");
8397 ctxt->wellFormed = 0;
8398 ctxt->disableSAX = 1;
8399 ctxt->instate = XML_PARSER_EOF;
8400#ifdef DEBUG_PUSH
8401 xmlGenericError(xmlGenericErrorContext,
8402 "PP: entering EOF\n");
8403#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008404 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008405 ctxt->sax->endDocument(ctxt->userData);
8406 goto done;
8407 }
8408 break;
8409 case XML_PARSER_START_TAG: {
8410 xmlChar *name, *oldname;
8411
8412 if ((avail < 2) && (ctxt->inputNr == 1))
8413 goto done;
8414 cur = ctxt->input->cur[0];
8415 if (cur != '<') {
8416 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8418 ctxt->sax->error(ctxt->userData,
8419 "Start tag expect, '<' not found\n");
8420 ctxt->wellFormed = 0;
8421 ctxt->disableSAX = 1;
8422 ctxt->instate = XML_PARSER_EOF;
8423#ifdef DEBUG_PUSH
8424 xmlGenericError(xmlGenericErrorContext,
8425 "PP: entering EOF\n");
8426#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008427 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008428 ctxt->sax->endDocument(ctxt->userData);
8429 goto done;
8430 }
8431 if ((!terminate) &&
8432 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8433 goto done;
8434 if (ctxt->spaceNr == 0)
8435 spacePush(ctxt, -1);
8436 else
8437 spacePush(ctxt, *ctxt->space);
8438 name = xmlParseStartTag(ctxt);
8439 if (name == NULL) {
8440 spacePop(ctxt);
8441 ctxt->instate = XML_PARSER_EOF;
8442#ifdef DEBUG_PUSH
8443 xmlGenericError(xmlGenericErrorContext,
8444 "PP: entering EOF\n");
8445#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008446 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008447 ctxt->sax->endDocument(ctxt->userData);
8448 goto done;
8449 }
8450 namePush(ctxt, xmlStrdup(name));
8451
8452 /*
8453 * [ VC: Root Element Type ]
8454 * The Name in the document type declaration must match
8455 * the element type of the root element.
8456 */
8457 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8458 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8459 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8460
8461 /*
8462 * Check for an Empty Element.
8463 */
8464 if ((RAW == '/') && (NXT(1) == '>')) {
8465 SKIP(2);
8466 if ((ctxt->sax != NULL) &&
8467 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8468 ctxt->sax->endElement(ctxt->userData, name);
8469 xmlFree(name);
8470 oldname = namePop(ctxt);
8471 spacePop(ctxt);
8472 if (oldname != NULL) {
8473#ifdef DEBUG_STACK
8474 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8475#endif
8476 xmlFree(oldname);
8477 }
8478 if (ctxt->name == NULL) {
8479 ctxt->instate = XML_PARSER_EPILOG;
8480#ifdef DEBUG_PUSH
8481 xmlGenericError(xmlGenericErrorContext,
8482 "PP: entering EPILOG\n");
8483#endif
8484 } else {
8485 ctxt->instate = XML_PARSER_CONTENT;
8486#ifdef DEBUG_PUSH
8487 xmlGenericError(xmlGenericErrorContext,
8488 "PP: entering CONTENT\n");
8489#endif
8490 }
8491 break;
8492 }
8493 if (RAW == '>') {
8494 NEXT;
8495 } else {
8496 ctxt->errNo = XML_ERR_GT_REQUIRED;
8497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8498 ctxt->sax->error(ctxt->userData,
8499 "Couldn't find end of Start Tag %s\n",
8500 name);
8501 ctxt->wellFormed = 0;
8502 ctxt->disableSAX = 1;
8503
8504 /*
8505 * end of parsing of this node.
8506 */
8507 nodePop(ctxt);
8508 oldname = namePop(ctxt);
8509 spacePop(ctxt);
8510 if (oldname != NULL) {
8511#ifdef DEBUG_STACK
8512 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8513#endif
8514 xmlFree(oldname);
8515 }
8516 }
8517 xmlFree(name);
8518 ctxt->instate = XML_PARSER_CONTENT;
8519#ifdef DEBUG_PUSH
8520 xmlGenericError(xmlGenericErrorContext,
8521 "PP: entering CONTENT\n");
8522#endif
8523 break;
8524 }
8525 case XML_PARSER_CONTENT: {
8526 const xmlChar *test;
8527 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008528 if ((avail < 2) && (ctxt->inputNr == 1))
8529 goto done;
8530 cur = ctxt->input->cur[0];
8531 next = ctxt->input->cur[1];
8532
8533 test = CUR_PTR;
8534 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008535 if ((cur == '<') && (next == '?')) {
8536 if ((!terminate) &&
8537 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8538 goto done;
8539#ifdef DEBUG_PUSH
8540 xmlGenericError(xmlGenericErrorContext,
8541 "PP: Parsing PI\n");
8542#endif
8543 xmlParsePI(ctxt);
8544 } else if ((cur == '<') && (next == '!') &&
8545 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8546 if ((!terminate) &&
8547 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8548 goto done;
8549#ifdef DEBUG_PUSH
8550 xmlGenericError(xmlGenericErrorContext,
8551 "PP: Parsing Comment\n");
8552#endif
8553 xmlParseComment(ctxt);
8554 ctxt->instate = XML_PARSER_CONTENT;
8555 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8556 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8557 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8558 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8559 (ctxt->input->cur[8] == '[')) {
8560 SKIP(9);
8561 ctxt->instate = XML_PARSER_CDATA_SECTION;
8562#ifdef DEBUG_PUSH
8563 xmlGenericError(xmlGenericErrorContext,
8564 "PP: entering CDATA_SECTION\n");
8565#endif
8566 break;
8567 } else if ((cur == '<') && (next == '!') &&
8568 (avail < 9)) {
8569 goto done;
8570 } else if ((cur == '<') && (next == '/')) {
8571 ctxt->instate = XML_PARSER_END_TAG;
8572#ifdef DEBUG_PUSH
8573 xmlGenericError(xmlGenericErrorContext,
8574 "PP: entering END_TAG\n");
8575#endif
8576 break;
8577 } else if (cur == '<') {
8578 ctxt->instate = XML_PARSER_START_TAG;
8579#ifdef DEBUG_PUSH
8580 xmlGenericError(xmlGenericErrorContext,
8581 "PP: entering START_TAG\n");
8582#endif
8583 break;
8584 } else if (cur == '&') {
8585 if ((!terminate) &&
8586 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8587 goto done;
8588#ifdef DEBUG_PUSH
8589 xmlGenericError(xmlGenericErrorContext,
8590 "PP: Parsing Reference\n");
8591#endif
8592 xmlParseReference(ctxt);
8593 } else {
8594 /* TODO Avoid the extra copy, handle directly !!! */
8595 /*
8596 * Goal of the following test is:
8597 * - minimize calls to the SAX 'character' callback
8598 * when they are mergeable
8599 * - handle an problem for isBlank when we only parse
8600 * a sequence of blank chars and the next one is
8601 * not available to check against '<' presence.
8602 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008603 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008604 * of the parser.
8605 */
8606 if ((ctxt->inputNr == 1) &&
8607 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8608 if ((!terminate) &&
8609 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8610 goto done;
8611 }
8612 ctxt->checkIndex = 0;
8613#ifdef DEBUG_PUSH
8614 xmlGenericError(xmlGenericErrorContext,
8615 "PP: Parsing char data\n");
8616#endif
8617 xmlParseCharData(ctxt, 0);
8618 }
8619 /*
8620 * Pop-up of finished entities.
8621 */
8622 while ((RAW == 0) && (ctxt->inputNr > 1))
8623 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008624 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008625 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8627 ctxt->sax->error(ctxt->userData,
8628 "detected an error in element content\n");
8629 ctxt->wellFormed = 0;
8630 ctxt->disableSAX = 1;
8631 ctxt->instate = XML_PARSER_EOF;
8632 break;
8633 }
8634 break;
8635 }
8636 case XML_PARSER_CDATA_SECTION: {
8637 /*
8638 * The Push mode need to have the SAX callback for
8639 * cdataBlock merge back contiguous callbacks.
8640 */
8641 int base;
8642
8643 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8644 if (base < 0) {
8645 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8647 if (ctxt->sax->cdataBlock != NULL)
8648 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8649 XML_PARSER_BIG_BUFFER_SIZE);
8650 }
8651 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8652 ctxt->checkIndex = 0;
8653 }
8654 goto done;
8655 } else {
8656 if ((ctxt->sax != NULL) && (base > 0) &&
8657 (!ctxt->disableSAX)) {
8658 if (ctxt->sax->cdataBlock != NULL)
8659 ctxt->sax->cdataBlock(ctxt->userData,
8660 ctxt->input->cur, base);
8661 }
8662 SKIP(base + 3);
8663 ctxt->checkIndex = 0;
8664 ctxt->instate = XML_PARSER_CONTENT;
8665#ifdef DEBUG_PUSH
8666 xmlGenericError(xmlGenericErrorContext,
8667 "PP: entering CONTENT\n");
8668#endif
8669 }
8670 break;
8671 }
8672 case XML_PARSER_END_TAG:
8673 if (avail < 2)
8674 goto done;
8675 if ((!terminate) &&
8676 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8677 goto done;
8678 xmlParseEndTag(ctxt);
8679 if (ctxt->name == NULL) {
8680 ctxt->instate = XML_PARSER_EPILOG;
8681#ifdef DEBUG_PUSH
8682 xmlGenericError(xmlGenericErrorContext,
8683 "PP: entering EPILOG\n");
8684#endif
8685 } else {
8686 ctxt->instate = XML_PARSER_CONTENT;
8687#ifdef DEBUG_PUSH
8688 xmlGenericError(xmlGenericErrorContext,
8689 "PP: entering CONTENT\n");
8690#endif
8691 }
8692 break;
8693 case XML_PARSER_DTD: {
8694 /*
8695 * Sorry but progressive parsing of the internal subset
8696 * is not expected to be supported. We first check that
8697 * the full content of the internal subset is available and
8698 * the parsing is launched only at that point.
8699 * Internal subset ends up with "']' S? '>'" in an unescaped
8700 * section and not in a ']]>' sequence which are conditional
8701 * sections (whoever argued to keep that crap in XML deserve
8702 * a place in hell !).
8703 */
8704 int base, i;
8705 xmlChar *buf;
8706 xmlChar quote = 0;
8707
8708 base = ctxt->input->cur - ctxt->input->base;
8709 if (base < 0) return(0);
8710 if (ctxt->checkIndex > base)
8711 base = ctxt->checkIndex;
8712 buf = ctxt->input->buf->buffer->content;
8713 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8714 base++) {
8715 if (quote != 0) {
8716 if (buf[base] == quote)
8717 quote = 0;
8718 continue;
8719 }
8720 if (buf[base] == '"') {
8721 quote = '"';
8722 continue;
8723 }
8724 if (buf[base] == '\'') {
8725 quote = '\'';
8726 continue;
8727 }
8728 if (buf[base] == ']') {
8729 if ((unsigned int) base +1 >=
8730 ctxt->input->buf->buffer->use)
8731 break;
8732 if (buf[base + 1] == ']') {
8733 /* conditional crap, skip both ']' ! */
8734 base++;
8735 continue;
8736 }
8737 for (i = 0;
8738 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8739 i++) {
8740 if (buf[base + i] == '>')
8741 goto found_end_int_subset;
8742 }
8743 break;
8744 }
8745 }
8746 /*
8747 * We didn't found the end of the Internal subset
8748 */
8749 if (quote == 0)
8750 ctxt->checkIndex = base;
8751#ifdef DEBUG_PUSH
8752 if (next == 0)
8753 xmlGenericError(xmlGenericErrorContext,
8754 "PP: lookup of int subset end filed\n");
8755#endif
8756 goto done;
8757
8758found_end_int_subset:
8759 xmlParseInternalSubset(ctxt);
8760 ctxt->inSubset = 2;
8761 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8762 (ctxt->sax->externalSubset != NULL))
8763 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8764 ctxt->extSubSystem, ctxt->extSubURI);
8765 ctxt->inSubset = 0;
8766 ctxt->instate = XML_PARSER_PROLOG;
8767 ctxt->checkIndex = 0;
8768#ifdef DEBUG_PUSH
8769 xmlGenericError(xmlGenericErrorContext,
8770 "PP: entering PROLOG\n");
8771#endif
8772 break;
8773 }
8774 case XML_PARSER_COMMENT:
8775 xmlGenericError(xmlGenericErrorContext,
8776 "PP: internal error, state == COMMENT\n");
8777 ctxt->instate = XML_PARSER_CONTENT;
8778#ifdef DEBUG_PUSH
8779 xmlGenericError(xmlGenericErrorContext,
8780 "PP: entering CONTENT\n");
8781#endif
8782 break;
8783 case XML_PARSER_PI:
8784 xmlGenericError(xmlGenericErrorContext,
8785 "PP: internal error, state == PI\n");
8786 ctxt->instate = XML_PARSER_CONTENT;
8787#ifdef DEBUG_PUSH
8788 xmlGenericError(xmlGenericErrorContext,
8789 "PP: entering CONTENT\n");
8790#endif
8791 break;
8792 case XML_PARSER_ENTITY_DECL:
8793 xmlGenericError(xmlGenericErrorContext,
8794 "PP: internal error, state == ENTITY_DECL\n");
8795 ctxt->instate = XML_PARSER_DTD;
8796#ifdef DEBUG_PUSH
8797 xmlGenericError(xmlGenericErrorContext,
8798 "PP: entering DTD\n");
8799#endif
8800 break;
8801 case XML_PARSER_ENTITY_VALUE:
8802 xmlGenericError(xmlGenericErrorContext,
8803 "PP: internal error, state == ENTITY_VALUE\n");
8804 ctxt->instate = XML_PARSER_CONTENT;
8805#ifdef DEBUG_PUSH
8806 xmlGenericError(xmlGenericErrorContext,
8807 "PP: entering DTD\n");
8808#endif
8809 break;
8810 case XML_PARSER_ATTRIBUTE_VALUE:
8811 xmlGenericError(xmlGenericErrorContext,
8812 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8813 ctxt->instate = XML_PARSER_START_TAG;
8814#ifdef DEBUG_PUSH
8815 xmlGenericError(xmlGenericErrorContext,
8816 "PP: entering START_TAG\n");
8817#endif
8818 break;
8819 case XML_PARSER_SYSTEM_LITERAL:
8820 xmlGenericError(xmlGenericErrorContext,
8821 "PP: internal error, state == SYSTEM_LITERAL\n");
8822 ctxt->instate = XML_PARSER_START_TAG;
8823#ifdef DEBUG_PUSH
8824 xmlGenericError(xmlGenericErrorContext,
8825 "PP: entering START_TAG\n");
8826#endif
8827 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008828 case XML_PARSER_PUBLIC_LITERAL:
8829 xmlGenericError(xmlGenericErrorContext,
8830 "PP: internal error, state == PUBLIC_LITERAL\n");
8831 ctxt->instate = XML_PARSER_START_TAG;
8832#ifdef DEBUG_PUSH
8833 xmlGenericError(xmlGenericErrorContext,
8834 "PP: entering START_TAG\n");
8835#endif
8836 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008837 }
8838 }
8839done:
8840#ifdef DEBUG_PUSH
8841 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8842#endif
8843 return(ret);
8844}
8845
8846/**
Owen Taylor3473f882001-02-23 17:55:21 +00008847 * xmlParseChunk:
8848 * @ctxt: an XML parser context
8849 * @chunk: an char array
8850 * @size: the size in byte of the chunk
8851 * @terminate: last chunk indicator
8852 *
8853 * Parse a Chunk of memory
8854 *
8855 * Returns zero if no error, the xmlParserErrors otherwise.
8856 */
8857int
8858xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8859 int terminate) {
8860 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8861 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8862 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8863 int cur = ctxt->input->cur - ctxt->input->base;
8864
8865 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8866 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8867 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008868 ctxt->input->end =
8869 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008870#ifdef DEBUG_PUSH
8871 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8872#endif
8873
8874 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8875 xmlParseTryOrFinish(ctxt, terminate);
8876 } else if (ctxt->instate != XML_PARSER_EOF) {
8877 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8878 xmlParserInputBufferPtr in = ctxt->input->buf;
8879 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8880 (in->raw != NULL)) {
8881 int nbchars;
8882
8883 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8884 if (nbchars < 0) {
8885 xmlGenericError(xmlGenericErrorContext,
8886 "xmlParseChunk: encoder error\n");
8887 return(XML_ERR_INVALID_ENCODING);
8888 }
8889 }
8890 }
8891 }
8892 xmlParseTryOrFinish(ctxt, terminate);
8893 if (terminate) {
8894 /*
8895 * Check for termination
8896 */
8897 if ((ctxt->instate != XML_PARSER_EOF) &&
8898 (ctxt->instate != XML_PARSER_EPILOG)) {
8899 ctxt->errNo = XML_ERR_DOCUMENT_END;
8900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8901 ctxt->sax->error(ctxt->userData,
8902 "Extra content at the end of the document\n");
8903 ctxt->wellFormed = 0;
8904 ctxt->disableSAX = 1;
8905 }
8906 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008907 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008908 ctxt->sax->endDocument(ctxt->userData);
8909 }
8910 ctxt->instate = XML_PARSER_EOF;
8911 }
8912 return((xmlParserErrors) ctxt->errNo);
8913}
8914
/************************************************************************
 *									*
 * 		I/O front end functions to the parser			*
 *									*
 ************************************************************************/
8920
8921/**
8922 * xmlStopParser:
8923 * @ctxt: an XML parser context
8924 *
8925 * Blocks further parser processing
8926 */
8927void
8928xmlStopParser(xmlParserCtxtPtr ctxt) {
8929 ctxt->instate = XML_PARSER_EOF;
8930 if (ctxt->input != NULL)
8931 ctxt->input->cur = BAD_CAST"";
8932}
8933
8934/**
8935 * xmlCreatePushParserCtxt:
8936 * @sax: a SAX handler
8937 * @user_data: The user data returned on SAX callbacks
8938 * @chunk: a pointer to an array of chars
8939 * @size: number of chars in the array
8940 * @filename: an optional file name or URI
8941 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00008942 * Create a parser context for using the XML parser in push mode.
8943 * If @buffer and @size are non-NULL, the data is used to detect
8944 * the encoding. The remaining characters will be parsed so they
8945 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00008946 * To allow content encoding detection, @size should be >= 4
8947 * The value of @filename is used for fetching external entities
8948 * and error/warning reports.
8949 *
8950 * Returns the new parser context or NULL
8951 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00008952
Owen Taylor3473f882001-02-23 17:55:21 +00008953xmlParserCtxtPtr
8954xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8955 const char *chunk, int size, const char *filename) {
8956 xmlParserCtxtPtr ctxt;
8957 xmlParserInputPtr inputStream;
8958 xmlParserInputBufferPtr buf;
8959 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8960
8961 /*
8962 * plug some encoding conversion routines
8963 */
8964 if ((chunk != NULL) && (size >= 4))
8965 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8966
8967 buf = xmlAllocParserInputBuffer(enc);
8968 if (buf == NULL) return(NULL);
8969
8970 ctxt = xmlNewParserCtxt();
8971 if (ctxt == NULL) {
8972 xmlFree(buf);
8973 return(NULL);
8974 }
8975 if (sax != NULL) {
8976 if (ctxt->sax != &xmlDefaultSAXHandler)
8977 xmlFree(ctxt->sax);
8978 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8979 if (ctxt->sax == NULL) {
8980 xmlFree(buf);
8981 xmlFree(ctxt);
8982 return(NULL);
8983 }
8984 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8985 if (user_data != NULL)
8986 ctxt->userData = user_data;
8987 }
8988 if (filename == NULL) {
8989 ctxt->directory = NULL;
8990 } else {
8991 ctxt->directory = xmlParserGetDirectory(filename);
8992 }
8993
8994 inputStream = xmlNewInputStream(ctxt);
8995 if (inputStream == NULL) {
8996 xmlFreeParserCtxt(ctxt);
8997 return(NULL);
8998 }
8999
9000 if (filename == NULL)
9001 inputStream->filename = NULL;
9002 else
9003 inputStream->filename = xmlMemStrdup(filename);
9004 inputStream->buf = buf;
9005 inputStream->base = inputStream->buf->buffer->content;
9006 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009007 inputStream->end =
9008 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009009
9010 inputPush(ctxt, inputStream);
9011
9012 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9013 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009014 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9015 int cur = ctxt->input->cur - ctxt->input->base;
9016
Owen Taylor3473f882001-02-23 17:55:21 +00009017 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009018
9019 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9020 ctxt->input->cur = ctxt->input->base + cur;
9021 ctxt->input->end =
9022 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009023#ifdef DEBUG_PUSH
9024 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9025#endif
9026 }
9027
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009028 if (enc != XML_CHAR_ENCODING_NONE) {
9029 xmlSwitchEncoding(ctxt, enc);
9030 }
9031
Owen Taylor3473f882001-02-23 17:55:21 +00009032 return(ctxt);
9033}
9034
9035/**
9036 * xmlCreateIOParserCtxt:
9037 * @sax: a SAX handler
9038 * @user_data: The user data returned on SAX callbacks
9039 * @ioread: an I/O read function
9040 * @ioclose: an I/O close function
9041 * @ioctx: an I/O handler
9042 * @enc: the charset encoding if known
9043 *
9044 * Create a parser context for using the XML parser with an existing
9045 * I/O stream
9046 *
9047 * Returns the new parser context or NULL
9048 */
9049xmlParserCtxtPtr
9050xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9051 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9052 void *ioctx, xmlCharEncoding enc) {
9053 xmlParserCtxtPtr ctxt;
9054 xmlParserInputPtr inputStream;
9055 xmlParserInputBufferPtr buf;
9056
9057 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9058 if (buf == NULL) return(NULL);
9059
9060 ctxt = xmlNewParserCtxt();
9061 if (ctxt == NULL) {
9062 xmlFree(buf);
9063 return(NULL);
9064 }
9065 if (sax != NULL) {
9066 if (ctxt->sax != &xmlDefaultSAXHandler)
9067 xmlFree(ctxt->sax);
9068 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9069 if (ctxt->sax == NULL) {
9070 xmlFree(buf);
9071 xmlFree(ctxt);
9072 return(NULL);
9073 }
9074 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9075 if (user_data != NULL)
9076 ctxt->userData = user_data;
9077 }
9078
9079 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9080 if (inputStream == NULL) {
9081 xmlFreeParserCtxt(ctxt);
9082 return(NULL);
9083 }
9084 inputPush(ctxt, inputStream);
9085
9086 return(ctxt);
9087}
9088
9089/************************************************************************
9090 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009091 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009092 * *
9093 ************************************************************************/
9094
9095/**
9096 * xmlIOParseDTD:
9097 * @sax: the SAX handler block or NULL
9098 * @input: an Input Buffer
9099 * @enc: the charset encoding if known
9100 *
9101 * Load and parse a DTD
9102 *
9103 * Returns the resulting xmlDtdPtr or NULL in case of error.
9104 * @input will be freed at parsing end.
9105 */
9106
9107xmlDtdPtr
9108xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9109 xmlCharEncoding enc) {
9110 xmlDtdPtr ret = NULL;
9111 xmlParserCtxtPtr ctxt;
9112 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009113 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009114
9115 if (input == NULL)
9116 return(NULL);
9117
9118 ctxt = xmlNewParserCtxt();
9119 if (ctxt == NULL) {
9120 return(NULL);
9121 }
9122
9123 /*
9124 * Set-up the SAX context
9125 */
9126 if (sax != NULL) {
9127 if (ctxt->sax != NULL)
9128 xmlFree(ctxt->sax);
9129 ctxt->sax = sax;
9130 ctxt->userData = NULL;
9131 }
9132
9133 /*
9134 * generate a parser input from the I/O handler
9135 */
9136
9137 pinput = xmlNewIOInputStream(ctxt, input, enc);
9138 if (pinput == NULL) {
9139 if (sax != NULL) ctxt->sax = NULL;
9140 xmlFreeParserCtxt(ctxt);
9141 return(NULL);
9142 }
9143
9144 /*
9145 * plug some encoding conversion routines here.
9146 */
9147 xmlPushInput(ctxt, pinput);
9148
9149 pinput->filename = NULL;
9150 pinput->line = 1;
9151 pinput->col = 1;
9152 pinput->base = ctxt->input->cur;
9153 pinput->cur = ctxt->input->cur;
9154 pinput->free = NULL;
9155
9156 /*
9157 * let's parse that entity knowing it's an external subset.
9158 */
9159 ctxt->inSubset = 2;
9160 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9161 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9162 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009163
9164 if (enc == XML_CHAR_ENCODING_NONE) {
9165 /*
9166 * Get the 4 first bytes and decode the charset
9167 * if enc != XML_CHAR_ENCODING_NONE
9168 * plug some encoding conversion routines.
9169 */
9170 start[0] = RAW;
9171 start[1] = NXT(1);
9172 start[2] = NXT(2);
9173 start[3] = NXT(3);
9174 enc = xmlDetectCharEncoding(start, 4);
9175 if (enc != XML_CHAR_ENCODING_NONE) {
9176 xmlSwitchEncoding(ctxt, enc);
9177 }
9178 }
9179
Owen Taylor3473f882001-02-23 17:55:21 +00009180 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9181
9182 if (ctxt->myDoc != NULL) {
9183 if (ctxt->wellFormed) {
9184 ret = ctxt->myDoc->extSubset;
9185 ctxt->myDoc->extSubset = NULL;
9186 } else {
9187 ret = NULL;
9188 }
9189 xmlFreeDoc(ctxt->myDoc);
9190 ctxt->myDoc = NULL;
9191 }
9192 if (sax != NULL) ctxt->sax = NULL;
9193 xmlFreeParserCtxt(ctxt);
9194
9195 return(ret);
9196}
9197
9198/**
9199 * xmlSAXParseDTD:
9200 * @sax: the SAX handler block
9201 * @ExternalID: a NAME* containing the External ID of the DTD
9202 * @SystemID: a NAME* containing the URL to the DTD
9203 *
9204 * Load and parse an external subset.
9205 *
9206 * Returns the resulting xmlDtdPtr or NULL in case of error.
9207 */
9208
9209xmlDtdPtr
9210xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9211 const xmlChar *SystemID) {
9212 xmlDtdPtr ret = NULL;
9213 xmlParserCtxtPtr ctxt;
9214 xmlParserInputPtr input = NULL;
9215 xmlCharEncoding enc;
9216
9217 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9218
9219 ctxt = xmlNewParserCtxt();
9220 if (ctxt == NULL) {
9221 return(NULL);
9222 }
9223
9224 /*
9225 * Set-up the SAX context
9226 */
9227 if (sax != NULL) {
9228 if (ctxt->sax != NULL)
9229 xmlFree(ctxt->sax);
9230 ctxt->sax = sax;
9231 ctxt->userData = NULL;
9232 }
9233
9234 /*
9235 * Ask the Entity resolver to load the damn thing
9236 */
9237
9238 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9239 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9240 if (input == NULL) {
9241 if (sax != NULL) ctxt->sax = NULL;
9242 xmlFreeParserCtxt(ctxt);
9243 return(NULL);
9244 }
9245
9246 /*
9247 * plug some encoding conversion routines here.
9248 */
9249 xmlPushInput(ctxt, input);
9250 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9251 xmlSwitchEncoding(ctxt, enc);
9252
9253 if (input->filename == NULL)
9254 input->filename = (char *) xmlStrdup(SystemID);
9255 input->line = 1;
9256 input->col = 1;
9257 input->base = ctxt->input->cur;
9258 input->cur = ctxt->input->cur;
9259 input->free = NULL;
9260
9261 /*
9262 * let's parse that entity knowing it's an external subset.
9263 */
9264 ctxt->inSubset = 2;
9265 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9266 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9267 ExternalID, SystemID);
9268 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9269
9270 if (ctxt->myDoc != NULL) {
9271 if (ctxt->wellFormed) {
9272 ret = ctxt->myDoc->extSubset;
9273 ctxt->myDoc->extSubset = NULL;
9274 } else {
9275 ret = NULL;
9276 }
9277 xmlFreeDoc(ctxt->myDoc);
9278 ctxt->myDoc = NULL;
9279 }
9280 if (sax != NULL) ctxt->sax = NULL;
9281 xmlFreeParserCtxt(ctxt);
9282
9283 return(ret);
9284}
9285
9286/**
9287 * xmlParseDTD:
9288 * @ExternalID: a NAME* containing the External ID of the DTD
9289 * @SystemID: a NAME* containing the URL to the DTD
9290 *
9291 * Load and parse an external subset.
9292 *
9293 * Returns the resulting xmlDtdPtr or NULL in case of error.
9294 */
9295
9296xmlDtdPtr
9297xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9298 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9299}
9300
9301/************************************************************************
9302 * *
9303 * Front ends when parsing an Entity *
9304 * *
9305 ************************************************************************/
9306
9307/**
Owen Taylor3473f882001-02-23 17:55:21 +00009308 * xmlParseCtxtExternalEntity:
9309 * @ctx: the existing parsing context
9310 * @URL: the URL for the entity to load
9311 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009312 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009313 *
9314 * Parse an external general entity within an existing parsing context
9315 * An external general parsed entity is well-formed if it matches the
9316 * production labeled extParsedEnt.
9317 *
9318 * [78] extParsedEnt ::= TextDecl? content
9319 *
9320 * Returns 0 if the entity is well formed, -1 in case of args problem and
9321 * the parser error code otherwise
9322 */
9323
9324int
9325xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009326 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009327 xmlParserCtxtPtr ctxt;
9328 xmlDocPtr newDoc;
9329 xmlSAXHandlerPtr oldsax = NULL;
9330 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009331 xmlChar start[4];
9332 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009333
9334 if (ctx->depth > 40) {
9335 return(XML_ERR_ENTITY_LOOP);
9336 }
9337
Daniel Veillardcda96922001-08-21 10:56:31 +00009338 if (lst != NULL)
9339 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009340 if ((URL == NULL) && (ID == NULL))
9341 return(-1);
9342 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9343 return(-1);
9344
9345
9346 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9347 if (ctxt == NULL) return(-1);
9348 ctxt->userData = ctxt;
9349 oldsax = ctxt->sax;
9350 ctxt->sax = ctx->sax;
9351 newDoc = xmlNewDoc(BAD_CAST "1.0");
9352 if (newDoc == NULL) {
9353 xmlFreeParserCtxt(ctxt);
9354 return(-1);
9355 }
9356 if (ctx->myDoc != NULL) {
9357 newDoc->intSubset = ctx->myDoc->intSubset;
9358 newDoc->extSubset = ctx->myDoc->extSubset;
9359 }
9360 if (ctx->myDoc->URL != NULL) {
9361 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9362 }
9363 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9364 if (newDoc->children == NULL) {
9365 ctxt->sax = oldsax;
9366 xmlFreeParserCtxt(ctxt);
9367 newDoc->intSubset = NULL;
9368 newDoc->extSubset = NULL;
9369 xmlFreeDoc(newDoc);
9370 return(-1);
9371 }
9372 nodePush(ctxt, newDoc->children);
9373 if (ctx->myDoc == NULL) {
9374 ctxt->myDoc = newDoc;
9375 } else {
9376 ctxt->myDoc = ctx->myDoc;
9377 newDoc->children->doc = ctx->myDoc;
9378 }
9379
Daniel Veillard87a764e2001-06-20 17:41:10 +00009380 /*
9381 * Get the 4 first bytes and decode the charset
9382 * if enc != XML_CHAR_ENCODING_NONE
9383 * plug some encoding conversion routines.
9384 */
9385 GROW
9386 start[0] = RAW;
9387 start[1] = NXT(1);
9388 start[2] = NXT(2);
9389 start[3] = NXT(3);
9390 enc = xmlDetectCharEncoding(start, 4);
9391 if (enc != XML_CHAR_ENCODING_NONE) {
9392 xmlSwitchEncoding(ctxt, enc);
9393 }
9394
Owen Taylor3473f882001-02-23 17:55:21 +00009395 /*
9396 * Parse a possible text declaration first
9397 */
Owen Taylor3473f882001-02-23 17:55:21 +00009398 if ((RAW == '<') && (NXT(1) == '?') &&
9399 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9400 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9401 xmlParseTextDecl(ctxt);
9402 }
9403
9404 /*
9405 * Doing validity checking on chunk doesn't make sense
9406 */
9407 ctxt->instate = XML_PARSER_CONTENT;
9408 ctxt->validate = ctx->validate;
9409 ctxt->loadsubset = ctx->loadsubset;
9410 ctxt->depth = ctx->depth + 1;
9411 ctxt->replaceEntities = ctx->replaceEntities;
9412 if (ctxt->validate) {
9413 ctxt->vctxt.error = ctx->vctxt.error;
9414 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009415 } else {
9416 ctxt->vctxt.error = NULL;
9417 ctxt->vctxt.warning = NULL;
9418 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009419 ctxt->vctxt.nodeTab = NULL;
9420 ctxt->vctxt.nodeNr = 0;
9421 ctxt->vctxt.nodeMax = 0;
9422 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009423
9424 xmlParseContent(ctxt);
9425
9426 if ((RAW == '<') && (NXT(1) == '/')) {
9427 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9429 ctxt->sax->error(ctxt->userData,
9430 "chunk is not well balanced\n");
9431 ctxt->wellFormed = 0;
9432 ctxt->disableSAX = 1;
9433 } else if (RAW != 0) {
9434 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9436 ctxt->sax->error(ctxt->userData,
9437 "extra content at the end of well balanced chunk\n");
9438 ctxt->wellFormed = 0;
9439 ctxt->disableSAX = 1;
9440 }
9441 if (ctxt->node != newDoc->children) {
9442 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9444 ctxt->sax->error(ctxt->userData,
9445 "chunk is not well balanced\n");
9446 ctxt->wellFormed = 0;
9447 ctxt->disableSAX = 1;
9448 }
9449
9450 if (!ctxt->wellFormed) {
9451 if (ctxt->errNo == 0)
9452 ret = 1;
9453 else
9454 ret = ctxt->errNo;
9455 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009456 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009457 xmlNodePtr cur;
9458
9459 /*
9460 * Return the newly created nodeset after unlinking it from
9461 * they pseudo parent.
9462 */
9463 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009464 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009465 while (cur != NULL) {
9466 cur->parent = NULL;
9467 cur = cur->next;
9468 }
9469 newDoc->children->children = NULL;
9470 }
9471 ret = 0;
9472 }
9473 ctxt->sax = oldsax;
9474 xmlFreeParserCtxt(ctxt);
9475 newDoc->intSubset = NULL;
9476 newDoc->extSubset = NULL;
9477 xmlFreeDoc(newDoc);
9478
9479 return(ret);
9480}
9481
9482/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009483 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009484 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009485 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009486 * @sax: the SAX handler bloc (possibly NULL)
9487 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9488 * @depth: Used for loop detection, use 0
9489 * @URL: the URL for the entity to load
9490 * @ID: the System ID for the entity to load
9491 * @list: the return value for the set of parsed nodes
9492 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009493 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009494 *
9495 * Returns 0 if the entity is well formed, -1 in case of args problem and
9496 * the parser error code otherwise
9497 */
9498
Daniel Veillard257d9102001-05-08 10:41:44 +00009499static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009500xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9501 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009502 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009503 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009504 xmlParserCtxtPtr ctxt;
9505 xmlDocPtr newDoc;
9506 xmlSAXHandlerPtr oldsax = NULL;
9507 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009508 xmlChar start[4];
9509 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009510
9511 if (depth > 40) {
9512 return(XML_ERR_ENTITY_LOOP);
9513 }
9514
9515
9516
9517 if (list != NULL)
9518 *list = NULL;
9519 if ((URL == NULL) && (ID == NULL))
9520 return(-1);
9521 if (doc == NULL) /* @@ relax but check for dereferences */
9522 return(-1);
9523
9524
9525 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9526 if (ctxt == NULL) return(-1);
9527 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009528 if (oldctxt != NULL) {
9529 ctxt->_private = oldctxt->_private;
9530 ctxt->loadsubset = oldctxt->loadsubset;
9531 ctxt->validate = oldctxt->validate;
9532 ctxt->external = oldctxt->external;
9533 } else {
9534 /*
9535 * Doing validity checking on chunk without context
9536 * doesn't make sense
9537 */
9538 ctxt->_private = NULL;
9539 ctxt->validate = 0;
9540 ctxt->external = 2;
9541 ctxt->loadsubset = 0;
9542 }
Owen Taylor3473f882001-02-23 17:55:21 +00009543 if (sax != NULL) {
9544 oldsax = ctxt->sax;
9545 ctxt->sax = sax;
9546 if (user_data != NULL)
9547 ctxt->userData = user_data;
9548 }
9549 newDoc = xmlNewDoc(BAD_CAST "1.0");
9550 if (newDoc == NULL) {
9551 xmlFreeParserCtxt(ctxt);
9552 return(-1);
9553 }
9554 if (doc != NULL) {
9555 newDoc->intSubset = doc->intSubset;
9556 newDoc->extSubset = doc->extSubset;
9557 }
9558 if (doc->URL != NULL) {
9559 newDoc->URL = xmlStrdup(doc->URL);
9560 }
9561 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9562 if (newDoc->children == NULL) {
9563 if (sax != NULL)
9564 ctxt->sax = oldsax;
9565 xmlFreeParserCtxt(ctxt);
9566 newDoc->intSubset = NULL;
9567 newDoc->extSubset = NULL;
9568 xmlFreeDoc(newDoc);
9569 return(-1);
9570 }
9571 nodePush(ctxt, newDoc->children);
9572 if (doc == NULL) {
9573 ctxt->myDoc = newDoc;
9574 } else {
9575 ctxt->myDoc = doc;
9576 newDoc->children->doc = doc;
9577 }
9578
Daniel Veillard87a764e2001-06-20 17:41:10 +00009579 /*
9580 * Get the 4 first bytes and decode the charset
9581 * if enc != XML_CHAR_ENCODING_NONE
9582 * plug some encoding conversion routines.
9583 */
9584 GROW;
9585 start[0] = RAW;
9586 start[1] = NXT(1);
9587 start[2] = NXT(2);
9588 start[3] = NXT(3);
9589 enc = xmlDetectCharEncoding(start, 4);
9590 if (enc != XML_CHAR_ENCODING_NONE) {
9591 xmlSwitchEncoding(ctxt, enc);
9592 }
9593
Owen Taylor3473f882001-02-23 17:55:21 +00009594 /*
9595 * Parse a possible text declaration first
9596 */
Owen Taylor3473f882001-02-23 17:55:21 +00009597 if ((RAW == '<') && (NXT(1) == '?') &&
9598 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9599 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9600 xmlParseTextDecl(ctxt);
9601 }
9602
Owen Taylor3473f882001-02-23 17:55:21 +00009603 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009604 ctxt->depth = depth;
9605
9606 xmlParseContent(ctxt);
9607
Daniel Veillard561b7f82002-03-20 21:55:57 +00009608 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009609 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9611 ctxt->sax->error(ctxt->userData,
9612 "chunk is not well balanced\n");
9613 ctxt->wellFormed = 0;
9614 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009615 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009616 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9618 ctxt->sax->error(ctxt->userData,
9619 "extra content at the end of well balanced chunk\n");
9620 ctxt->wellFormed = 0;
9621 ctxt->disableSAX = 1;
9622 }
9623 if (ctxt->node != newDoc->children) {
9624 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9626 ctxt->sax->error(ctxt->userData,
9627 "chunk is not well balanced\n");
9628 ctxt->wellFormed = 0;
9629 ctxt->disableSAX = 1;
9630 }
9631
9632 if (!ctxt->wellFormed) {
9633 if (ctxt->errNo == 0)
9634 ret = 1;
9635 else
9636 ret = ctxt->errNo;
9637 } else {
9638 if (list != NULL) {
9639 xmlNodePtr cur;
9640
9641 /*
9642 * Return the newly created nodeset after unlinking it from
9643 * they pseudo parent.
9644 */
9645 cur = newDoc->children->children;
9646 *list = cur;
9647 while (cur != NULL) {
9648 cur->parent = NULL;
9649 cur = cur->next;
9650 }
9651 newDoc->children->children = NULL;
9652 }
9653 ret = 0;
9654 }
9655 if (sax != NULL)
9656 ctxt->sax = oldsax;
9657 xmlFreeParserCtxt(ctxt);
9658 newDoc->intSubset = NULL;
9659 newDoc->extSubset = NULL;
9660 xmlFreeDoc(newDoc);
9661
9662 return(ret);
9663}
9664
9665/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009666 * xmlParseExternalEntity:
9667 * @doc: the document the chunk pertains to
9668 * @sax: the SAX handler bloc (possibly NULL)
9669 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9670 * @depth: Used for loop detection, use 0
9671 * @URL: the URL for the entity to load
9672 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009673 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009674 *
9675 * Parse an external general entity
9676 * An external general parsed entity is well-formed if it matches the
9677 * production labeled extParsedEnt.
9678 *
9679 * [78] extParsedEnt ::= TextDecl? content
9680 *
9681 * Returns 0 if the entity is well formed, -1 in case of args problem and
9682 * the parser error code otherwise
9683 */
9684
9685int
9686xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009687 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009688 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009689 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009690}
9691
9692/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009693 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009694 * @doc: the document the chunk pertains to
9695 * @sax: the SAX handler bloc (possibly NULL)
9696 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9697 * @depth: Used for loop detection, use 0
9698 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009699 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009700 *
9701 * Parse a well-balanced chunk of an XML document
9702 * called by the parser
9703 * The allowed sequence for the Well Balanced Chunk is the one defined by
9704 * the content production in the XML grammar:
9705 *
9706 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9707 *
9708 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9709 * the parser error code otherwise
9710 */
9711
9712int
9713xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009714 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009715 xmlParserCtxtPtr ctxt;
9716 xmlDocPtr newDoc;
9717 xmlSAXHandlerPtr oldsax = NULL;
9718 int size;
9719 int ret = 0;
9720
9721 if (depth > 40) {
9722 return(XML_ERR_ENTITY_LOOP);
9723 }
9724
9725
Daniel Veillardcda96922001-08-21 10:56:31 +00009726 if (lst != NULL)
9727 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009728 if (string == NULL)
9729 return(-1);
9730
9731 size = xmlStrlen(string);
9732
9733 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9734 if (ctxt == NULL) return(-1);
9735 ctxt->userData = ctxt;
9736 if (sax != NULL) {
9737 oldsax = ctxt->sax;
9738 ctxt->sax = sax;
9739 if (user_data != NULL)
9740 ctxt->userData = user_data;
9741 }
9742 newDoc = xmlNewDoc(BAD_CAST "1.0");
9743 if (newDoc == NULL) {
9744 xmlFreeParserCtxt(ctxt);
9745 return(-1);
9746 }
9747 if (doc != NULL) {
9748 newDoc->intSubset = doc->intSubset;
9749 newDoc->extSubset = doc->extSubset;
9750 }
9751 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9752 if (newDoc->children == NULL) {
9753 if (sax != NULL)
9754 ctxt->sax = oldsax;
9755 xmlFreeParserCtxt(ctxt);
9756 newDoc->intSubset = NULL;
9757 newDoc->extSubset = NULL;
9758 xmlFreeDoc(newDoc);
9759 return(-1);
9760 }
9761 nodePush(ctxt, newDoc->children);
9762 if (doc == NULL) {
9763 ctxt->myDoc = newDoc;
9764 } else {
9765 ctxt->myDoc = doc;
9766 newDoc->children->doc = doc;
9767 }
9768 ctxt->instate = XML_PARSER_CONTENT;
9769 ctxt->depth = depth;
9770
9771 /*
9772 * Doing validity checking on chunk doesn't make sense
9773 */
9774 ctxt->validate = 0;
9775 ctxt->loadsubset = 0;
9776
9777 xmlParseContent(ctxt);
9778
9779 if ((RAW == '<') && (NXT(1) == '/')) {
9780 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9782 ctxt->sax->error(ctxt->userData,
9783 "chunk is not well balanced\n");
9784 ctxt->wellFormed = 0;
9785 ctxt->disableSAX = 1;
9786 } else if (RAW != 0) {
9787 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9789 ctxt->sax->error(ctxt->userData,
9790 "extra content at the end of well balanced chunk\n");
9791 ctxt->wellFormed = 0;
9792 ctxt->disableSAX = 1;
9793 }
9794 if (ctxt->node != newDoc->children) {
9795 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9797 ctxt->sax->error(ctxt->userData,
9798 "chunk is not well balanced\n");
9799 ctxt->wellFormed = 0;
9800 ctxt->disableSAX = 1;
9801 }
9802
9803 if (!ctxt->wellFormed) {
9804 if (ctxt->errNo == 0)
9805 ret = 1;
9806 else
9807 ret = ctxt->errNo;
9808 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009809 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009810 xmlNodePtr cur;
9811
9812 /*
9813 * Return the newly created nodeset after unlinking it from
9814 * they pseudo parent.
9815 */
9816 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009817 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009818 while (cur != NULL) {
9819 cur->parent = NULL;
9820 cur = cur->next;
9821 }
9822 newDoc->children->children = NULL;
9823 }
9824 ret = 0;
9825 }
9826 if (sax != NULL)
9827 ctxt->sax = oldsax;
9828 xmlFreeParserCtxt(ctxt);
9829 newDoc->intSubset = NULL;
9830 newDoc->extSubset = NULL;
9831 xmlFreeDoc(newDoc);
9832
9833 return(ret);
9834}
9835
9836/**
9837 * xmlSAXParseEntity:
9838 * @sax: the SAX handler block
9839 * @filename: the filename
9840 *
9841 * parse an XML external entity out of context and build a tree.
9842 * It use the given SAX function block to handle the parsing callback.
9843 * If sax is NULL, fallback to the default DOM tree building routines.
9844 *
9845 * [78] extParsedEnt ::= TextDecl? content
9846 *
9847 * This correspond to a "Well Balanced" chunk
9848 *
9849 * Returns the resulting document tree
9850 */
9851
9852xmlDocPtr
9853xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9854 xmlDocPtr ret;
9855 xmlParserCtxtPtr ctxt;
9856 char *directory = NULL;
9857
9858 ctxt = xmlCreateFileParserCtxt(filename);
9859 if (ctxt == NULL) {
9860 return(NULL);
9861 }
9862 if (sax != NULL) {
9863 if (ctxt->sax != NULL)
9864 xmlFree(ctxt->sax);
9865 ctxt->sax = sax;
9866 ctxt->userData = NULL;
9867 }
9868
9869 if ((ctxt->directory == NULL) && (directory == NULL))
9870 directory = xmlParserGetDirectory(filename);
9871
9872 xmlParseExtParsedEnt(ctxt);
9873
9874 if (ctxt->wellFormed)
9875 ret = ctxt->myDoc;
9876 else {
9877 ret = NULL;
9878 xmlFreeDoc(ctxt->myDoc);
9879 ctxt->myDoc = NULL;
9880 }
9881 if (sax != NULL)
9882 ctxt->sax = NULL;
9883 xmlFreeParserCtxt(ctxt);
9884
9885 return(ret);
9886}
9887
9888/**
9889 * xmlParseEntity:
9890 * @filename: the filename
9891 *
9892 * parse an XML external entity out of context and build a tree.
9893 *
9894 * [78] extParsedEnt ::= TextDecl? content
9895 *
9896 * This correspond to a "Well Balanced" chunk
9897 *
9898 * Returns the resulting document tree
9899 */
9900
9901xmlDocPtr
9902xmlParseEntity(const char *filename) {
9903 return(xmlSAXParseEntity(NULL, filename));
9904}
9905
9906/**
9907 * xmlCreateEntityParserCtxt:
9908 * @URL: the entity URL
9909 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009910 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009911 *
9912 * Create a parser context for an external entity
9913 * Automatic support for ZLIB/Compress compressed document is provided
9914 * by default if found at compile-time.
9915 *
9916 * Returns the new parser context or NULL
9917 */
9918xmlParserCtxtPtr
9919xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9920 const xmlChar *base) {
9921 xmlParserCtxtPtr ctxt;
9922 xmlParserInputPtr inputStream;
9923 char *directory = NULL;
9924 xmlChar *uri;
9925
9926 ctxt = xmlNewParserCtxt();
9927 if (ctxt == NULL) {
9928 return(NULL);
9929 }
9930
9931 uri = xmlBuildURI(URL, base);
9932
9933 if (uri == NULL) {
9934 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9935 if (inputStream == NULL) {
9936 xmlFreeParserCtxt(ctxt);
9937 return(NULL);
9938 }
9939
9940 inputPush(ctxt, inputStream);
9941
9942 if ((ctxt->directory == NULL) && (directory == NULL))
9943 directory = xmlParserGetDirectory((char *)URL);
9944 if ((ctxt->directory == NULL) && (directory != NULL))
9945 ctxt->directory = directory;
9946 } else {
9947 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9948 if (inputStream == NULL) {
9949 xmlFree(uri);
9950 xmlFreeParserCtxt(ctxt);
9951 return(NULL);
9952 }
9953
9954 inputPush(ctxt, inputStream);
9955
9956 if ((ctxt->directory == NULL) && (directory == NULL))
9957 directory = xmlParserGetDirectory((char *)uri);
9958 if ((ctxt->directory == NULL) && (directory != NULL))
9959 ctxt->directory = directory;
9960 xmlFree(uri);
9961 }
9962
9963 return(ctxt);
9964}
9965
9966/************************************************************************
9967 * *
9968 * Front ends when parsing from a file *
9969 * *
9970 ************************************************************************/
9971
9972/**
9973 * xmlCreateFileParserCtxt:
9974 * @filename: the filename
9975 *
9976 * Create a parser context for a file content.
9977 * Automatic support for ZLIB/Compress compressed document is provided
9978 * by default if found at compile-time.
9979 *
9980 * Returns the new parser context or NULL
9981 */
9982xmlParserCtxtPtr
9983xmlCreateFileParserCtxt(const char *filename)
9984{
9985 xmlParserCtxtPtr ctxt;
9986 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009987 char *directory = NULL;
9988
Owen Taylor3473f882001-02-23 17:55:21 +00009989 ctxt = xmlNewParserCtxt();
9990 if (ctxt == NULL) {
9991 if (xmlDefaultSAXHandler.error != NULL) {
9992 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9993 }
9994 return(NULL);
9995 }
9996
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009997 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009998 if (inputStream == NULL) {
9999 xmlFreeParserCtxt(ctxt);
10000 return(NULL);
10001 }
10002
Owen Taylor3473f882001-02-23 17:55:21 +000010003 inputPush(ctxt, inputStream);
10004 if ((ctxt->directory == NULL) && (directory == NULL))
10005 directory = xmlParserGetDirectory(filename);
10006 if ((ctxt->directory == NULL) && (directory != NULL))
10007 ctxt->directory = directory;
10008
10009 return(ctxt);
10010}
10011
10012/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010013 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010014 * @sax: the SAX handler block
10015 * @filename: the filename
10016 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10017 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010018 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010019 *
10020 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10021 * compressed document is provided by default if found at compile-time.
10022 * It use the given SAX function block to handle the parsing callback.
10023 * If sax is NULL, fallback to the default DOM tree building routines.
10024 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010025 * User data (void *) is stored within the parser context in the
10026 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010027 *
Owen Taylor3473f882001-02-23 17:55:21 +000010028 * Returns the resulting document tree
10029 */
10030
10031xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010032xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10033 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010034 xmlDocPtr ret;
10035 xmlParserCtxtPtr ctxt;
10036 char *directory = NULL;
10037
Daniel Veillard635ef722001-10-29 11:48:19 +000010038 xmlInitParser();
10039
Owen Taylor3473f882001-02-23 17:55:21 +000010040 ctxt = xmlCreateFileParserCtxt(filename);
10041 if (ctxt == NULL) {
10042 return(NULL);
10043 }
10044 if (sax != NULL) {
10045 if (ctxt->sax != NULL)
10046 xmlFree(ctxt->sax);
10047 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010048 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010049 if (data!=NULL) {
10050 ctxt->_private=data;
10051 }
Owen Taylor3473f882001-02-23 17:55:21 +000010052
10053 if ((ctxt->directory == NULL) && (directory == NULL))
10054 directory = xmlParserGetDirectory(filename);
10055 if ((ctxt->directory == NULL) && (directory != NULL))
10056 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10057
10058 xmlParseDocument(ctxt);
10059
10060 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10061 else {
10062 ret = NULL;
10063 xmlFreeDoc(ctxt->myDoc);
10064 ctxt->myDoc = NULL;
10065 }
10066 if (sax != NULL)
10067 ctxt->sax = NULL;
10068 xmlFreeParserCtxt(ctxt);
10069
10070 return(ret);
10071}
10072
10073/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010074 * xmlSAXParseFile:
10075 * @sax: the SAX handler block
10076 * @filename: the filename
10077 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10078 * documents
10079 *
10080 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10081 * compressed document is provided by default if found at compile-time.
10082 * It use the given SAX function block to handle the parsing callback.
10083 * If sax is NULL, fallback to the default DOM tree building routines.
10084 *
10085 * Returns the resulting document tree
10086 */
10087
10088xmlDocPtr
10089xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10090 int recovery) {
10091 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10092}
10093
10094/**
Owen Taylor3473f882001-02-23 17:55:21 +000010095 * xmlRecoverDoc:
10096 * @cur: a pointer to an array of xmlChar
10097 *
10098 * parse an XML in-memory document and build a tree.
10099 * In the case the document is not Well Formed, a tree is built anyway
10100 *
10101 * Returns the resulting document tree
10102 */
10103
10104xmlDocPtr
10105xmlRecoverDoc(xmlChar *cur) {
10106 return(xmlSAXParseDoc(NULL, cur, 1));
10107}
10108
10109/**
10110 * xmlParseFile:
10111 * @filename: the filename
10112 *
10113 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10114 * compressed document is provided by default if found at compile-time.
10115 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010116 * Returns the resulting document tree if the file was wellformed,
10117 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010118 */
10119
10120xmlDocPtr
10121xmlParseFile(const char *filename) {
10122 return(xmlSAXParseFile(NULL, filename, 0));
10123}
10124
10125/**
10126 * xmlRecoverFile:
10127 * @filename: the filename
10128 *
10129 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10130 * compressed document is provided by default if found at compile-time.
10131 * In the case the document is not Well Formed, a tree is built anyway
10132 *
10133 * Returns the resulting document tree
10134 */
10135
10136xmlDocPtr
10137xmlRecoverFile(const char *filename) {
10138 return(xmlSAXParseFile(NULL, filename, 1));
10139}
10140
10141
10142/**
10143 * xmlSetupParserForBuffer:
10144 * @ctxt: an XML parser context
10145 * @buffer: a xmlChar * buffer
10146 * @filename: a file name
10147 *
10148 * Setup the parser context to parse a new buffer; Clears any prior
10149 * contents from the parser context. The buffer parameter must not be
10150 * NULL, but the filename parameter can be
10151 */
10152void
10153xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10154 const char* filename)
10155{
10156 xmlParserInputPtr input;
10157
10158 input = xmlNewInputStream(ctxt);
10159 if (input == NULL) {
10160 perror("malloc");
10161 xmlFree(ctxt);
10162 return;
10163 }
10164
10165 xmlClearParserCtxt(ctxt);
10166 if (filename != NULL)
10167 input->filename = xmlMemStrdup(filename);
10168 input->base = buffer;
10169 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010170 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010171 inputPush(ctxt, input);
10172}
10173
10174/**
10175 * xmlSAXUserParseFile:
10176 * @sax: a SAX handler
10177 * @user_data: The user data returned on SAX callbacks
10178 * @filename: a file name
10179 *
10180 * parse an XML file and call the given SAX handler routines.
10181 * Automatic support for ZLIB/Compress compressed document is provided
10182 *
10183 * Returns 0 in case of success or a error number otherwise
10184 */
10185int
10186xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10187 const char *filename) {
10188 int ret = 0;
10189 xmlParserCtxtPtr ctxt;
10190
10191 ctxt = xmlCreateFileParserCtxt(filename);
10192 if (ctxt == NULL) return -1;
10193 if (ctxt->sax != &xmlDefaultSAXHandler)
10194 xmlFree(ctxt->sax);
10195 ctxt->sax = sax;
10196 if (user_data != NULL)
10197 ctxt->userData = user_data;
10198
10199 xmlParseDocument(ctxt);
10200
10201 if (ctxt->wellFormed)
10202 ret = 0;
10203 else {
10204 if (ctxt->errNo != 0)
10205 ret = ctxt->errNo;
10206 else
10207 ret = -1;
10208 }
10209 if (sax != NULL)
10210 ctxt->sax = NULL;
10211 xmlFreeParserCtxt(ctxt);
10212
10213 return ret;
10214}
10215
10216/************************************************************************
10217 * *
10218 * Front ends when parsing from memory *
10219 * *
10220 ************************************************************************/
10221
10222/**
10223 * xmlCreateMemoryParserCtxt:
10224 * @buffer: a pointer to a char array
10225 * @size: the size of the array
10226 *
10227 * Create a parser context for an XML in-memory document.
10228 *
10229 * Returns the new parser context or NULL
10230 */
10231xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010232xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010233 xmlParserCtxtPtr ctxt;
10234 xmlParserInputPtr input;
10235 xmlParserInputBufferPtr buf;
10236
10237 if (buffer == NULL)
10238 return(NULL);
10239 if (size <= 0)
10240 return(NULL);
10241
10242 ctxt = xmlNewParserCtxt();
10243 if (ctxt == NULL)
10244 return(NULL);
10245
10246 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10247 if (buf == NULL) return(NULL);
10248
10249 input = xmlNewInputStream(ctxt);
10250 if (input == NULL) {
10251 xmlFreeParserCtxt(ctxt);
10252 return(NULL);
10253 }
10254
10255 input->filename = NULL;
10256 input->buf = buf;
10257 input->base = input->buf->buffer->content;
10258 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010259 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010260
10261 inputPush(ctxt, input);
10262 return(ctxt);
10263}
10264
10265/**
10266 * xmlSAXParseMemory:
10267 * @sax: the SAX handler block
10268 * @buffer: an pointer to a char array
10269 * @size: the size of the array
10270 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10271 * documents
10272 *
10273 * parse an XML in-memory block and use the given SAX function block
10274 * to handle the parsing callback. If sax is NULL, fallback to the default
10275 * DOM tree building routines.
10276 *
10277 * Returns the resulting document tree
10278 */
10279xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010280xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10281 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010282 xmlDocPtr ret;
10283 xmlParserCtxtPtr ctxt;
10284
10285 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10286 if (ctxt == NULL) return(NULL);
10287 if (sax != NULL) {
10288 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010289 }
10290
10291 xmlParseDocument(ctxt);
10292
10293 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10294 else {
10295 ret = NULL;
10296 xmlFreeDoc(ctxt->myDoc);
10297 ctxt->myDoc = NULL;
10298 }
10299 if (sax != NULL)
10300 ctxt->sax = NULL;
10301 xmlFreeParserCtxt(ctxt);
10302
10303 return(ret);
10304}
10305
10306/**
10307 * xmlParseMemory:
10308 * @buffer: an pointer to a char array
10309 * @size: the size of the array
10310 *
10311 * parse an XML in-memory block and build a tree.
10312 *
10313 * Returns the resulting document tree
10314 */
10315
Daniel Veillard50822cb2001-07-26 20:05:51 +000010316xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010317 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10318}
10319
10320/**
10321 * xmlRecoverMemory:
10322 * @buffer: an pointer to a char array
10323 * @size: the size of the array
10324 *
10325 * parse an XML in-memory block and build a tree.
10326 * In the case the document is not Well Formed, a tree is built anyway
10327 *
10328 * Returns the resulting document tree
10329 */
10330
Daniel Veillard50822cb2001-07-26 20:05:51 +000010331xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010332 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10333}
10334
10335/**
10336 * xmlSAXUserParseMemory:
10337 * @sax: a SAX handler
10338 * @user_data: The user data returned on SAX callbacks
10339 * @buffer: an in-memory XML document input
10340 * @size: the length of the XML document in bytes
10341 *
10342 * A better SAX parsing routine.
10343 * parse an XML in-memory buffer and call the given SAX handler routines.
10344 *
10345 * Returns 0 in case of success or a error number otherwise
10346 */
10347int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010348 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010349 int ret = 0;
10350 xmlParserCtxtPtr ctxt;
10351 xmlSAXHandlerPtr oldsax = NULL;
10352
10353 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10354 if (ctxt == NULL) return -1;
10355 if (sax != NULL) {
10356 oldsax = ctxt->sax;
10357 ctxt->sax = sax;
10358 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010359 if (user_data != NULL)
10360 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010361
10362 xmlParseDocument(ctxt);
10363
10364 if (ctxt->wellFormed)
10365 ret = 0;
10366 else {
10367 if (ctxt->errNo != 0)
10368 ret = ctxt->errNo;
10369 else
10370 ret = -1;
10371 }
10372 if (sax != NULL) {
10373 ctxt->sax = oldsax;
10374 }
10375 xmlFreeParserCtxt(ctxt);
10376
10377 return ret;
10378}
10379
10380/**
10381 * xmlCreateDocParserCtxt:
10382 * @cur: a pointer to an array of xmlChar
10383 *
10384 * Creates a parser context for an XML in-memory document.
10385 *
10386 * Returns the new parser context or NULL
10387 */
10388xmlParserCtxtPtr
10389xmlCreateDocParserCtxt(xmlChar *cur) {
10390 int len;
10391
10392 if (cur == NULL)
10393 return(NULL);
10394 len = xmlStrlen(cur);
10395 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10396}
10397
10398/**
10399 * xmlSAXParseDoc:
10400 * @sax: the SAX handler block
10401 * @cur: a pointer to an array of xmlChar
10402 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10403 * documents
10404 *
10405 * parse an XML in-memory document and build a tree.
10406 * It use the given SAX function block to handle the parsing callback.
10407 * If sax is NULL, fallback to the default DOM tree building routines.
10408 *
10409 * Returns the resulting document tree
10410 */
10411
10412xmlDocPtr
10413xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10414 xmlDocPtr ret;
10415 xmlParserCtxtPtr ctxt;
10416
10417 if (cur == NULL) return(NULL);
10418
10419
10420 ctxt = xmlCreateDocParserCtxt(cur);
10421 if (ctxt == NULL) return(NULL);
10422 if (sax != NULL) {
10423 ctxt->sax = sax;
10424 ctxt->userData = NULL;
10425 }
10426
10427 xmlParseDocument(ctxt);
10428 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10429 else {
10430 ret = NULL;
10431 xmlFreeDoc(ctxt->myDoc);
10432 ctxt->myDoc = NULL;
10433 }
10434 if (sax != NULL)
10435 ctxt->sax = NULL;
10436 xmlFreeParserCtxt(ctxt);
10437
10438 return(ret);
10439}
10440
10441/**
10442 * xmlParseDoc:
10443 * @cur: a pointer to an array of xmlChar
10444 *
10445 * parse an XML in-memory document and build a tree.
10446 *
10447 * Returns the resulting document tree
10448 */
10449
10450xmlDocPtr
10451xmlParseDoc(xmlChar *cur) {
10452 return(xmlSAXParseDoc(NULL, cur, 0));
10453}
10454
Daniel Veillard8107a222002-01-13 14:10:10 +000010455/************************************************************************
10456 * *
10457 * Specific function to keep track of entities references *
10458 * and used by the XSLT debugger *
10459 * *
10460 ************************************************************************/
10461
10462static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10463
10464/**
10465 * xmlAddEntityReference:
10466 * @ent : A valid entity
10467 * @firstNode : A valid first node for children of entity
10468 * @lastNode : A valid last node of children entity
10469 *
10470 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10471 */
10472static void
10473xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10474 xmlNodePtr lastNode)
10475{
10476 if (xmlEntityRefFunc != NULL) {
10477 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10478 }
10479}
10480
10481
10482/**
10483 * xmlSetEntityReferenceFunc:
10484 * @func : A valid function
10485 *
10486 * Set the function to call call back when a xml reference has been made
10487 */
10488void
10489xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10490{
10491 xmlEntityRefFunc = func;
10492}
Owen Taylor3473f882001-02-23 17:55:21 +000010493
10494/************************************************************************
10495 * *
10496 * Miscellaneous *
10497 * *
10498 ************************************************************************/
10499
10500#ifdef LIBXML_XPATH_ENABLED
10501#include <libxml/xpath.h>
10502#endif
10503
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010504extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010505static int xmlParserInitialized = 0;
10506
10507/**
10508 * xmlInitParser:
10509 *
10510 * Initialization function for the XML parser.
10511 * This is not reentrant. Call once before processing in case of
10512 * use in multithreaded programs.
10513 */
10514
10515void
10516xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010517 if (xmlParserInitialized != 0)
10518 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010519
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010520 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10521 (xmlGenericError == NULL))
10522 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010523 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010524 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010525 xmlInitCharEncodingHandlers();
10526 xmlInitializePredefinedEntities();
10527 xmlDefaultSAXHandlerInit();
10528 xmlRegisterDefaultInputCallbacks();
10529 xmlRegisterDefaultOutputCallbacks();
10530#ifdef LIBXML_HTML_ENABLED
10531 htmlInitAutoClose();
10532 htmlDefaultSAXHandlerInit();
10533#endif
10534#ifdef LIBXML_XPATH_ENABLED
10535 xmlXPathInit();
10536#endif
10537 xmlParserInitialized = 1;
10538}
10539
10540/**
10541 * xmlCleanupParser:
10542 *
10543 * Cleanup function for the XML parser. It tries to reclaim all
10544 * parsing related global memory allocated for the parser processing.
10545 * It doesn't deallocate any document related memory. Calling this
10546 * function should not prevent reusing the parser.
10547 */
10548
10549void
10550xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010551 xmlCleanupCharEncodingHandlers();
10552 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010553#ifdef LIBXML_CATALOG_ENABLED
10554 xmlCatalogCleanup();
10555#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010556 xmlCleanupThreads();
10557 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010558}